diff mbox series

[8/8] Support APX JMPABS

Message ID	20231102112911.2372810-9-lili.cui@intel.com
State	Unresolved
Headers	Received-SPF: pass (google.com: domain of binutils-bounces+ouuuleilei=gmail.com@sourceware.org designates 2620:52:3:1:0:246e:9693:128c as permitted sender) client-ip=2620:52:3:1:0:246e:9693:128c; DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org A73D03857835 From: "Cui, Lili" <lili.cui@intel.com> To: binutils@sourceware.org Cc: jbeulich@suse.com, hongjiu.lu@intel.com, ccoutant@gmail.com, "Hu, Lin1" <lin1.hu@intel.com> Subject: [PATCH 8/8] Support APX JMPABS Date: Thu, 2 Nov 2023 11:29:11 +0000 Message-Id: <20231102112911.2372810-9-lili.cui@intel.com> In-Reply-To: <20231102112911.2372810-1-lili.cui@intel.com> References: <20231102112911.2372810-1-lili.cui@intel.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Precedence: list Errors-To: binutils-bounces+ouuuleilei=gmail.com@sourceware.org X-getmail-retrieved-from-mailbox: INBOX
Series	Support Intel APX EGPR \| [v2,0/8] Support Intel APX EGPR [1/8] Support APX GPR32 with rex2 prefix [2/8] Created an empty EVEX_MAP4_ sub-table for EVEX instructions. [3/8] Support APX GPR32 with extend evex prefix [4/8] Add tests for APX GPR32 with extend evex prefix [5/8] Support APX NDD [6/8] Support APX Push2/Pop2 [7/8] Support APX NDD optimized encoding. [8/8] Support APX JMPABS

Checks

Context	Check	Description
snail/binutils-gdb-check	warning	Git am fail log

Commit Message

Cui, Lili Nov. 2, 2023, 11:29 a.m. UTC

  From: "Hu, Lin1" <lin1.hu@intel.com>

gas/ChangeLog:

	* config/tc-i386.c (is_any_apx_encoding): Add jmpabs.
	(is_any_apx_rex2_encoding): Ditto.
	* testsuite/gas/i386/i386.exp: Add tests.
	* testsuite/gas/i386/x86-64.exp: Ditto.
	* testsuite/gas/i386/apx-jmpabs-inval.l: New test.
	* testsuite/gas/i386/apx-jmpabs-inval.s: Ditto.
	* testsuite/gas/i386/x86-64-apx-jmpabs-intel.d: Ditto.
	* testsuite/gas/i386/x86-64-apx-jmpabs-inval.d: Ditto.
	* testsuite/gas/i386/x86-64-apx-jmpabs-inval.s: Ditto.
	* testsuite/gas/i386/x86-64-apx-jmpabs.d: Ditto.
	* testsuite/gas/i386/x86-64-apx-jmpabs.s: Ditto.

opcodes/ChangeLog:

	* i386-dis.c (JMPABS_Fixup): New Fixup function to disassemble jmpabs.
	(print_insn): Add #UD exception for jmpabs.
	(dis386): Modify a1 unit for support jmpabs.
	* i386-mnem.h: Regenerated.
	* i386-opc.tbl: New insns.
	* i386-tbl.h: Regenerated.
---
 gas/config/tc-i386.c                          |  6 +-
 gas/testsuite/gas/i386/apx-jmpabs-inval.l     |  3 +
 gas/testsuite/gas/i386/apx-jmpabs-inval.s     |  6 ++
 gas/testsuite/gas/i386/i386.exp               |  1 +
 .../gas/i386/x86-64-apx-jmpabs-intel.d        | 14 +++++
 .../gas/i386/x86-64-apx-jmpabs-inval.d        | 55 +++++++++++++++++++
 .../gas/i386/x86-64-apx-jmpabs-inval.s        | 17 ++++++
 gas/testsuite/gas/i386/x86-64-apx-jmpabs.d    | 14 +++++
 gas/testsuite/gas/i386/x86-64-apx-jmpabs.s    | 10 ++++
 gas/testsuite/gas/i386/x86-64.exp             |  3 +
 opcodes/i386-dis.c                            | 43 ++++++++++++++-
 opcodes/i386-opc.tbl                          |  2 +
 12 files changed, 171 insertions(+), 3 deletions(-)
 create mode 100644 gas/testsuite/gas/i386/apx-jmpabs-inval.l
 create mode 100644 gas/testsuite/gas/i386/apx-jmpabs-inval.s
 create mode 100644 gas/testsuite/gas/i386/x86-64-apx-jmpabs-intel.d
 create mode 100644 gas/testsuite/gas/i386/x86-64-apx-jmpabs-inval.d
 create mode 100644 gas/testsuite/gas/i386/x86-64-apx-jmpabs-inval.s
 create mode 100644 gas/testsuite/gas/i386/x86-64-apx-jmpabs.d
 create mode 100644 gas/testsuite/gas/i386/x86-64-apx-jmpabs.s

Comments

Jan Beulich Nov. 9, 2023, 12:59 p.m. UTC | #1

On 02.11.2023 12:29, Cui, Lili wrote:
> --- a/gas/config/tc-i386.c
> +++ b/gas/config/tc-i386.c
> @@ -7790,7 +7790,8 @@ match_template (char mnem_suffix)
>    if (!quiet_warnings)
>      {
>        if (!intel_syntax
> -	  && (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE)))
> +	  && (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE))
> +	  && t->mnem_off != MN_jmpabs)
>  	as_warn (_("indirect %s without `*'"), insn_name (t));

Coming back to this, which I did comment on before already. The insn
taking an immediate operand doesn't really justify this, as it leaves
open the underlying question of why you use JumpAbsolute in the insn
template in the first place. I've gone through all the uses of
JUMP_ABSOLUTE, and I didn't find any where the respective handling
would be applicable here. In fact it's unclear whether the insn needs
marking as any JUMP_* at all: It's not really different from, say,
"mov $imm, %rcx".

There's a further question regarding its operand representation,
though: Can you explain why it's Imm64, not Disp64? The latter would,
to me, seem more natural to use here. Not only from a assembler
internals perspective, but also from the users' one: The $ in the
operand carries absolutely no meaning (see also the related testcase
comment below) in AT&T syntax, and there's no noticeable difference
in Intel syntax afaict.

> @@ -8939,6 +8940,9 @@ process_operands (void)
>  	}
>      }
>  
> +  if (i.tm.mnem_off == MN_jmpabs)
> +    i.rex2_encoding = true;

Please see my earlier remarks wrt "rex2" vs "{rex2}". What you do
here is effect the latter. Yet as indicated, the pseudo-prefix isn't
really an indication of "must have REX2 prefix", but only a weak
request to do so if possible. I think you want to set i.rex2 here
instead, requiring a way to express that an empty REX2 prefix is
wanted.

> --- /dev/null
> +++ b/gas/testsuite/gas/i386/x86-64-apx-jmpabs-inval.s
> @@ -0,0 +1,17 @@
> +# Check bytecode of APX_F jmpabs instructions with illegal encode.
> +
> +	.text
> +# With 66 prefix
> +	.byte 0x66,0x64,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
> +	.byte 0x66,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
> +# With 67 prefix
> +	.byte 0x67,0x64,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
> +	.byte 0x67,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
> +# With F2 prefix
> +	.byte 0xf2,0x64,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
> +	.byte 0xf2,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
> +# With F3 prefix
> +	.byte 0xf3,0x64,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
> +	.byte 0xf3,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
> +# REX2.M0 = 0 REX2.W = 1
> +	.byte 0xd5,0x08,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00

As per earlier comments: This wants expressing via .insn, to yield input
to gas human-readable (even if, as it looks, two .insn are going to be
required per resulting construct). Further in the last comment, why is
REX2.M0 mentioned there, but not elsewhere? Also what purpose serve the
0x64 bytes here? The encodings are invalid irrespective of them. Instead
I'd kind have expected LOCK to also be covered.

Also a spec question as we're talking of what is or is not valid (i.e.
causing #UD) here: Why would XCR0.APX=0 need to cause #UD? There's no
use of eGPR-s here.

> --- /dev/null
> +++ b/gas/testsuite/gas/i386/x86-64-apx-jmpabs.s
> @@ -0,0 +1,10 @@
> +# Check 64bit APX_F JMPABS instructions
> +
> +	.text
> + _start:
> +	jmpabs	      $0x0202020202020202
> +	jmpabs	      $0x2
> +
> +.intel_syntax noprefix
> +	jmpabs	      0x0202020202020202
> +	jmpabs	      0x2

I expect this isn't going to be the normal use of the insn. Instead I
would foresee the typical users to be "jmpabs symbol" (and - as per
above - intentionally omitting the $ already). IOW the testcase also
wants to cover the case requiring a relocation, including a check that
the correct relocation is emitted (covering both ELF and COFF; I'm not
going to insist on also covering Mach-O, as - for a reason that
escapes me - gas can't even be configured for x86_64-*-darwin*).

> --- a/opcodes/i386-dis.c
> +++ b/opcodes/i386-dis.c
> @@ -106,6 +106,7 @@ static bool MOVSXD_Fixup (instr_info *, int, int);
>  static bool DistinctDest_Fixup (instr_info *, int, int);
>  static bool PREFETCHI_Fixup (instr_info *, int, int);
>  static bool PUSH2_POP2_Fixup (instr_info *, int, int);
> +static bool JMPABS_Fixup (instr_info *, int, int);
>  
>  static void ATTRIBUTE_PRINTF_3 i386_dis_printf (const disassemble_info *,
>  						enum disassembler_style,
> @@ -258,6 +259,9 @@ struct instr_info
>    char scale_char;
>  
>    enum x86_64_isa isa64;
> +
> +  /* Remember if the current op is jmpabs.  */
> +  bool is_jmpabs;
>  };

This field would probably best live next to op_is_jump (and then also
be named op_is_jmpabs, assuming a separate boolean is indeed needed).
I further expect that op_is_jump also wants setting for JMPABS.

> @@ -2032,7 +2036,7 @@ static const struct dis386 dis386[] = {
>    { "lahf",		{ XX }, 0 },
>    /* a0 */
>    { "mov%LB",		{ AL, Ob }, PREFIX_REX2_ILLEGAL },
> -  { "mov%LS",		{ eAX, Ov }, PREFIX_REX2_ILLEGAL },
> +  { "mov%LS",		{ { JMPABS_Fixup, eAX_reg }, { JMPABS_Fixup, v_mode } }, PREFIX_REX2_ILLEGAL },
>    { "mov%LB",		{ Ob, AL }, PREFIX_REX2_ILLEGAL },
>    { "mov%LS",		{ Ov, eAX }, PREFIX_REX2_ILLEGAL },
>    { "movs{b|}",		{ Ybr, Xb }, PREFIX_REX2_ILLEGAL },
> @@ -9648,7 +9652,7 @@ print_insn (bfd_vma pc, disassemble_info *info, int intel_syntax)
>      }
>  
>    if ((dp->prefix_requirement & PREFIX_REX2_ILLEGAL)
> -      && ins.last_rex2_prefix >= 0)
> +      && ins.last_rex2_prefix >= 0 && !ins.is_jmpabs)
>      {
>        i386_dis_printf (info, dis_style_text, "(bad)");
>        ret = ins.end_codep - priv.the_buffer;
> @@ -13857,3 +13861,38 @@ PUSH2_POP2_Fixup (instr_info *ins, int bytemode, int sizeflag)
>  
>    return OP_VEX (ins, bytemode, sizeflag);
>  }
> +
> +static bool
> +JMPABS_Fixup (instr_info *ins, int bytemode, int sizeflag)
> +{
> +  if (ins->address_mode == mode_64bit
> +      && ins->last_rex2_prefix >= 0
> +      && (ins->rex2 & 0x80) == 0x0)
> +    {
> +      uint64_t op;
> +
> +      if (bytemode == eAX_reg)
> +	return true;
> +
> +      if (!get64 (ins, &op))
> +	return false;
> +
> +      if ((ins->prefixes & (PREFIX_OPCODE | PREFIX_ADDR)) != 0x0
> +	  || (ins->rex & REX_W) != 0x0)
> +	{
> +	  oappend (ins, "(bad)");
> +	  return true;
> +	}
> +
> +      ins->mnemonicendp = stpcpy (ins->obuf, "jmpabs");
> +      ins->all_prefixes[ins->last_rex2_prefix] = 0;

This doesn't look right. REX2.{R,X,B}{3,4} set still want recording
in the output. I expect you may need to set a bit in rex2_used here,
but how exactly that ought to work depends on how comments on earlier
patches are going to be addressed. This may then also eliminate the
need for ...

> +      ins->is_jmpabs = true;

... this field, which likely will be covered by a more generic
approach.

> +      oappend_immediate (ins, op);
> +
> +      return true;
> +    }
> +
> +  if (bytemode == eAX_reg)
> +    return OP_IMREG (ins, bytemode, sizeflag);
> +  return OP_OFF64 (ins, v_mode, sizeflag);

v_mode is, afaics, properly passed into here. Why would you open-code
that, instead of using bytemode? Not doing so will give the compiler
more ICF opportunities.

Jan

Hu, Lin1 Nov. 14, 2023, 3:26 a.m. UTC | #2

> On 02.11.2023 12:29, Cui, Lili wrote:
> > --- a/gas/config/tc-i386.c
> > +++ b/gas/config/tc-i386.c
> > @@ -7790,7 +7790,8 @@ match_template (char mnem_suffix)
> >    if (!quiet_warnings)
> >      {
> >        if (!intel_syntax
> > -	  && (i.jumpabsolute != (t->opcode_modifier.jump ==
> JUMP_ABSOLUTE)))
> > +	  && (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE))
> > +	  && t->mnem_off != MN_jmpabs)
> >  	as_warn (_("indirect %s without `*'"), insn_name (t));
> 
> Coming back to this, which I did comment on before already. The insn taking an
> immediate operand doesn't really justify this, as it leaves open the underlying
> question of why you use JumpAbsolute in the insn template in the first place. I've
> gone through all the uses of JUMP_ABSOLUTE, and I didn't find any where the
> respective handling would be applicable here. In fact it's unclear whether the
> insn needs marking as any JUMP_* at all: It's not really different from, say, "mov
> $imm, %rcx".
>

According to the doc JMPABS is a 64-bit only ISA extension, and acts as a near-direct branch with an absolute target. I made this markup simply because I was mimicking other jmp's. If we don't need the attribute, I'm glad I can remove it.

> 
> There's a further question regarding its operand representation,
> though: Can you explain why it's Imm64, not Disp64? The latter would, to me,
> seem more natural to use here. Not only from a assembler internals perspective,
> but also from the users' one: The $ in the operand carries absolutely no meaning
> (see also the related testcase comment below) in AT&T syntax, and there's no
> noticeable difference in Intel syntax afaict.
>

In my opinion, If compiler want  to jump "anywhere" and the displacement can not fit in a 32-bit immediate , compiler will fallback to indirect branches. My current knowledge is that jmpabs came about as a solution to the problem about indirect braches. It's not the same as the jmp. Currently the parameters of jmpabs are absolute addresses optimized by PLT or JIT. I think using imm64 avoids confusion with the disp64. That's why the designer designed it this way.

One colleague in our group have written an introductory document can be referred. (https://kanrobert.github.io/rfc/All-about-APX-JMPABS/)

>
> > @@ -8939,6 +8940,9 @@ process_operands (void)
> >  	}
> >      }
> >
> > +  if (i.tm.mnem_off == MN_jmpabs)
> > +    i.rex2_encoding = true;
> 
> Please see my earlier remarks wrt "rex2" vs "{rex2}". What you do here is effect
> the latter. Yet as indicated, the pseudo-prefix isn't really an indication of "must
> have REX2 prefix", but only a weak request to do so if possible. I think you want
> to set i.rex2 here instead, requiring a way to express that an empty REX2 prefix
> is wanted.
>

But in terms of encoding, i.rex2 should be 0. Can I do special handling in build_rex2_prefix? 

> 
> > --- /dev/null
> > +++ b/gas/testsuite/gas/i386/x86-64-apx-jmpabs-inval.s
> > @@ -0,0 +1,17 @@
> > +# Check bytecode of APX_F jmpabs instructions with illegal encode.
> > +
> > +	.text
> > +# With 66 prefix
> > +	.byte
> 0x66,0x64,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
> > +	.byte 0x66,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
> > +# With 67 prefix
> > +	.byte
> 0x67,0x64,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
> > +	.byte 0x67,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
> > +# With F2 prefix
> > +	.byte
> 0xf2,0x64,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
> > +	.byte 0xf2,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
> > +# With F3 prefix
> > +	.byte
> 0xf3,0x64,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
> > +	.byte 0xf3,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
> > +# REX2.M0 = 0 REX2.W = 1
> > +	.byte 0xd5,0x08,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
> 
> As per earlier comments: This wants expressing via .insn, to yield input to gas
> human-readable (even if, as it looks, two .insn are going to be required per
> resulting construct). Further in the last comment, why is
> REX2.M0 mentioned there, but not elsewhere? Also what purpose serve the
> 0x64 bytes here? The encodings are invalid irrespective of them. Instead I'd kind
> have expected LOCK to also be covered.
>

Because this error line is only for the special case where M0 == 0, and base_opcode == 0xa1, W should be 0, other than 1. If M0 = 1, W = 1, base_opcode == 0xa1, I think it could decoding as mov rax, moffs or ( some future insn). Elsewhere it's just excluding invalid prefixes. I don't see in the docs that it triggers #UD, am I missing something?

> 
> Also a spec question as we're talking of what is or is not valid (i.e.
> causing #UD) here: Why would XCR0.APX=0 need to cause #UD? There's no use
> of eGPR-s here.
>

Sorry, what is XCR0.APX?
 
>
> > --- /dev/null
> > +++ b/gas/testsuite/gas/i386/x86-64-apx-jmpabs.s
> > @@ -0,0 +1,10 @@
> > +# Check 64bit APX_F JMPABS instructions
> > +
> > +	.text
> > + _start:
> > +	jmpabs	      $0x0202020202020202
> > +	jmpabs	      $0x2
> > +
> > +.intel_syntax noprefix
> > +	jmpabs	      0x0202020202020202
> > +	jmpabs	      0x2
> 
> I expect this isn't going to be the normal use of the insn. Instead I would foresee
> the typical users to be "jmpabs symbol" (and - as per above - intentionally
> omitting the $ already). IOW the testcase also wants to cover the case requiring
> a relocation, including a check that the correct relocation is emitted (covering
> both ELF and COFF; I'm not going to insist on also covering Mach-O, as - for a
> reason that escapes me - gas can't even be configured for x86_64-*-darwin*).
>

Based on the previous discussion, we think that this usage is not currently supported. If users want to use symbol, I think they can use "jmp symbol".

> 
> > --- a/opcodes/i386-dis.c
> > +++ b/opcodes/i386-dis.c
> > @@ -106,6 +106,7 @@ static bool MOVSXD_Fixup (instr_info *, int, int);
> > static bool DistinctDest_Fixup (instr_info *, int, int);  static bool
> > PREFETCHI_Fixup (instr_info *, int, int);  static bool
> > PUSH2_POP2_Fixup (instr_info *, int, int);
> > +static bool JMPABS_Fixup (instr_info *, int, int);
> >
> >  static void ATTRIBUTE_PRINTF_3 i386_dis_printf (const disassemble_info *,
> >  						enum disassembler_style,
> > @@ -258,6 +259,9 @@ struct instr_info
> >    char scale_char;
> >
> >    enum x86_64_isa isa64;
> > +
> > +  /* Remember if the current op is jmpabs.  */  bool is_jmpabs;
> >  };
> 
> This field would probably best live next to op_is_jump (and then also be named
> op_is_jmpabs, assuming a separate boolean is indeed needed).
> I further expect that op_is_jump also wants setting for JMPABS.
>

Can I change op_is_jump's type from bool to unsigned int?
 
>
> > @@ -2032,7 +2036,7 @@ static const struct dis386 dis386[] = {
> >    { "lahf",		{ XX }, 0 },
> >    /* a0 */
> >    { "mov%LB",		{ AL, Ob }, PREFIX_REX2_ILLEGAL },
> > -  { "mov%LS",		{ eAX, Ov }, PREFIX_REX2_ILLEGAL },
> > +  { "mov%LS",		{ { JMPABS_Fixup, eAX_reg }, { JMPABS_Fixup,
> v_mode } }, PREFIX_REX2_ILLEGAL },
> >    { "mov%LB",		{ Ob, AL }, PREFIX_REX2_ILLEGAL },
> >    { "mov%LS",		{ Ov, eAX }, PREFIX_REX2_ILLEGAL },
> >    { "movs{b|}",		{ Ybr, Xb }, PREFIX_REX2_ILLEGAL },
> > @@ -9648,7 +9652,7 @@ print_insn (bfd_vma pc, disassemble_info *info, int
> intel_syntax)
> >      }
> >
> >    if ((dp->prefix_requirement & PREFIX_REX2_ILLEGAL)
> > -      && ins.last_rex2_prefix >= 0)
> > +      && ins.last_rex2_prefix >= 0 && !ins.is_jmpabs)
> >      {
> >        i386_dis_printf (info, dis_style_text, "(bad)");
> >        ret = ins.end_codep - priv.the_buffer; @@ -13857,3 +13861,38 @@
> > PUSH2_POP2_Fixup (instr_info *ins, int bytemode, int sizeflag)
> >
> >    return OP_VEX (ins, bytemode, sizeflag);  }
> > +
> > +static bool
> > +JMPABS_Fixup (instr_info *ins, int bytemode, int sizeflag) {
> > +  if (ins->address_mode == mode_64bit
> > +      && ins->last_rex2_prefix >= 0
> > +      && (ins->rex2 & 0x80) == 0x0)
> > +    {
> > +      uint64_t op;
> > +
> > +      if (bytemode == eAX_reg)
> > +	return true;
> > +
> > +      if (!get64 (ins, &op))
> > +	return false;
> > +
> > +      if ((ins->prefixes & (PREFIX_OPCODE | PREFIX_ADDR)) != 0x0
> > +	  || (ins->rex & REX_W) != 0x0)
> > +	{
> > +	  oappend (ins, "(bad)");
> > +	  return true;
> > +	}
> > +
> > +      ins->mnemonicendp = stpcpy (ins->obuf, "jmpabs");
> > +      ins->all_prefixes[ins->last_rex2_prefix] = 0;
> 
> This doesn't look right. REX2.{R,X,B}{3,4} set still want recording in the output. I
> expect you may need to set a bit in rex2_used here, but how exactly that ought
> to work depends on how comments on earlier patches are going to be
> addressed. This may then also eliminate the need for ...
> 
> > +      ins->is_jmpabs = true;
> 
> ... this field, which likely will be covered by a more generic approach.
> 
>

Then this part of the discussion, as well as the modifications, I will wait for the front patch to be finalized.

>
> > +      oappend_immediate (ins, op);
> > +
> > +      return true;
> > +    }
> > +
> > +  if (bytemode == eAX_reg)
> > +    return OP_IMREG (ins, bytemode, sizeflag);  return OP_OFF64 (ins,
> > + v_mode, sizeflag);
> 
> v_mode is, afaics, properly passed into here. Why would you open-code that,
> instead of using bytemode? Not doing so will give the compiler more ICF
> opportunities.
> 

I reckon it's a mistake. Have fixed.

BRs,
Lin

Jan Beulich Nov. 14, 2023, 11:15 a.m. UTC | #3

On 14.11.2023 04:26, Hu, Lin1 wrote:
>  > On 02.11.2023 12:29, Cui, Lili wrote:
>>> --- a/gas/config/tc-i386.c
>>> +++ b/gas/config/tc-i386.c
>>> @@ -7790,7 +7790,8 @@ match_template (char mnem_suffix)
>>>    if (!quiet_warnings)
>>>      {
>>>        if (!intel_syntax
>>> -	  && (i.jumpabsolute != (t->opcode_modifier.jump ==
>> JUMP_ABSOLUTE)))
>>> +	  && (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE))
>>> +	  && t->mnem_off != MN_jmpabs)
>>>  	as_warn (_("indirect %s without `*'"), insn_name (t));
>>
>> Coming back to this, which I did comment on before already. The insn taking an
>> immediate operand doesn't really justify this, as it leaves open the underlying
>> question of why you use JumpAbsolute in the insn template in the first place. I've
>> gone through all the uses of JUMP_ABSOLUTE, and I didn't find any where the
>> respective handling would be applicable here. In fact it's unclear whether the
>> insn needs marking as any JUMP_* at all: It's not really different from, say, "mov
>> $imm, %rcx".
>>
> 
> According to the doc JMPABS is a 64-bit only ISA extension, and acts as a near-direct branch with an absolute target. I made this markup simply because I was mimicking other jmp's.

"absolute target" can have different meaning. There's nothing wrong with the
linker establishing the ultimate value; it may well not be an assembly-time
constant. In terms of ELF relocations it simply wouldn't be R_X86_64_PC64
(resembling R_X86_64_PC32 used for other direct branches), but R_X86_64_64.

> If we don't need the attribute, I'm glad I can remove it.

Please do, unless you can explain why you add it.

>> There's a further question regarding its operand representation,
>> though: Can you explain why it's Imm64, not Disp64? The latter would, to me,
>> seem more natural to use here. Not only from a assembler internals perspective,
>> but also from the users' one: The $ in the operand carries absolutely no meaning
>> (see also the related testcase comment below) in AT&T syntax, and there's no
>> noticeable difference in Intel syntax afaict.
> 
> In my opinion, If compiler want  to jump "anywhere" and the displacement can not fit in a 32-bit immediate , compiler will fallback to indirect branches.

Unless it knows that it may use this ISA extension.

> My current knowledge is that jmpabs came about as a solution to the problem about indirect braches. It's not the same as the jmp. Currently the parameters of jmpabs are absolute addresses optimized by PLT or JIT. I think using imm64 avoids confusion with the disp64. That's why the designer designed it this way.
> 
> One colleague in our group have written an introductory document can be referred. (https://kanrobert.github.io/rfc/All-about-APX-JMPABS/)

Well, "Compiler usages" there ignores other than small-code-model programs.
Furthermore "none currently" doesn't mean the assembler shouldn't support
all possible uses. If I was going from what's said there, there wouldn't be
a need to support JMPABS in gas at all.

>>> @@ -8939,6 +8940,9 @@ process_operands (void)
>>>  	}
>>>      }
>>>
>>> +  if (i.tm.mnem_off == MN_jmpabs)
>>> +    i.rex2_encoding = true;
>>
>> Please see my earlier remarks wrt "rex2" vs "{rex2}". What you do here is effect
>> the latter. Yet as indicated, the pseudo-prefix isn't really an indication of "must
>> have REX2 prefix", but only a weak request to do so if possible. I think you want
>> to set i.rex2 here instead, requiring a way to express that an empty REX2 prefix
>> is wanted.
>>
> 
> But in terms of encoding, i.rex2 should be 0. Can I do special handling in build_rex2_prefix? 

build_rex2_prefix() wants to remain generic. What I was trying to hint at though
is that it ought to be possible to set bits in i.rex2 (to make it non-zero) which
then aren't encoded into the REX2 payload byte (leveraging that only the low
three bits are actually contributing to the final encoding). The important point
is that both i.rex2 and i.rex2_encoding retain the specific meaning they are
intended to have.

>>> --- /dev/null
>>> +++ b/gas/testsuite/gas/i386/x86-64-apx-jmpabs-inval.s
>>> @@ -0,0 +1,17 @@
>>> +# Check bytecode of APX_F jmpabs instructions with illegal encode.
>>> +
>>> +	.text
>>> +# With 66 prefix
>>> +	.byte
>> 0x66,0x64,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
>>> +	.byte 0x66,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
>>> +# With 67 prefix
>>> +	.byte
>> 0x67,0x64,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
>>> +	.byte 0x67,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
>>> +# With F2 prefix
>>> +	.byte
>> 0xf2,0x64,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
>>> +	.byte 0xf2,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
>>> +# With F3 prefix
>>> +	.byte
>> 0xf3,0x64,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
>>> +	.byte 0xf3,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
>>> +# REX2.M0 = 0 REX2.W = 1
>>> +	.byte 0xd5,0x08,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
>>
>> As per earlier comments: This wants expressing via .insn, to yield input to gas
>> human-readable (even if, as it looks, two .insn are going to be required per
>> resulting construct). Further in the last comment, why is
>> REX2.M0 mentioned there, but not elsewhere? Also what purpose serve the
>> 0x64 bytes here? The encodings are invalid irrespective of them. Instead I'd kind
>> have expected LOCK to also be covered.
>>
> 
> Because this error line is only for the special case where M0 == 0, and base_opcode == 0xa1, W should be 0, other than 1. If M0 = 1, W = 1, base_opcode == 0xa1, I think it could decoding as mov rax, moffs or ( some future insn). Elsewhere it's just excluding invalid prefixes.

Yet REX2.M == 0 is as relevant there (until such time where some of those
prefixes used is assigned meaning).

> I don't see in the docs that it triggers #UD, am I missing something?

What's "it" here?

>> Also a spec question as we're talking of what is or is not valid (i.e.
>> causing #UD) here: Why would XCR0.APX=0 need to cause #UD? There's no use
>> of eGPR-s here.
>>
> 
> Sorry, what is XCR0.APX?

Bit 19 of the XCR0 register. It is mentioned in exactly this way in the
APX-LEGACY-JMPABS exception class description.

>>> --- /dev/null
>>> +++ b/gas/testsuite/gas/i386/x86-64-apx-jmpabs.s
>>> @@ -0,0 +1,10 @@
>>> +# Check 64bit APX_F JMPABS instructions
>>> +
>>> +	.text
>>> + _start:
>>> +	jmpabs	      $0x0202020202020202
>>> +	jmpabs	      $0x2
>>> +
>>> +.intel_syntax noprefix
>>> +	jmpabs	      0x0202020202020202
>>> +	jmpabs	      0x2
>>
>> I expect this isn't going to be the normal use of the insn. Instead I would foresee
>> the typical users to be "jmpabs symbol" (and - as per above - intentionally
>> omitting the $ already). IOW the testcase also wants to cover the case requiring
>> a relocation, including a check that the correct relocation is emitted (covering
>> both ELF and COFF; I'm not going to insist on also covering Mach-O, as - for a
>> reason that escapes me - gas can't even be configured for x86_64-*-darwin*).
>>
> 
> Based on the previous discussion, we think that this usage is not currently supported. If users want to use symbol, I think they can use "jmp symbol".

See my respective comments above, the summary of which was "Then why add it to
gas in the first place?"

>>> --- a/opcodes/i386-dis.c
>>> +++ b/opcodes/i386-dis.c
>>> @@ -106,6 +106,7 @@ static bool MOVSXD_Fixup (instr_info *, int, int);
>>> static bool DistinctDest_Fixup (instr_info *, int, int);  static bool
>>> PREFETCHI_Fixup (instr_info *, int, int);  static bool
>>> PUSH2_POP2_Fixup (instr_info *, int, int);
>>> +static bool JMPABS_Fixup (instr_info *, int, int);
>>>
>>>  static void ATTRIBUTE_PRINTF_3 i386_dis_printf (const disassemble_info *,
>>>  						enum disassembler_style,
>>> @@ -258,6 +259,9 @@ struct instr_info
>>>    char scale_char;
>>>
>>>    enum x86_64_isa isa64;
>>> +
>>> +  /* Remember if the current op is jmpabs.  */  bool is_jmpabs;
>>>  };
>>
>> This field would probably best live next to op_is_jump (and then also be named
>> op_is_jmpabs, assuming a separate boolean is indeed needed).
>> I further expect that op_is_jump also wants setting for JMPABS.
>>
> 
> Can I change op_is_jump's type from bool to unsigned int?

If you need it to hold a 3rd value, perhaps. Albeit more to an enum then
than to unsigned int.

>>> @@ -2032,7 +2036,7 @@ static const struct dis386 dis386[] = {
>>>    { "lahf",		{ XX }, 0 },
>>>    /* a0 */
>>>    { "mov%LB",		{ AL, Ob }, PREFIX_REX2_ILLEGAL },
>>> -  { "mov%LS",		{ eAX, Ov }, PREFIX_REX2_ILLEGAL },
>>> +  { "mov%LS",		{ { JMPABS_Fixup, eAX_reg }, { JMPABS_Fixup,
>> v_mode } }, PREFIX_REX2_ILLEGAL },
>>>    { "mov%LB",		{ Ob, AL }, PREFIX_REX2_ILLEGAL },
>>>    { "mov%LS",		{ Ov, eAX }, PREFIX_REX2_ILLEGAL },
>>>    { "movs{b|}",		{ Ybr, Xb }, PREFIX_REX2_ILLEGAL },
>>> @@ -9648,7 +9652,7 @@ print_insn (bfd_vma pc, disassemble_info *info, int
>> intel_syntax)
>>>      }
>>>
>>>    if ((dp->prefix_requirement & PREFIX_REX2_ILLEGAL)
>>> -      && ins.last_rex2_prefix >= 0)
>>> +      && ins.last_rex2_prefix >= 0 && !ins.is_jmpabs)
>>>      {
>>>        i386_dis_printf (info, dis_style_text, "(bad)");
>>>        ret = ins.end_codep - priv.the_buffer; @@ -13857,3 +13861,38 @@
>>> PUSH2_POP2_Fixup (instr_info *ins, int bytemode, int sizeflag)
>>>
>>>    return OP_VEX (ins, bytemode, sizeflag);  }
>>> +
>>> +static bool
>>> +JMPABS_Fixup (instr_info *ins, int bytemode, int sizeflag) {
>>> +  if (ins->address_mode == mode_64bit
>>> +      && ins->last_rex2_prefix >= 0
>>> +      && (ins->rex2 & 0x80) == 0x0)
>>> +    {
>>> +      uint64_t op;
>>> +
>>> +      if (bytemode == eAX_reg)
>>> +	return true;
>>> +
>>> +      if (!get64 (ins, &op))
>>> +	return false;
>>> +
>>> +      if ((ins->prefixes & (PREFIX_OPCODE | PREFIX_ADDR)) != 0x0
>>> +	  || (ins->rex & REX_W) != 0x0)
>>> +	{
>>> +	  oappend (ins, "(bad)");
>>> +	  return true;
>>> +	}
>>> +
>>> +      ins->mnemonicendp = stpcpy (ins->obuf, "jmpabs");
>>> +      ins->all_prefixes[ins->last_rex2_prefix] = 0;
>>
>> This doesn't look right. REX2.{R,X,B}{3,4} set still want recording in the output. I
>> expect you may need to set a bit in rex2_used here, but how exactly that ought
>> to work depends on how comments on earlier patches are going to be
>> addressed. This may then also eliminate the need for ...
>>
>>> +      ins->is_jmpabs = true;
>>
>> ... this field, which likely will be covered by a more generic approach.
>>
>>
> 
> Then this part of the discussion, as well as the modifications, I will wait for the front patch to be finalized.

I suggested to Lili to go with the simplified approach for now, printing
just {rex2} but no details. Yet such minimal printing may still be needed
here then as well, consistent with what is (going to be) done in the
earlier patch. ("Consistent" as in: If nothing would be printed there,
printing nothing would be the goal here as well then.)

Jan

Hu, Lin1 Nov. 24, 2023, 5:40 a.m. UTC | #4

> -----Original Message-----
> From: Jan Beulich <jbeulich@suse.com>
> Sent: Tuesday, November 14, 2023 7:15 PM
> To: Hu, Lin1 <lin1.hu@intel.com>
> Cc: Lu, Hongjiu <hongjiu.lu@intel.com>; ccoutant@gmail.com;
> binutils@sourceware.org; Cui, Lili <lili.cui@intel.com>
> Subject: Re: [PATCH 8/8] Support APX JMPABS
> 
> On 14.11.2023 04:26, Hu, Lin1 wrote:
> >  > On 02.11.2023 12:29, Cui, Lili wrote:
> >> There's a further question regarding its operand representation,
> >> though: Can you explain why it's Imm64, not Disp64? The latter would,
> >> to me, seem more natural to use here. Not only from a assembler
> >> internals perspective, but also from the users' one: The $ in the
> >> operand carries absolutely no meaning (see also the related testcase
> >> comment below) in AT&T syntax, and there's no noticeable difference in Intel
> syntax afaict.
> >
> > In my opinion, If compiler want  to jump "anywhere" and the displacement can
> not fit in a 32-bit immediate , compiler will fallback to indirect branches.
> 
> Unless it knows that it may use this ISA extension.
> 
> > My current knowledge is that jmpabs came about as a solution to the problem
> about indirect braches. It's not the same as the jmp. Currently the parameters of
> jmpabs are absolute addresses optimized by PLT or JIT. I think using imm64
> avoids confusion with the disp64. That's why the designer designed it this way.
> >
> > One colleague in our group have written an introductory document can
> > be referred. (https://kanrobert.github.io/rfc/All-about-APX-JMPABS/)
> 
> Well, "Compiler usages" there ignores other than small-code-model programs.
> Furthermore "none currently" doesn't mean the assembler shouldn't support all
> possible uses. If I was going from what's said there, there wouldn't be a need to
> support JMPABS in gas at all.
>

We've decided to only support disassembler for now, and for future support for assembler, we're going to wait for HJ to come back to see if linker can support the way "jmpabs symbols" are used.

>
> >>> @@ -8939,6 +8940,9 @@ process_operands (void)
> >>>  	}
> >>>      }
> >>>
> >>> +  if (i.tm.mnem_off == MN_jmpabs)
> >>> +    i.rex2_encoding = true;
> >>
> >> Please see my earlier remarks wrt "rex2" vs "{rex2}". What you do
> >> here is effect the latter. Yet as indicated, the pseudo-prefix isn't
> >> really an indication of "must have REX2 prefix", but only a weak
> >> request to do so if possible. I think you want to set i.rex2 here
> >> instead, requiring a way to express that an empty REX2 prefix is wanted.
> >>
> >
> > But in terms of encoding, i.rex2 should be 0. Can I do special handling in
> build_rex2_prefix?
> 
> build_rex2_prefix() wants to remain generic. What I was trying to hint at though
> is that it ought to be possible to set bits in i.rex2 (to make it non-zero) which
> then aren't encoded into the REX2 payload byte (leveraging that only the low
> three bits are actually contributing to the final encoding). The important point is
> that both i.rex2 and i.rex2_encoding retain the specific meaning they are
> intended to have.
>

I have set i.rex2 = 16;
 
>
> >>> --- /dev/null
> >>> +++ b/gas/testsuite/gas/i386/x86-64-apx-jmpabs-inval.s
> >>> @@ -0,0 +1,17 @@
> >>> +# Check bytecode of APX_F jmpabs instructions with illegal encode.
> >>> +
> >>> +	.text
> >>> +# With 66 prefix
> >>> +	.byte
> >> 0x66,0x64,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
> >>> +	.byte 0x66,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
> >>> +# With 67 prefix
> >>> +	.byte
> >> 0x67,0x64,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
> >>> +	.byte 0x67,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
> >>> +# With F2 prefix
> >>> +	.byte
> >> 0xf2,0x64,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
> >>> +	.byte 0xf2,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
> >>> +# With F3 prefix
> >>> +	.byte
> >> 0xf3,0x64,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
> >>> +	.byte 0xf3,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
> >>> +# REX2.M0 = 0 REX2.W = 1
> >>> +	.byte 0xd5,0x08,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
> >>
> >> As per earlier comments: This wants expressing via .insn, to yield
> >> input to gas human-readable (even if, as it looks, two .insn are
> >> going to be required per resulting construct). Further in the last
> >> comment, why is
> >> REX2.M0 mentioned there, but not elsewhere? Also what purpose serve
> >> the
> >> 0x64 bytes here? The encodings are invalid irrespective of them.
> >> Instead I'd kind have expected LOCK to also be covered.
> >>
> >
> > Because this error line is only for the special case where M0 == 0, and
> base_opcode == 0xa1, W should be 0, other than 1. If M0 = 1, W = 1,
> base_opcode == 0xa1, I think it could decoding as mov rax, moffs or ( some
> future insn). Elsewhere it's just excluding invalid prefixes.
> 
> Yet REX2.M == 0 is as relevant there (until such time where some of those
> prefixes used is assigned meaning).
>

I don't think so. the original result with W = 1 was MOV RAX, moffs, but the insn can't support REX2. So If someone input .byte REX2.M = 0 && REX2.W = 1, it should be a Bad_Opcode. 

> 
> >> Also a spec question as we're talking of what is or is not valid (i.e.
> >> causing #UD) here: Why would XCR0.APX=0 need to cause #UD? There's no
> >> use of eGPR-s here.
> >>
> >
> > Sorry, what is XCR0.APX?
> 
> Bit 19 of the XCR0 register. It is mentioned in exactly this way in the APX-
> LEGACY-JMPABS exception class description.
>

I think XCR0.APX is a state bit to control if support APX instruction set not if support eGPR-s.
 
>
> >>> --- a/opcodes/i386-dis.c
> >>> +++ b/opcodes/i386-dis.c
> >>> @@ -106,6 +106,7 @@ static bool MOVSXD_Fixup (instr_info *, int,
> >>> int); static bool DistinctDest_Fixup (instr_info *, int, int);
> >>> static bool PREFETCHI_Fixup (instr_info *, int, int);  static bool
> >>> PUSH2_POP2_Fixup (instr_info *, int, int);
> >>> +static bool JMPABS_Fixup (instr_info *, int, int);
> >>>
> >>>  static void ATTRIBUTE_PRINTF_3 i386_dis_printf (const disassemble_info *,
> >>>  						enum disassembler_style,
> >>> @@ -258,6 +259,9 @@ struct instr_info
> >>>    char scale_char;
> >>>
> >>>    enum x86_64_isa isa64;
> >>> +
> >>> +  /* Remember if the current op is jmpabs.  */  bool is_jmpabs;
> >>>  };
> >>
> >> This field would probably best live next to op_is_jump (and then also
> >> be named op_is_jmpabs, assuming a separate boolean is indeed needed).
> >> I further expect that op_is_jump also wants setting for JMPABS.
> >>
> >
> > Can I change op_is_jump's type from bool to unsigned int?
> 
> If you need it to hold a 3rd value, perhaps. Albeit more to an enum then than to
> unsigned int.
>

Ok, Have modified.
 
BRs,
Lin

Jan Beulich Nov. 24, 2023, 7:21 a.m. UTC | #5

On 24.11.2023 06:40, Hu, Lin1 wrote:
>> -----Original Message-----
>> From: Jan Beulich <jbeulich@suse.com>
>> Sent: Tuesday, November 14, 2023 7:15 PM
>>
>> On 14.11.2023 04:26, Hu, Lin1 wrote:
>>>  > On 02.11.2023 12:29, Cui, Lili wrote:
>>>>> @@ -8939,6 +8940,9 @@ process_operands (void)
>>>>>  	}
>>>>>      }
>>>>>
>>>>> +  if (i.tm.mnem_off == MN_jmpabs)
>>>>> +    i.rex2_encoding = true;
>>>>
>>>> Please see my earlier remarks wrt "rex2" vs "{rex2}". What you do
>>>> here is effect the latter. Yet as indicated, the pseudo-prefix isn't
>>>> really an indication of "must have REX2 prefix", but only a weak
>>>> request to do so if possible. I think you want to set i.rex2 here
>>>> instead, requiring a way to express that an empty REX2 prefix is wanted.
>>>>
>>>
>>> But in terms of encoding, i.rex2 should be 0. Can I do special handling in
>> build_rex2_prefix?
>>
>> build_rex2_prefix() wants to remain generic. What I was trying to hint at though
>> is that it ought to be possible to set bits in i.rex2 (to make it non-zero) which
>> then aren't encoded into the REX2 payload byte (leveraging that only the low
>> three bits are actually contributing to the final encoding). The important point is
>> that both i.rex2 and i.rex2_encoding retain the specific meaning they are
>> intended to have.
>>
> 
> I have set i.rex2 = 16;

But hopefully not exactly this way, but using a self-descriptive #define for
the integer literal.

>>>>> --- /dev/null
>>>>> +++ b/gas/testsuite/gas/i386/x86-64-apx-jmpabs-inval.s
>>>>> @@ -0,0 +1,17 @@
>>>>> +# Check bytecode of APX_F jmpabs instructions with illegal encode.
>>>>> +
>>>>> +	.text
>>>>> +# With 66 prefix
>>>>> +	.byte
>>>> 0x66,0x64,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
>>>>> +	.byte 0x66,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
>>>>> +# With 67 prefix
>>>>> +	.byte
>>>> 0x67,0x64,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
>>>>> +	.byte 0x67,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
>>>>> +# With F2 prefix
>>>>> +	.byte
>>>> 0xf2,0x64,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
>>>>> +	.byte 0xf2,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
>>>>> +# With F3 prefix
>>>>> +	.byte
>>>> 0xf3,0x64,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
>>>>> +	.byte 0xf3,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
>>>>> +# REX2.M0 = 0 REX2.W = 1
>>>>> +	.byte 0xd5,0x08,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
>>>>
>>>> As per earlier comments: This wants expressing via .insn, to yield
>>>> input to gas human-readable (even if, as it looks, two .insn are
>>>> going to be required per resulting construct). Further in the last
>>>> comment, why is
>>>> REX2.M0 mentioned there, but not elsewhere? Also what purpose serve
>>>> the
>>>> 0x64 bytes here? The encodings are invalid irrespective of them.
>>>> Instead I'd kind have expected LOCK to also be covered.
>>>>
>>>
>>> Because this error line is only for the special case where M0 == 0, and
>> base_opcode == 0xa1, W should be 0, other than 1. If M0 = 1, W = 1,
>> base_opcode == 0xa1, I think it could decoding as mov rax, moffs or ( some
>> future insn). Elsewhere it's just excluding invalid prefixes.
>>
>> Yet REX2.M == 0 is as relevant there (until such time where some of those
>> prefixes used is assigned meaning).
>>
> 
> I don't think so. the original result with W = 1 was MOV RAX, moffs, but the insn can't support REX2. So If someone input .byte REX2.M = 0 && REX2.W = 1, it should be a Bad_Opcode. 

I fully agree with this. But that doesn't invalidate my original comment:
REX2.M is relevant in all of these tests, and hence comments end up
inconsistent. At the risk of repeating myself - especially when you use
.byte for encoding an instruction, what that (unreadable) byte sequence
is supposed to encode wants to be properly stated in a comment then.
When using .insn, some parts may become self-descriptive, hence why
generally .insn ought to be preferred over .byte.

>>>> Also a spec question as we're talking of what is or is not valid (i.e.
>>>> causing #UD) here: Why would XCR0.APX=0 need to cause #UD? There's no
>>>> use of eGPR-s here.
>>>>
>>>
>>> Sorry, what is XCR0.APX?
>>
>> Bit 19 of the XCR0 register. It is mentioned in exactly this way in the APX-
>> LEGACY-JMPABS exception class description.
>>
> 
> I think XCR0.APX is a state bit to control if support APX instruction set not if support eGPR-s.

No, XCR0 very certainly is a set of controls affecting register use. If
JMPABS is also controlled by it, then I'd view this as an erratum if it
appeared like that in silicon; that erratum may well be a design one then,
or one "justified" by simplifying the implementation in some way, but it
would still be wrong from a conceptual pov.

Jan

Hu, Lin1 Nov. 27, 2023, 2:16 a.m. UTC | #6

> -----Original Message-----
> From: Jan Beulich <jbeulich@suse.com>
> Sent: Friday, November 24, 2023 3:21 PM
> To: Hu, Lin1 <lin1.hu@intel.com>
> Cc: Lu, Hongjiu <hongjiu.lu@intel.com>; ccoutant@gmail.com;
> binutils@sourceware.org; Cui, Lili <lili.cui@intel.com>
> Subject: Re: [PATCH 8/8] Support APX JMPABS
> 
> On 24.11.2023 06:40, Hu, Lin1 wrote:
> >> -----Original Message-----
> >> From: Jan Beulich <jbeulich@suse.com>
> >> Sent: Tuesday, November 14, 2023 7:15 PM
> >>
> >> On 14.11.2023 04:26, Hu, Lin1 wrote:
> >>>  > On 02.11.2023 12:29, Cui, Lili wrote:
> >>>>> @@ -8939,6 +8940,9 @@ process_operands (void)
> >>>>>  	}
> >>>>>      }
> >>>>>
> >>>>> +  if (i.tm.mnem_off == MN_jmpabs)
> >>>>> +    i.rex2_encoding = true;
> >>>>
> >>>> Please see my earlier remarks wrt "rex2" vs "{rex2}". What you do
> >>>> here is effect the latter. Yet as indicated, the pseudo-prefix
> >>>> isn't really an indication of "must have REX2 prefix", but only a
> >>>> weak request to do so if possible. I think you want to set i.rex2
> >>>> here instead, requiring a way to express that an empty REX2 prefix is
> wanted.
> >>>>
> >>>
> >>> But in terms of encoding, i.rex2 should be 0. Can I do special
> >>> handling in
> >> build_rex2_prefix?
> >>
> >> build_rex2_prefix() wants to remain generic. What I was trying to
> >> hint at though is that it ought to be possible to set bits in i.rex2
> >> (to make it non-zero) which then aren't encoded into the REX2 payload
> >> byte (leveraging that only the low three bits are actually
> >> contributing to the final encoding). The important point is that both
> >> i.rex2 and i.rex2_encoding retain the specific meaning they are intended to
> have.
> >>
> >
> > I have set i.rex2 = 16;
> 
> But hopefully not exactly this way, but using a self-descriptive #define for the
> integer literal.
>

OK.

> 
> >>>>> --- /dev/null
> >>>>> +++ b/gas/testsuite/gas/i386/x86-64-apx-jmpabs-inval.s
> >>>>> @@ -0,0 +1,17 @@
> >>>>> +# Check bytecode of APX_F jmpabs instructions with illegal encode.
> >>>>> +
> >>>>> +	.text
> >>>>> +# With 66 prefix
> >>>>> +	.byte
> >>>> 0x66,0x64,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
> >>>>> +	.byte
> >>>>> +0x66,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
> >>>>> +# With 67 prefix
> >>>>> +	.byte
> >>>> 0x67,0x64,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
> >>>>> +	.byte
> >>>>> +0x67,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
> >>>>> +# With F2 prefix
> >>>>> +	.byte
> >>>> 0xf2,0x64,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
> >>>>> +	.byte
> >>>>> +0xf2,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
> >>>>> +# With F3 prefix
> >>>>> +	.byte
> >>>> 0xf3,0x64,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
> >>>>> +	.byte
> >>>>> +0xf3,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
> >>>>> +# REX2.M0 = 0 REX2.W = 1
> >>>>> +	.byte 0xd5,0x08,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
> >>>>
> >>>> As per earlier comments: This wants expressing via .insn, to yield
> >>>> input to gas human-readable (even if, as it looks, two .insn are
> >>>> going to be required per resulting construct). Further in the last
> >>>> comment, why is
> >>>> REX2.M0 mentioned there, but not elsewhere? Also what purpose serve
> >>>> the
> >>>> 0x64 bytes here? The encodings are invalid irrespective of them.
> >>>> Instead I'd kind have expected LOCK to also be covered.
> >>>>
> >>>
> >>> Because this error line is only for the special case where M0 == 0,
> >>> and
> >> base_opcode == 0xa1, W should be 0, other than 1. If M0 = 1, W = 1,
> >> base_opcode == 0xa1, I think it could decoding as mov rax, moffs or (
> >> some future insn). Elsewhere it's just excluding invalid prefixes.
> >>
> >> Yet REX2.M == 0 is as relevant there (until such time where some of
> >> those prefixes used is assigned meaning).
> >>
> >
> > I don't think so. the original result with W = 1 was MOV RAX, moffs, but the
> insn can't support REX2. So If someone input .byte REX2.M = 0 && REX2.W = 1, it
> should be a Bad_Opcode.
> 
> I fully agree with this. But that doesn't invalidate my original comment:
> REX2.M is relevant in all of these tests, and hence comments end up inconsistent.
> At the risk of repeating myself - especially when you use .byte for encoding an
> instruction, what that (unreadable) byte sequence is supposed to encode wants
> to be properly stated in a comment then.
> When using .insn, some parts may become self-descriptive, hence why
> generally .insn ought to be preferred over .byte.
>

OK, I will add REX2.M = 0 in other comments. I'm afraid I can't use .insn, because I don't know how to express d5.

 
>
> >>>> Also a spec question as we're talking of what is or is not valid (i.e.
> >>>> causing #UD) here: Why would XCR0.APX=0 need to cause #UD? There's
> >>>> no use of eGPR-s here.
> >>>>
> >>>
> >>> Sorry, what is XCR0.APX?
> >>
> >> Bit 19 of the XCR0 register. It is mentioned in exactly this way in
> >> the APX- LEGACY-JMPABS exception class description.
> >>
> >
> > I think XCR0.APX is a state bit to control if support APX instruction set not if
> support eGPR-s.
> 
> No, XCR0 very certainly is a set of controls affecting register use. If JMPABS is
> also controlled by it, then I'd view this as an erratum if it appeared like that in
> silicon; that erratum may well be a design one then, or one "justified" by
> simplifying the implementation in some way, but it would still be wrong from a
> conceptual pov.
> 

In APX.pdf (https://cdrdv2.intel.com/v1/dl/getContent/784266) section 3.1.4.2, XCR0 govern APX State and prefixes. And In sdm.pdf (https://cdrdv2.intel.com/v1/dl/getContent/671200), section 13.3 page 323,  "Software can excute Intel AVX-512 instructions only if CR4.OSXSAVE = 1 and XCR0[7:5] = 111b." Their focus is on XCR0[7:5], not on what corresponding registers are used, For example, 
page 567 Table 2-37, alougth bit 6 is used for the upper 256 bits of the register ZMM0-ZMM15, 256 encoding instructions still require to set the bit.

I admit that in the earliest introductions of XCR0[7:5], it looks like it's for register state use only, but from actual use later on, they're more of an instruction state, but from the introductions of the XCR0.APX bit, that's not a problem.

BRs,
Lin

Jan Beulich Nov. 27, 2023, 8:03 a.m. UTC | #7

On 27.11.2023 03:16, Hu, Lin1 wrote:
>> -----Original Message-----
>> From: Jan Beulich <jbeulich@suse.com>
>> Sent: Friday, November 24, 2023 3:21 PM
>>
>> On 24.11.2023 06:40, Hu, Lin1 wrote:
>>>> -----Original Message-----
>>>> From: Jan Beulich <jbeulich@suse.com>
>>>> Sent: Tuesday, November 14, 2023 7:15 PM
>>>>
>>>> On 14.11.2023 04:26, Hu, Lin1 wrote:
>>>>>  > On 02.11.2023 12:29, Cui, Lili wrote:
>>>>>>> --- /dev/null
>>>>>>> +++ b/gas/testsuite/gas/i386/x86-64-apx-jmpabs-inval.s
>>>>>>> @@ -0,0 +1,17 @@
>>>>>>> +# Check bytecode of APX_F jmpabs instructions with illegal encode.
>>>>>>> +
>>>>>>> +	.text
>>>>>>> +# With 66 prefix
>>>>>>> +	.byte
>>>>>> 0x66,0x64,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
>>>>>>> +	.byte
>>>>>>> +0x66,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
>>>>>>> +# With 67 prefix
>>>>>>> +	.byte
>>>>>> 0x67,0x64,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
>>>>>>> +	.byte
>>>>>>> +0x67,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
>>>>>>> +# With F2 prefix
>>>>>>> +	.byte
>>>>>> 0xf2,0x64,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
>>>>>>> +	.byte
>>>>>>> +0xf2,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
>>>>>>> +# With F3 prefix
>>>>>>> +	.byte
>>>>>> 0xf3,0x64,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
>>>>>>> +	.byte
>>>>>>> +0xf3,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
>>>>>>> +# REX2.M0 = 0 REX2.W = 1
>>>>>>> +	.byte 0xd5,0x08,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
>>>>>>
>>>>>> As per earlier comments: This wants expressing via .insn, to yield
>>>>>> input to gas human-readable (even if, as it looks, two .insn are
>>>>>> going to be required per resulting construct). Further in the last
>>>>>> comment, why is
>>>>>> REX2.M0 mentioned there, but not elsewhere? Also what purpose serve
>>>>>> the
>>>>>> 0x64 bytes here? The encodings are invalid irrespective of them.
>>>>>> Instead I'd kind have expected LOCK to also be covered.
>>>>>>
>>>>>
>>>>> Because this error line is only for the special case where M0 == 0,
>>>>> and
>>>> base_opcode == 0xa1, W should be 0, other than 1. If M0 = 1, W = 1,
>>>> base_opcode == 0xa1, I think it could decoding as mov rax, moffs or (
>>>> some future insn). Elsewhere it's just excluding invalid prefixes.
>>>>
>>>> Yet REX2.M == 0 is as relevant there (until such time where some of
>>>> those prefixes used is assigned meaning).
>>>>
>>>
>>> I don't think so. the original result with W = 1 was MOV RAX, moffs, but the
>> insn can't support REX2. So If someone input .byte REX2.M = 0 && REX2.W = 1, it
>> should be a Bad_Opcode.
>>
>> I fully agree with this. But that doesn't invalidate my original comment:
>> REX2.M is relevant in all of these tests, and hence comments end up inconsistent.
>> At the risk of repeating myself - especially when you use .byte for encoding an
>> instruction, what that (unreadable) byte sequence is supposed to encode wants
>> to be properly stated in a comment then.
>> When using .insn, some parts may become self-descriptive, hence why
>> generally .insn ought to be preferred over .byte.
>>
> 
> OK, I will add REX2.M = 0 in other comments. I'm afraid I can't use .insn, because I don't know how to express d5.

Things are going to already be far better if you use .byte only for the REX2
prefix, but a normal insn or .insn for the "main part". Then it'll be easily
recognizable which registers are involved in the operation, or in addressing
a memory operand. Of course you won't be able to use 64-bit register names,
as that would lead to unwanted REX prefixes. But using 32-bit register names
there fully serves the purpose, even when the actual encoding has REX2.W=1.

>>>>>> Also a spec question as we're talking of what is or is not valid (i.e.
>>>>>> causing #UD) here: Why would XCR0.APX=0 need to cause #UD? There's
>>>>>> no use of eGPR-s here.
>>>>>>
>>>>>
>>>>> Sorry, what is XCR0.APX?
>>>>
>>>> Bit 19 of the XCR0 register. It is mentioned in exactly this way in
>>>> the APX- LEGACY-JMPABS exception class description.
>>>>
>>>
>>> I think XCR0.APX is a state bit to control if support APX instruction set not if
>> support eGPR-s.
>>
>> No, XCR0 very certainly is a set of controls affecting register use. If JMPABS is
>> also controlled by it, then I'd view this as an erratum if it appeared like that in
>> silicon; that erratum may well be a design one then, or one "justified" by
>> simplifying the implementation in some way, but it would still be wrong from a
>> conceptual pov.
>>
> 
> In APX.pdf (https://cdrdv2.intel.com/v1/dl/getContent/784266) section 3.1.4.2, XCR0 govern APX State and prefixes. And In sdm.pdf (https://cdrdv2.intel.com/v1/dl/getContent/671200), section 13.3 page 323,  "Software can excute Intel AVX-512 instructions only if CR4.OSXSAVE = 1 and XCR0[7:5] = 111b." Their focus is on XCR0[7:5], not on what corresponding registers are used, For example, 
> page 567 Table 2-37, alougth bit 6 is used for the upper 256 bits of the register ZMM0-ZMM15, 256 encoding instructions still require to set the bit.
> 
> I admit that in the earliest introductions of XCR0[7:5], it looks like it's for register state use only, but from actual use later on, they're more of an instruction state, but from the introductions of the XCR0.APX bit, that's not a problem.

Since you take AVX512 for analogy: Can you please point me at an insn which
doesn't use any AVX512 register covered by said three XCR0 bits? Talking of
insn use and talking of register use simply is the same there. Hence the
analogy cannot be used when discussing JMPABS. Furthermore the specific
wording in SDM, ISE, or APX doc also cannot be blindly trusted. What matters
is how silicon is going to behave, and for JMPABS my impression is that if
a dependency on XCR0 was existing there, it would have been introduced
artificially, i.e. without real need. _That's_ what I'm putting under
question.

Jan

Hu, Lin1 Nov. 27, 2023, 8:46 a.m. UTC | #8

> >>>>>> Also a spec question as we're talking of what is or is not valid (i.e.
> >>>>>> causing #UD) here: Why would XCR0.APX=0 need to cause #UD?
> >>>>>> There's no use of eGPR-s here.
> >>>>>>
> >>>>>
> >>>>> Sorry, what is XCR0.APX?
> >>>>
> >>>> Bit 19 of the XCR0 register. It is mentioned in exactly this way in
> >>>> the APX- LEGACY-JMPABS exception class description.
> >>>>
> >>>
> >>> I think XCR0.APX is a state bit to control if support APX
> >>> instruction set not if
> >> support eGPR-s.
> >>
> >> No, XCR0 very certainly is a set of controls affecting register use.
> >> If JMPABS is also controlled by it, then I'd view this as an erratum
> >> if it appeared like that in silicon; that erratum may well be a
> >> design one then, or one "justified" by simplifying the implementation
> >> in some way, but it would still be wrong from a conceptual pov.
> >>
> >
> > In APX.pdf (https://cdrdv2.intel.com/v1/dl/getContent/784266) section
> > 3.1.4.2, XCR0 govern APX State and prefixes. And In sdm.pdf
> (https://cdrdv2.intel.com/v1/dl/getContent/671200), section 13.3 page 323,
> "Software can excute Intel AVX-512 instructions only if CR4.OSXSAVE = 1 and
> XCR0[7:5] = 111b." Their focus is on XCR0[7:5], not on what corresponding
> registers are used, For example, page 567 Table 2-37, alougth bit 6 is used for
> the upper 256 bits of the register ZMM0-ZMM15, 256 encoding instructions still
> require to set the bit.
> >
> > I admit that in the earliest introductions of XCR0[7:5], it looks like it's for
> register state use only, but from actual use later on, they're more of an
> instruction state, but from the introductions of the XCR0.APX bit, that's not a
> problem.
> 
> Since you take AVX512 for analogy: Can you please point me at an insn which
> doesn't use any AVX512 register covered by said three XCR0 bits? Talking of insn
> use and talking of register use simply is the same there. Hence the analogy
> cannot be used when discussing JMPABS. Furthermore the specific wording in
> SDM, ISE, or APX doc also cannot be blindly trusted. What matters is how silicon
> is going to behave, and for JMPABS my impression is that if a dependency on
> XCR0 was existing there, it would have been introduced artificially, i.e. without
> real need. _That's_ what I'm putting under question.
> 

If I use "{evex} vaddpd ymm0, ymm1, ymm2". If it's for register considerations, I think bit 5, 6, 7 can all be zero. Because the insn doesn't use k0-k7 and the upper 256 bits of the registers ZMM0-ZMM15. 

BRs,
Lin

Jan Beulich Nov. 27, 2023, 8:54 a.m. UTC | #9

On 27.11.2023 09:46, Hu, Lin1 wrote:
>>>>>>>> Also a spec question as we're talking of what is or is not valid (i.e.
>>>>>>>> causing #UD) here: Why would XCR0.APX=0 need to cause #UD?
>>>>>>>> There's no use of eGPR-s here.
>>>>>>>>
>>>>>>>
>>>>>>> Sorry, what is XCR0.APX?
>>>>>>
>>>>>> Bit 19 of the XCR0 register. It is mentioned in exactly this way in
>>>>>> the APX- LEGACY-JMPABS exception class description.
>>>>>>
>>>>>
>>>>> I think XCR0.APX is a state bit to control if support APX
>>>>> instruction set not if
>>>> support eGPR-s.
>>>>
>>>> No, XCR0 very certainly is a set of controls affecting register use.
>>>> If JMPABS is also controlled by it, then I'd view this as an erratum
>>>> if it appeared like that in silicon; that erratum may well be a
>>>> design one then, or one "justified" by simplifying the implementation
>>>> in some way, but it would still be wrong from a conceptual pov.
>>>>
>>>
>>> In APX.pdf (https://cdrdv2.intel.com/v1/dl/getContent/784266) section
>>> 3.1.4.2, XCR0 govern APX State and prefixes. And In sdm.pdf
>> (https://cdrdv2.intel.com/v1/dl/getContent/671200), section 13.3 page 323,
>> "Software can excute Intel AVX-512 instructions only if CR4.OSXSAVE = 1 and
>> XCR0[7:5] = 111b." Their focus is on XCR0[7:5], not on what corresponding
>> registers are used, For example, page 567 Table 2-37, alougth bit 6 is used for
>> the upper 256 bits of the register ZMM0-ZMM15, 256 encoding instructions still
>> require to set the bit.
>>>
>>> I admit that in the earliest introductions of XCR0[7:5], it looks like it's for
>> register state use only, but from actual use later on, they're more of an
>> instruction state, but from the introductions of the XCR0.APX bit, that's not a
>> problem.
>>
>> Since you take AVX512 for analogy: Can you please point me at an insn which
>> doesn't use any AVX512 register covered by said three XCR0 bits? Talking of insn
>> use and talking of register use simply is the same there. Hence the analogy
>> cannot be used when discussing JMPABS. Furthermore the specific wording in
>> SDM, ISE, or APX doc also cannot be blindly trusted. What matters is how silicon
>> is going to behave, and for JMPABS my impression is that if a dependency on
>> XCR0 was existing there, it would have been introduced artificially, i.e. without
>> real need. _That's_ what I'm putting under question.
>>
> 
> If I use "{evex} vaddpd ymm0, ymm1, ymm2". If it's for register considerations, I think bit 5, 6, 7 can all be zero. Because the insn doesn't use k0-k7 and the upper 256 bits of the registers ZMM0-ZMM15. 

How that? It clears the upper 256 bits of the destination register.

Jan

Hu, Lin1 Nov. 27, 2023, 9:03 a.m. UTC | #10

> -----Original Message-----
> From: Jan Beulich <jbeulich@suse.com>
> Sent: Monday, November 27, 2023 4:55 PM
> To: Hu, Lin1 <lin1.hu@intel.com>
> Cc: Lu, Hongjiu <hongjiu.lu@intel.com>; ccoutant@gmail.com;
> binutils@sourceware.org; Cui, Lili <lili.cui@intel.com>
> Subject: Re: [PATCH 8/8] Support APX JMPABS
> 
> On 27.11.2023 09:46, Hu, Lin1 wrote:
> >>>>>>>> Also a spec question as we're talking of what is or is not valid (i.e.
> >>>>>>>> causing #UD) here: Why would XCR0.APX=0 need to cause #UD?
> >>>>>>>> There's no use of eGPR-s here.
> >>>>>>>>
> >>>>>>>
> >>>>>>> Sorry, what is XCR0.APX?
> >>>>>>
> >>>>>> Bit 19 of the XCR0 register. It is mentioned in exactly this way
> >>>>>> in the APX- LEGACY-JMPABS exception class description.
> >>>>>>
> >>>>>
> >>>>> I think XCR0.APX is a state bit to control if support APX
> >>>>> instruction set not if
> >>>> support eGPR-s.
> >>>>
> >>>> No, XCR0 very certainly is a set of controls affecting register use.
> >>>> If JMPABS is also controlled by it, then I'd view this as an
> >>>> erratum if it appeared like that in silicon; that erratum may well
> >>>> be a design one then, or one "justified" by simplifying the
> >>>> implementation in some way, but it would still be wrong from a conceptual
> pov.
> >>>>
> >>>
> >>> In APX.pdf (https://cdrdv2.intel.com/v1/dl/getContent/784266)
> >>> section 3.1.4.2, XCR0 govern APX State and prefixes. And In sdm.pdf
> >> (https://cdrdv2.intel.com/v1/dl/getContent/671200), section 13.3 page
> >> 323, "Software can excute Intel AVX-512 instructions only if
> >> CR4.OSXSAVE = 1 and XCR0[7:5] = 111b." Their focus is on XCR0[7:5],
> >> not on what corresponding registers are used, For example, page 567
> >> Table 2-37, alougth bit 6 is used for the upper 256 bits of the
> >> register ZMM0-ZMM15, 256 encoding instructions still require to set the bit.
> >>>
> >>> I admit that in the earliest introductions of XCR0[7:5], it looks
> >>> like it's for
> >> register state use only, but from actual use later on, they're more
> >> of an instruction state, but from the introductions of the XCR0.APX
> >> bit, that's not a problem.
> >>
> >> Since you take AVX512 for analogy: Can you please point me at an insn
> >> which doesn't use any AVX512 register covered by said three XCR0
> >> bits? Talking of insn use and talking of register use simply is the
> >> same there. Hence the analogy cannot be used when discussing JMPABS.
> >> Furthermore the specific wording in SDM, ISE, or APX doc also cannot
> >> be blindly trusted. What matters is how silicon is going to behave,
> >> and for JMPABS my impression is that if a dependency on
> >> XCR0 was existing there, it would have been introduced artificially,
> >> i.e. without real need. _That's_ what I'm putting under question.
> >>
> >
> > If I use "{evex} vaddpd ymm0, ymm1, ymm2". If it's for register considerations,
> I think bit 5, 6, 7 can all be zero. Because the insn doesn't use k0-k7 and the
> upper 256 bits of the registers ZMM0-ZMM15.
> 
> How that? It clears the upper 256 bits of the destination register.
> 

OK, so bit 5 and 7 are not affected, it they are zero, I  think the exception shouldn't be triggered.

BRs,
Lin

Jan Beulich Nov. 27, 2023, 10:32 a.m. UTC | #11

On 27.11.2023 10:03, Hu, Lin1 wrote:
>> -----Original Message-----
>> From: Jan Beulich <jbeulich@suse.com>
>> Sent: Monday, November 27, 2023 4:55 PM
>>
>> On 27.11.2023 09:46, Hu, Lin1 wrote:
>>> If I use "{evex} vaddpd ymm0, ymm1, ymm2". If it's for register considerations,
>> I think bit 5, 6, 7 can all be zero. Because the insn doesn't use k0-k7 and the
>> upper 256 bits of the registers ZMM0-ZMM15.
>>
>> How that? It clears the upper 256 bits of the destination register.
> 
> OK, so bit 5 and 7 are not affected, it they are zero, I  think the exception shouldn't be triggered.

Except that the spec mandates that the three bits are all set or all clear.
This could have been less strict, but that's too late now. For APX otoh it's
not too late yet to avoid quirky behavior.

Jan

Hu, Lin1 Dec. 4, 2023, 7:33 a.m. UTC | #12

> -----Original Message-----
> From: Jan Beulich <jbeulich@suse.com>
> Sent: Monday, November 27, 2023 6:32 PM
> To: Hu, Lin1 <lin1.hu@intel.com>
> Cc: Lu, Hongjiu <hongjiu.lu@intel.com>; ccoutant@gmail.com;
> binutils@sourceware.org; Cui, Lili <lili.cui@intel.com>
> Subject: Re: [PATCH 8/8] Support APX JMPABS
> 
> On 27.11.2023 10:03, Hu, Lin1 wrote:
> >> -----Original Message-----
> >> From: Jan Beulich <jbeulich@suse.com>
> >> Sent: Monday, November 27, 2023 4:55 PM
> >>
> >> On 27.11.2023 09:46, Hu, Lin1 wrote:
> >>> If I use "{evex} vaddpd ymm0, ymm1, ymm2". If it's for register
> >>> considerations,
> >> I think bit 5, 6, 7 can all be zero. Because the insn doesn't use
> >> k0-k7 and the upper 256 bits of the registers ZMM0-ZMM15.
> >>
> >> How that? It clears the upper 256 bits of the destination register.
> >
> > OK, so bit 5 and 7 are not affected, it they are zero, I  think the exception
> shouldn't be triggered.
> 
> Except that the spec mandates that the three bits are all set or all clear.
> This could have been less strict, but that's too late now. For APX otoh it's not too
> late yet to avoid quirky behavior.
> 

We had a discussion with the people involved. XCR0.APX controls if enable the REX2 and Extended EVEX prefixes in CPU now. So JMPABS is affected by this bit.

New fields in XCR0:
	– APX_F – Intel® APX state and prefixes are governed by XCR0[APX_F=19]. This control bit
	enables Intel® APX ISA by enabling the use of the REX2 and Extended EVEX prefixes in IA-32e
	64-bit mode and by enabling the XSAVE feature set to manage Intel® APX state. Note that in
	64-bit mode, none of the Intel® APX features (including the REX2 and Extended EVEX prefixes
	and all new Intel® APX instructions) can be used until they are XCR0-enabled.

BRs,
Lin

diff mbox series

Patch

diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c
index 787108cedc8..42019c61a33 100644
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -7790,7 +7790,8 @@  match_template (char mnem_suffix)
   if (!quiet_warnings)
     {
       if (!intel_syntax
-	  && (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE)))
+	  && (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE))
+	  && t->mnem_off != MN_jmpabs)
 	as_warn (_("indirect %s without `*'"), insn_name (t));
 
       if (t->opcode_modifier.isprefix
@@ -8939,6 +8940,9 @@  process_operands (void)
 	}
     }
 
+  if (i.tm.mnem_off == MN_jmpabs)
+    i.rex2_encoding = true;
+
   /* If a segment was explicitly specified, and the specified segment
      is neither the default nor the one already recorded from a prefix,
      use an opcode prefix to select it.  If we never figured out what
diff --git a/gas/testsuite/gas/i386/apx-jmpabs-inval.l b/gas/testsuite/gas/i386/apx-jmpabs-inval.l
new file mode 100644
index 00000000000..87e7a800f1a
--- /dev/null
+++ b/gas/testsuite/gas/i386/apx-jmpabs-inval.l
@@ -0,0 +1,3 @@ 
+.* Assembler messages:
+.*:5: Error: `jmpabs' is only supported in 64-bit mode
+.*:6: Error: `jmpabs' is only supported in 64-bit mode
diff --git a/gas/testsuite/gas/i386/apx-jmpabs-inval.s b/gas/testsuite/gas/i386/apx-jmpabs-inval.s
new file mode 100644
index 00000000000..1f9f1f80b72
--- /dev/null
+++ b/gas/testsuite/gas/i386/apx-jmpabs-inval.s
@@ -0,0 +1,6 @@ 
+# Check 32bit illegal APX_F JMPABS instructions
+
+	.text
+ _start:
+	jmpabs	      $0x0202020202020202
+	jmpabs	      $0x2
diff --git a/gas/testsuite/gas/i386/i386.exp b/gas/testsuite/gas/i386/i386.exp
index 75e1a4ca369..9280785d41d 100644
--- a/gas/testsuite/gas/i386/i386.exp
+++ b/gas/testsuite/gas/i386/i386.exp
@@ -510,6 +510,7 @@  if [gas_32_check] then {
     run_dump_test "sm4-intel"
     run_list_test "pbndkb-inval"
     run_list_test "apx-push2pop2-inval"
+    run_list_test "apx-jmpabs-inval"
     run_list_test "sg"
     run_dump_test "clzero"
     run_dump_test "invlpgb"
diff --git a/gas/testsuite/gas/i386/x86-64-apx-jmpabs-intel.d b/gas/testsuite/gas/i386/x86-64-apx-jmpabs-intel.d
new file mode 100644
index 00000000000..3b51aead651
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-apx-jmpabs-intel.d
@@ -0,0 +1,14 @@ 
+#as:
+#objdump: -dw -Mintel
+#name: x86_64 APX_F JMPABS insns (Intel disassembly)
+#source: x86-64-apx-jmpabs.s
+
+.*: +file format .*
+
+Disassembly of section \.text:
+
+0+ <_start>:
+\s*[a-f0-9]+:\s*d5 00 a1 02 02 02 02 02 02 02 02[	 ]+jmpabs 0x202020202020202
+\s*[a-f0-9]+:\s*d5 00 a1 02 00 00 00 00 00 00 00[	 ]+jmpabs 0x2
+\s*[a-f0-9]+:\s*d5 00 a1 02 02 02 02 02 02 02 02[	 ]+jmpabs 0x202020202020202
+\s*[a-f0-9]+:\s*d5 00 a1 02 00 00 00 00 00 00 00[	 ]+jmpabs 0x2
diff --git a/gas/testsuite/gas/i386/x86-64-apx-jmpabs-inval.d b/gas/testsuite/gas/i386/x86-64-apx-jmpabs-inval.d
new file mode 100644
index 00000000000..ef3c1fa55e2
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-apx-jmpabs-inval.d
@@ -0,0 +1,55 @@ 
+#as: --64
+#objdump: -dw
+#name: illegal decoding of APX_F jmpabs insns
+#source: x86-64-apx-jmpabs-inval.s
+
+.*: +file format .*
+
+Disassembly of section \.text:
+
+0+ <.text>:
+\s*[a-f0-9]+:	66 64 d5 00 a1[ 	 ]+\(bad\)
+\s*[a-f0-9]+:	01 00[  	]+add    %eax,\(%rax\)
+\s*[a-f0-9]+:	00 00[  	]+add    %al,\(%rax\)
+\s*[a-f0-9]+:	00 00[  	]+add    %al,\(%rax\)
+\s*[a-f0-9]+:	00 00[  	]+add    %al,\(%rax\)
+\s*[a-f0-9]+:	66 d5 00 a1[  	]+\(bad\)
+\s*[a-f0-9]+:	01 00[  	]+add    %eax,\(%rax\)
+\s*[a-f0-9]+:	00 00[  	]+add    %al,\(%rax\)
+\s*[a-f0-9]+:	00 00[  	]+add    %al,\(%rax\)
+\s*[a-f0-9]+:	00 00[  	]+add    %al,\(%rax\)
+\s*[a-f0-9]+:	67 64 d5 00 a1[  	]+\(bad\)
+\s*[a-f0-9]+:	01 00[  	]+add    %eax,\(%rax\)
+\s*[a-f0-9]+:	00 00[  	]+add    %al,\(%rax\)
+\s*[a-f0-9]+:	00 00[  	]+add    %al,\(%rax\)
+\s*[a-f0-9]+:	00 00[  	]+add    %al,\(%rax\)
+\s*[a-f0-9]+:	67 d5 00 a1[  	]+\(bad\)
+\s*[a-f0-9]+:	01 00[  	]+add    %eax,\(%rax\)
+\s*[a-f0-9]+:	00 00[  	]+add    %al,\(%rax\)
+\s*[a-f0-9]+:	00 00[  	]+add    %al,\(%rax\)
+\s*[a-f0-9]+:	00 00[  	]+add    %al,\(%rax\)
+\s*[a-f0-9]+:	f2 64 d5 00 a1[  	]+\(bad\)
+\s*[a-f0-9]+:	01 00[  	]+add    %eax,\(%rax\)
+\s*[a-f0-9]+:	00 00[  	]+add    %al,\(%rax\)
+\s*[a-f0-9]+:	00 00[  	]+add    %al,\(%rax\)
+\s*[a-f0-9]+:	00 00[  	]+add    %al,\(%rax\)
+\s*[a-f0-9]+:	f2 d5 00 a1[  	]+\(bad\)
+\s*[a-f0-9]+:	01 00[  	]+add    %eax,\(%rax\)
+\s*[a-f0-9]+:	00 00[  	]+add    %al,\(%rax\)
+\s*[a-f0-9]+:	00 00[  	]+add    %al,\(%rax\)
+\s*[a-f0-9]+:	00 00[  	]+add    %al,\(%rax\)
+\s*[a-f0-9]+:	f3 64 d5 00 a1[  	]+\(bad\)
+\s*[a-f0-9]+:	01 00[  	]+add    %eax,\(%rax\)
+\s*[a-f0-9]+:	00 00[  	]+add    %al,\(%rax\)
+\s*[a-f0-9]+:	00 00[  	]+add    %al,\(%rax\)
+\s*[a-f0-9]+:	00 00[  	]+add    %al,\(%rax\)
+\s*[a-f0-9]+:	f3 d5 00 a1[  	]+\(bad\)
+\s*[a-f0-9]+:	01 00[  	]+add    %eax,\(%rax\)
+\s*[a-f0-9]+:	00 00[  	]+add    %al,\(%rax\)
+\s*[a-f0-9]+:	00 00[  	]+add    %al,\(%rax\)
+\s*[a-f0-9]+:	00 00[  	]+add    %al,\(%rax\)
+\s*[a-f0-9]+:	d5 08 a1[  	]+\(bad\)
+\s*[a-f0-9]+:	01 00[  	]+add    %eax,\(%rax\)
+\s*[a-f0-9]+:	00 00[  	]+add    %al,\(%rax\)
+\s*[a-f0-9]+:	00 00[  	]+add    %al,\(%rax\)
+\s*...
diff --git a/gas/testsuite/gas/i386/x86-64-apx-jmpabs-inval.s b/gas/testsuite/gas/i386/x86-64-apx-jmpabs-inval.s
new file mode 100644
index 00000000000..e41240972d7
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-apx-jmpabs-inval.s
@@ -0,0 +1,17 @@ 
+# Check bytecode of APX_F jmpabs instructions with illegal encode.
+
+	.text
+# With 66 prefix
+	.byte 0x66,0x64,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+	.byte 0x66,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+# With 67 prefix
+	.byte 0x67,0x64,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+	.byte 0x67,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+# With F2 prefix
+	.byte 0xf2,0x64,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+	.byte 0xf2,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+# With F3 prefix
+	.byte 0xf3,0x64,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+	.byte 0xf3,0xd5,0x00,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+# REX2.M0 = 0 REX2.W = 1
+	.byte 0xd5,0x08,0xa1,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00
diff --git a/gas/testsuite/gas/i386/x86-64-apx-jmpabs.d b/gas/testsuite/gas/i386/x86-64-apx-jmpabs.d
new file mode 100644
index 00000000000..0c1875230c6
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-apx-jmpabs.d
@@ -0,0 +1,14 @@ 
+#as:
+#objdump: -dw
+#name: x86_64 APX_F JMPABS insns
+#source: x86-64-apx-jmpabs.s
+
+.*: +file format .*
+
+Disassembly of section \.text:
+
+0+ <_start>:
+\s*[a-f0-9]+:\s*d5 00 a1 02 02 02 02 02 02 02 02[ 	 ]+jmpabs \$0x202020202020202
+\s*[a-f0-9]+:\s*d5 00 a1 02 00 00 00 00 00 00 00[	 ]+jmpabs \$0x2
+\s*[a-f0-9]+:\s*d5 00 a1 02 02 02 02 02 02 02 02[	 ]+jmpabs \$0x202020202020202
+\s*[a-f0-9]+:\s*d5 00 a1 02 00 00 00 00 00 00 00[	 ]+jmpabs \$0x2
diff --git a/gas/testsuite/gas/i386/x86-64-apx-jmpabs.s b/gas/testsuite/gas/i386/x86-64-apx-jmpabs.s
new file mode 100644
index 00000000000..beb722421bd
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-apx-jmpabs.s
@@ -0,0 +1,10 @@ 
+# Check 64bit APX_F JMPABS instructions
+
+	.text
+ _start:
+	jmpabs	      $0x0202020202020202
+	jmpabs	      $0x2
+
+.intel_syntax noprefix
+	jmpabs	      0x0202020202020202
+	jmpabs	      0x2
diff --git a/gas/testsuite/gas/i386/x86-64.exp b/gas/testsuite/gas/i386/x86-64.exp
index eab99f9e52b..ad6f7be9c4f 100644
--- a/gas/testsuite/gas/i386/x86-64.exp
+++ b/gas/testsuite/gas/i386/x86-64.exp
@@ -371,6 +371,9 @@  run_dump_test "x86-64-apx-evex-promoted"
 run_dump_test "x86-64-apx-evex-promoted-intel"
 run_dump_test "x86-64-apx-evex-egpr"
 run_dump_test "x86-64-apx-ndd"
+run_dump_test "x86-64-apx-jmpabs"
+run_dump_test "x86-64-apx-jmpabs-intel"
+run_dump_test "x86-64-apx-jmpabs-inval"
 run_dump_test "x86-64-avx512f-rcigrz-intel"
 run_dump_test "x86-64-avx512f-rcigrz"
 run_dump_test "x86-64-clwb"
diff --git a/opcodes/i386-dis.c b/opcodes/i386-dis.c
index 825b14ad0dd..d767090aa65 100644
--- a/opcodes/i386-dis.c
+++ b/opcodes/i386-dis.c
@@ -106,6 +106,7 @@  static bool MOVSXD_Fixup (instr_info *, int, int);
 static bool DistinctDest_Fixup (instr_info *, int, int);
 static bool PREFETCHI_Fixup (instr_info *, int, int);
 static bool PUSH2_POP2_Fixup (instr_info *, int, int);
+static bool JMPABS_Fixup (instr_info *, int, int);
 
 static void ATTRIBUTE_PRINTF_3 i386_dis_printf (const disassemble_info *,
 						enum disassembler_style,
@@ -258,6 +259,9 @@  struct instr_info
   char scale_char;
 
   enum x86_64_isa isa64;
+
+  /* Remember if the current op is jmpabs.  */
+  bool is_jmpabs;
 };
 
 struct dis_private {
@@ -2032,7 +2036,7 @@  static const struct dis386 dis386[] = {
   { "lahf",		{ XX }, 0 },
   /* a0 */
   { "mov%LB",		{ AL, Ob }, PREFIX_REX2_ILLEGAL },
-  { "mov%LS",		{ eAX, Ov }, PREFIX_REX2_ILLEGAL },
+  { "mov%LS",		{ { JMPABS_Fixup, eAX_reg }, { JMPABS_Fixup, v_mode } }, PREFIX_REX2_ILLEGAL },
   { "mov%LB",		{ Ob, AL }, PREFIX_REX2_ILLEGAL },
   { "mov%LS",		{ Ov, eAX }, PREFIX_REX2_ILLEGAL },
   { "movs{b|}",		{ Ybr, Xb }, PREFIX_REX2_ILLEGAL },
@@ -9648,7 +9652,7 @@  print_insn (bfd_vma pc, disassemble_info *info, int intel_syntax)
     }
 
   if ((dp->prefix_requirement & PREFIX_REX2_ILLEGAL)
-      && ins.last_rex2_prefix >= 0)
+      && ins.last_rex2_prefix >= 0 && !ins.is_jmpabs)
     {
       i386_dis_printf (info, dis_style_text, "(bad)");
       ret = ins.end_codep - priv.the_buffer;
@@ -13857,3 +13861,38 @@  PUSH2_POP2_Fixup (instr_info *ins, int bytemode, int sizeflag)
 
   return OP_VEX (ins, bytemode, sizeflag);
 }
+
+static bool
+JMPABS_Fixup (instr_info *ins, int bytemode, int sizeflag)
+{
+  if (ins->address_mode == mode_64bit
+      && ins->last_rex2_prefix >= 0
+      && (ins->rex2 & 0x80) == 0x0)
+    {
+      uint64_t op;
+
+      if (bytemode == eAX_reg)
+	return true;
+
+      if (!get64 (ins, &op))
+	return false;
+
+      if ((ins->prefixes & (PREFIX_OPCODE | PREFIX_ADDR)) != 0x0
+	  || (ins->rex & REX_W) != 0x0)
+	{
+	  oappend (ins, "(bad)");
+	  return true;
+	}
+
+      ins->mnemonicendp = stpcpy (ins->obuf, "jmpabs");
+      ins->all_prefixes[ins->last_rex2_prefix] = 0;
+      ins->is_jmpabs = true;
+      oappend_immediate (ins, op);
+
+      return true;
+    }
+
+  if (bytemode == eAX_reg)
+    return OP_IMREG (ins, bytemode, sizeflag);
+  return OP_OFF64 (ins, v_mode, sizeflag);
+}
diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl
index 5e36f6f67eb..76f670c0a9d 100644
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -554,6 +554,8 @@  ljmp, 0xea, No64, JumpInterSegment|No_bSuf|No_sSuf|No_qSuf, { Imm16, Imm16|Imm32
 ljmp, 0xff/5, 0, Amd64|Modrm|JumpAbsolute|No_bSuf|No_sSuf|No_qSuf, { Unspecified|BaseIndex }
 ljmp, 0xff/5, x64, Intel64|Modrm|JumpAbsolute|No_bSuf|No_sSuf, { Unspecified|BaseIndex }
 
+jmpabs, 0xa1, APX_F|x64, JumpAbsolute|NoSuf, { Imm64 }
+
 ret, 0xc3, No64, DefaultSize|No_bSuf|No_sSuf|No_qSuf|RepPrefixOk|BNDPrefixOk, {}
 ret, 0xc2, No64, DefaultSize|No_bSuf|No_sSuf|No_qSuf|RepPrefixOk|BNDPrefixOk, { Imm16 }
 ret, 0xc3, x64, Amd64|DefaultSize|No_bSuf|No_lSuf|No_sSuf|NoRex64|RepPrefixOk|BNDPrefixOk, {}