On 25.10.2023 11:11, Hu, Lin1 wrote:
> @@ -5235,6 +5237,17 @@ md_assemble (char *line)
> if (i.imm_operands)
This if() ...
> optimize_imm ();
>
> + /* user_msr instructions can match Imm32 templates when
> + guess_suffix == QWORD_MNEM_SUFFIX. */
> + if (t->mnem_off == MN_urdmsr)
> + i.types[0]
> + = operand_type_or (i.types[0],
> + smallest_imm_type (i.op[0].imms->X_add_number));
> + if (t->mnem_off == MN_uwrmsr)
> + i.types[1]
> + = operand_type_or (i.types[1],
> + smallest_imm_type (i.op[1].imms->X_add_number));
... should now enclose all of these additions as well. Both for
performance reasons (insns without immediates can skip the extra
conditionals) and to avoid latent issues (i.op[].imms is not valid to
de-reference without first checking [or knowing by implication] that
the respective operand actually is an immediate; considering we're
ahead of template matching, that'll need some further adjustment here
anyway).
But then the question is - do you actually need to go through
optimize_imm() for these two insns? Or, worse, is it perhaps even
wrong to do so? It looks at least latently risky to me.
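For concreteness, if these insns do keep going through this path, the enclosing
I'm asking for would be shaped roughly like below (just a sketch reusing your
patch's code; the added operand_type_check() guards merely stand in for whatever
check ends up being appropriate ahead of template matching):

  if (i.imm_operands)
    {
      optimize_imm ();

      /* user_msr instructions can match Imm32 templates when
         guess_suffix == QWORD_MNEM_SUFFIX.  Guard the dereference of
         i.op[].imms by first checking the operand really is an immediate.  */
      if (t->mnem_off == MN_urdmsr
          && operand_type_check (i.types[0], imm))
        i.types[0]
          = operand_type_or (i.types[0],
                             smallest_imm_type (i.op[0].imms->X_add_number));
      if (t->mnem_off == MN_uwrmsr
          && operand_type_check (i.types[1], imm))
        i.types[1]
          = operand_type_or (i.types[1],
                             smallest_imm_type (i.op[1].imms->X_add_number));
    }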
> @@ -7566,6 +7579,18 @@ match_template (char mnem_suffix)
> break;
> }
>
> + /* This pattern aims to put the unusually placed imm operand to a usual
> + place. The constraints are currently only adapted to uwrmsr, and may
> + need further tweaking when new similar instructions become available. */
> + if (i.operands > 0
> + && i.tm.operand_types[0].bitfield.class == Reg
This part is needlessly strict. Altogether I'd suggest that you check that
you have more than one operand, the last is an immediate (as you ...
> + && operand_type_check (i.tm.operand_types[i.operands - 1], imm))
... do already), and the first is not.
Generated code wise the checks would likely be cheaper when done against
the local variable operand_types[].
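Putting those together, I'd expect the check to look something like (sketch
only, untested):

  if (i.operands > 1
      && !operand_type_check (operand_types[0], imm)
      && operand_type_check (operand_types[i.operands - 1], imm))

with the body then doing the operand / template-entry swapping as in your patch.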
> --- /dev/null
> +++ b/gas/testsuite/gas/i386/x86-64-user_msr.s
> @@ -0,0 +1,31 @@
> +# Check 64bit USER_MSR instructions
> +
> + .allow_index_reg
This doesn't look to have any meaning here.
> + .text
> +_start:
> + urdmsr %r14, %r12
> + urdmsr %r14, %rax
> + urdmsr %rdx, %r12
> + urdmsr %rdx, %rax
> + urdmsr $51515151, %r12
> + urdmsr $51515151, %rax
> + uwrmsr %r12, %r14
> + uwrmsr %rax, %r14
> + uwrmsr %r12, %rdx
> + uwrmsr %rax, %rdx
> + uwrmsr %r12, $51515151
> + uwrmsr %rax, $51515151
Considering the special handling of immediates, may I ask that you check
further values. E.g. 0x7f, 0x7fff, and 0x80000000? It may further be
worthwhile to have another testcase checking that out of range values
(negative or too large) are properly rejected.
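I.e. something along the lines of (merely an illustration; pick whatever
registers you like):

	urdmsr	$0x7f, %r12
	urdmsr	$0x7fff, %r12
	urdmsr	$0x80000000, %r12
	uwrmsr	%r12, $0x7f
	uwrmsr	%r12, $0x7fff
	uwrmsr	%r12, $0x80000000

and, in a separate -inval test, e.g. $0x100000000 or a negative value, to
verify those get rejected.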
> @@ -624,6 +629,8 @@ enum
> d_swap_mode,
> /* quad word operand */
> q_mode,
> + /* 64-byte MM operand */
> + q_mm_mode,
Byte or rather bit?
> @@ -1240,6 +1252,7 @@ enum
> X86_64_VEX_0F38ED,
> X86_64_VEX_0F38EE,
> X86_64_VEX_0F38EF,
> + X86_64_VEX_MAP7_F8_L_0_W_0_R_0,
> };
As you can observe from e.g. the change you're making here, ...
> @@ -1259,7 +1272,8 @@ enum
> {
> VEX_0F = 0,
> VEX_0F38,
> - VEX_0F3A
> + VEX_0F3A,
> + VEX_MAP7
> };
... it is beneficial to have a trailing comma in enumerations which may
further be extended.

> @@ -8803,7 +8872,12 @@ get_valid_dis386 (const struct dis386 *dp, instr_info *ins)
> ins->need_vex = 3;
> ins->codep++;
> vindex = *ins->codep++;
> - dp = &vex_table[vex_table_index][vindex];
> + if (vex_table_index == VEX_MAP7 && vindex == 0xf8)
> + {
> + dp = &map7_f8_opcode;
> + }
> + else
> + dp = &vex_table[vex_table_index][vindex];
In the VEX_MAP7 case this is an out of bounds access now, isn't it?
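One possible shape to avoid indexing vex_table[] with VEX_MAP7 at all (a
sketch; adjust placement as you see fit):

      if (vex_table_index != VEX_MAP7)
	dp = &vex_table[vex_table_index][vindex];
      else if (vindex == 0xf8)
	dp = &map7_f8_opcode;
      else
	dp = &bad_opcode;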
> @@ -9130,6 +9204,7 @@ print_insn (bfd_vma pc, disassemble_info *info, int intel_syntax)
> .last_rex_prefix = -1,
> .last_seg_prefix = -1,
> .fwait_prefix = -1,
> + .has_skipped_modrm = 0,
> };
No need to add explicit initializers when the value is zero. Omitting
the line also would save me from demanding that you use "false", not
"0".
> @@ -10017,7 +10092,11 @@ dofloat (instr_info *ins, int sizeflag)
> }
> /* Skip mod/rm byte. */
> MODRM_CHECK;
> - ins->codep++;
> + if (!ins->has_skipped_modrm)
> + {
> + ins->codep++;
> + ins->has_skipped_modrm = true;
> + }
>
> dp = &float_reg[floatop - 0xd8][ins->modrm.reg];
> if (dp->name == NULL)
> @@ -11299,7 +11378,11 @@ OP_Skip_MODRM (instr_info *ins, int bytemode ATTRIBUTE_UNUSED,
>
> /* Skip mod/rm byte. */
> MODRM_CHECK;
> - ins->codep++;
> + if (!ins->has_skipped_modrm)
> + {
> + ins->codep++;
> + ins->has_skipped_modrm = true;
> + }
> return true;
> }
>
> @@ -11818,7 +11901,11 @@ OP_E (instr_info *ins, int bytemode, int sizeflag)
> {
> /* Skip mod/rm byte. */
> MODRM_CHECK;
> - ins->codep++;
> + if (!ins->has_skipped_modrm)
> + {
> + ins->codep++;
> + ins->has_skipped_modrm = true;
> + }
>
> if (ins->modrm.mod == 3)
> {
> @@ -12522,7 +12609,11 @@ OP_EM (instr_info *ins, int bytemode, int sizeflag)
>
> /* Skip mod/rm byte. */
> MODRM_CHECK;
> - ins->codep++;
> + if (!ins->has_skipped_modrm)
> + {
> + ins->codep++;
> + ins->has_skipped_modrm = true;
> + }
> ins->used_prefixes |= (ins->prefixes & PREFIX_DATA);
> reg = ins->modrm.rm;
> if (ins->prefixes & PREFIX_DATA)
> @@ -12558,7 +12649,11 @@ OP_EMC (instr_info *ins, int bytemode, int sizeflag)
>
> /* Skip mod/rm byte. */
> MODRM_CHECK;
> - ins->codep++;
> + if (!ins->has_skipped_modrm)
> + {
> + ins->codep++;
> + ins->has_skipped_modrm = true;
> + }
> ins->used_prefixes |= (ins->prefixes & PREFIX_DATA);
> oappend_register (ins, att_names_mm[ins->modrm.rm]);
> return true;
> @@ -12580,7 +12675,11 @@ OP_EX (instr_info *ins, int bytemode, int sizeflag)
>
> /* Skip mod/rm byte. */
> MODRM_CHECK;
> - ins->codep++;
> + if (!ins->has_skipped_modrm)
> + {
> + ins->codep++;
> + ins->has_skipped_modrm = true;
> + }
>
> if (bytemode == dq_mode)
> bytemode = ins->vex.w ? q_mode : d_mode;
Do you really need all of these adjustments? The only place I can see
it's needed is ...
> @@ -12623,9 +12722,10 @@ OP_R (instr_info *ins, int bytemode, int sizeflag)
> {
> case d_mode:
> case dq_mode:
> + case q_mode:
> case mask_mode:
> return OP_E (ins, bytemode, sizeflag);
... OP_E() for now. Otherwise, if you really want to do it uniformly,
I think you'd want to fold this into MODRM_CHECK, such that the same
code doesn't need repeating a whopping 9 times.
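If you do want it uniform, either fold the logic into MODRM_CHECK itself or use
a small wrapper along these lines (hypothetical name, sketch only):

  #define SKIP_MODRM()				\
    do						\
      {						\
	MODRM_CHECK;				\
	if (!ins->has_skipped_modrm)		\
	  {					\
	    ins->codep++;			\
	    ins->has_skipped_modrm = true;	\
	  }					\
      }						\
    while (0)

so that each affected call site shrinks back to a single SKIP_MODRM ();.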
Jan
> -----Original Message-----
> From: Jan Beulich <jbeulich@suse.com>
> Sent: Wednesday, October 25, 2023 7:43 PM
> To: Hu, Lin1 <lin1.hu@intel.com>
> Cc: Lu, Hongjiu <hongjiu.lu@intel.com>; binutils@sourceware.org
> Subject: Re: [PATCH][v3] Support Intel USER_MSR
>
> On 25.10.2023 11:11, Hu, Lin1 wrote:
> > @@ -5235,6 +5237,17 @@ md_assemble (char *line)
> > if (i.imm_operands)
>
> This if() ...
>
> > optimize_imm ();
> >
> > + /* user_msr instructions can match Imm32 templates when
> > + guess_suffix == QWORD_MNEM_SUFFIX. */
> > + if (t->mnem_off == MN_urdmsr)
> > + i.types[0]
> > + = operand_type_or (i.types[0],
> > + smallest_imm_type (i.op[0].imms->X_add_number));
> > + if (t->mnem_off == MN_uwrmsr)
> > + i.types[1]
> > + = operand_type_or (i.types[1],
> > + smallest_imm_type (i.op[1].imms->X_add_number));
>
> ... should now enclose all of these additions as well. Both for performance
> reasons (insns without immediates can skip the extra
> conditionals) and to avoid latent issues (i.op[].imms is not valid to de-reference
> without first checking [or knowing by implication] that the respective operand
> actually is an immediate; considering we're ahead of template matching, that'll
> need some further adjustment here anyway).
>
> But then the question is - do you actually need to go through
> optimize_imm() for these two insns? Or, worse, is it perhaps even wrong to do
> so? It looks at least latently risky to me.
>
Sorry, I didn't notice that current_templates is a global variable. I have removed that part of the change, and the solution is now:
@@ -6371,8 +6371,11 @@ optimize_imm (void)
 		   smallest_imm_type (i.op[op].imms->X_add_number));
 	    /* We must avoid matching of Imm32 templates when 64bit
-	       only immediate is available.  */
-	    if (guess_suffix == QWORD_MNEM_SUFFIX)
+	       only immediate is available.  user_msr instructions can
+	       match Imm32 templates when guess_suffix == QWORD_MNEM_SUFFIX.
+	       */
+	    if (guess_suffix == QWORD_MNEM_SUFFIX
+		&& !is_cpu(current_templates->start, CpuUSER_MSR))
 	      i.types[op].bitfield.imm32 = 0;
 	    break;
Or, if you would rather USER_MSR not go through optimize_imm() at all, I can extract the code that USER_MSR needs out of optimize_imm().
> > @@ -7566,6 +7579,18 @@ match_template (char mnem_suffix)
> > break;
> > }
> >
> > + /* This pattern aims to put the unusually placed imm operand to a usual
> > + place. The constraints are currently only adapted to uwrmsr, and may
> > + need further tweaking when new similar instructions become available. */
> > + if (i.operands > 0
> > + && i.tm.operand_types[0].bitfield.class == Reg
>
> This part is needlessly strict. Altogether I'd suggest that you check that you have
> more than one operand, the last is an immediate (as you ...
>
> > + && operand_type_check (i.tm.operand_types[i.operands - 1], imm))
>
> ... do already), and the first is not.
>
> Generated code wise the checks would likely be cheaper when done against the
> local variable operand_types[].
>
OK, I have modified them.
>
> > --- /dev/null
> > +++ b/gas/testsuite/gas/i386/x86-64-user_msr.s
> > @@ -0,0 +1,31 @@
> > +# Check 64bit USER_MSR instructions
> > +
> > + .allow_index_reg
>
> This doesn't look to have any meaning here.
>
> > + .text
> > +_start:
> > + urdmsr %r14, %r12
> > + urdmsr %r14, %rax
> > + urdmsr %rdx, %r12
> > + urdmsr %rdx, %rax
> > + urdmsr $51515151, %r12
> > + urdmsr $51515151, %rax
> > + uwrmsr %r12, %r14
> > + uwrmsr %rax, %r14
> > + uwrmsr %r12, %rdx
> > + uwrmsr %rax, %rdx
> > + uwrmsr %r12, $51515151
> > + uwrmsr %rax, $51515151
>
> Considering the special handling of immediates, may I ask that you check further
> values. E.g. 0x7f, 0x7fff, and 0x80000000? It may further be worthwhile to have
> another testcase checking that out of range values (negative or too large) are
> properly rejected.
>
Ok, I have added some tests.
>
> > @@ -624,6 +629,8 @@ enum
> > d_swap_mode,
> > /* quad word operand */
> > q_mode,
> > + /* 64-byte MM operand */
> > + q_mm_mode,
>
> Byte or rather bit?
Oh, it's a mistake; it should be bit.
>
> > @@ -1240,6 +1252,7 @@ enum
> > X86_64_VEX_0F38ED,
> > X86_64_VEX_0F38EE,
> > X86_64_VEX_0F38EF,
> > + X86_64_VEX_MAP7_F8_L_0_W_0_R_0,
> > };
>
> As you can observe from e.g. the change you're making here, ...
>
> > @@ -1259,7 +1272,8 @@ enum
> > {
> > VEX_0F = 0,
> > VEX_0F38,
> > - VEX_0F3A
> > + VEX_0F3A,
> > + VEX_MAP7
> > };
>
> ... it is beneficial to have a trailing comma in enumerations which may further be
> extended.
>
OK.
>
> > @@ -8803,7 +8872,12 @@ get_valid_dis386 (const struct dis386 *dp,
> instr_info *ins)
> > ins->need_vex = 3;
> > ins->codep++;
> > vindex = *ins->codep++;
> > - dp = &vex_table[vex_table_index][vindex];
> > + if (vex_table_index == VEX_MAP7 && vindex == 0xf8)
> > + {
> > + dp = &map7_f8_opcode;
> > + }
> > + else
> > + dp = &vex_table[vex_table_index][vindex];
>
> In the VEX_MAP7 case this is an out of bounds access now, isn't it?
>
Yes, you are right.
>
> > @@ -9130,6 +9204,7 @@ print_insn (bfd_vma pc, disassemble_info *info, int
> intel_syntax)
> > .last_rex_prefix = -1,
> > .last_seg_prefix = -1,
> > .fwait_prefix = -1,
> > + .has_skipped_modrm = 0,
> > };
>
> No need to add explicit initializers when the value is zero. Omitting the line also
> would save me from demanding that you use "false", not "0".
>
OK, I have removed the line.
>
> > @@ -10017,7 +10092,11 @@ dofloat (instr_info *ins, int sizeflag)
> > }
> > /* Skip mod/rm byte. */
> > MODRM_CHECK;
> > - ins->codep++;
> > + if (!ins->has_skipped_modrm)
> > + {
> > + ins->codep++;
> > + ins->has_skipped_modrm = true;
> > + }
> >
> > dp = &float_reg[floatop - 0xd8][ins->modrm.reg];
> > if (dp->name == NULL)
> > @@ -11299,7 +11378,11 @@ OP_Skip_MODRM (instr_info *ins, int bytemode
> > ATTRIBUTE_UNUSED,
> >
> > /* Skip mod/rm byte. */
> > MODRM_CHECK;
> > - ins->codep++;
> > + if (!ins->has_skipped_modrm)
> > + {
> > + ins->codep++;
> > + ins->has_skipped_modrm = true;
> > + }
> > return true;
> > }
> >
> > @@ -11818,7 +11901,11 @@ OP_E (instr_info *ins, int bytemode, int
> > sizeflag) {
> > /* Skip mod/rm byte. */
> > MODRM_CHECK;
> > - ins->codep++;
> > + if (!ins->has_skipped_modrm)
> > + {
> > + ins->codep++;
> > + ins->has_skipped_modrm = true;
> > + }
> >
> > if (ins->modrm.mod == 3)
> > {
> > @@ -12522,7 +12609,11 @@ OP_EM (instr_info *ins, int bytemode, int
> > sizeflag)
> >
> > /* Skip mod/rm byte. */
> > MODRM_CHECK;
> > - ins->codep++;
> > + if (!ins->has_skipped_modrm)
> > + {
> > + ins->codep++;
> > + ins->has_skipped_modrm = true;
> > + }
> > ins->used_prefixes |= (ins->prefixes & PREFIX_DATA);
> > reg = ins->modrm.rm;
> > if (ins->prefixes & PREFIX_DATA)
> > @@ -12558,7 +12649,11 @@ OP_EMC (instr_info *ins, int bytemode, int
> > sizeflag)
> >
> > /* Skip mod/rm byte. */
> > MODRM_CHECK;
> > - ins->codep++;
> > + if (!ins->has_skipped_modrm)
> > + {
> > + ins->codep++;
> > + ins->has_skipped_modrm = true;
> > + }
> > ins->used_prefixes |= (ins->prefixes & PREFIX_DATA);
> > oappend_register (ins, att_names_mm[ins->modrm.rm]);
> > return true;
> > @@ -12580,7 +12675,11 @@ OP_EX (instr_info *ins, int bytemode, int
> > sizeflag)
> >
> > /* Skip mod/rm byte. */
> > MODRM_CHECK;
> > - ins->codep++;
> > + if (!ins->has_skipped_modrm)
> > + {
> > + ins->codep++;
> > + ins->has_skipped_modrm = true;
> > + }
> >
> > if (bytemode == dq_mode)
> > bytemode = ins->vex.w ? q_mode : d_mode;
>
> Do you really need all of these adjustments? The only place I can see it's needed
> is ...
>
> > @@ -12623,9 +12722,10 @@ OP_R (instr_info *ins, int bytemode, int sizeflag)
> > {
> > case d_mode:
> > case dq_mode:
> > + case q_mode:
> > case mask_mode:
> > return OP_E (ins, bytemode, sizeflag);
>
> ... OP_E() for now. Otherwise, if you really want to do it uniformly, I think you'd
> want to fold this into MODRM_CHECK, such that the same code doesn't need
> repeating a whopping 9 times.
>
OK, I'm fine with either; in terms of saving time and lines of code, it's better to just add what's needed for now.
>
> Jan
BRs,
Lin
@@ -1,5 +1,7 @@
-*- text -*-
+* Add support for Intel USER_MSR instructions.
+
* Add support for Intel AVX10.1.
* Add support for Intel PBNDKB instructions.
@@ -1164,6 +1164,7 @@ static const arch_entry cpu_arch[] =
VECARCH (sm4, SM4, ANY_SM4, reset),
SUBARCH (pbndkb, PBNDKB, PBNDKB, false),
VECARCH (avx10.1, AVX10_1, ANY_AVX512F, set),
+ SUBARCH (user_msr, USER_MSR, USER_MSR, false),
};
#undef SUBARCH
@@ -3863,6 +3864,7 @@ build_vex_prefix (const insn_template *t)
case SPACE_0F:
case SPACE_0F38:
case SPACE_0F3A:
+ case SPACE_VEXMAP7:
i.vex.bytes[0] = 0xc4;
break;
case SPACE_XOP08:
@@ -5235,6 +5237,17 @@ md_assemble (char *line)
if (i.imm_operands)
optimize_imm ();
+ /* user_msr instructions can match Imm32 templates when
+ guess_suffix == QWORD_MNEM_SUFFIX. */
+ if (t->mnem_off == MN_urdmsr)
+ i.types[0]
+ = operand_type_or (i.types[0],
+ smallest_imm_type (i.op[0].imms->X_add_number));
+ if (t->mnem_off == MN_uwrmsr)
+ i.types[1]
+ = operand_type_or (i.types[1],
+ smallest_imm_type (i.op[1].imms->X_add_number));
+
if (i.disp_operands && !optimize_disp (t))
return;
@@ -7566,6 +7579,18 @@ match_template (char mnem_suffix)
break;
}
+ /* This pattern aims to put the unusually placed imm operand to a usual
+ place. The constraints are currently only adapted to uwrmsr, and may
+ need further tweaking when new similar instructions become available. */
+ if (i.operands > 0
+ && i.tm.operand_types[0].bitfield.class == Reg
+ && operand_type_check (i.tm.operand_types[i.operands - 1], imm))
+ {
+ i.tm.operand_types[0] = operand_types[i.operands - 1];
+ i.tm.operand_types[i.operands - 1] = operand_types[0];
+ swap_2_operands(0, i.operands - 1);
+ }
+
return t;
}
@@ -216,6 +216,7 @@ accept various extension mnemonics. For example,
@code{avx10.1/512},
@code{avx10.1/256},
@code{avx10.1/128},
+@code{user_msr},
@code{amx_int8},
@code{amx_bf16},
@code{amx_fp16},
@@ -1650,7 +1651,7 @@ supported on the CPU specified. The choices for @var{cpu_type} are:
@item @samp{.cmpccxadd} @tab @samp{.wrmsrns} @tab @samp{.msrlist}
@item @samp{.avx_ne_convert} @tab @samp{.rao_int} @tab @samp{.fred} @tab @samp{.lkgs}
@item @samp{.avx_vnni_int16} @tab @samp{.sha512} @tab @samp{.sm3} @tab @samp{.sm4}
-@item @samp{.pbndkb}
+@item @samp{.pbndkb} @tab @samp{.user_msr}
@item @samp{.wbnoinvd} @tab @samp{.pconfig} @tab @samp{.waitpkg} @tab @samp{.cldemote}
@item @samp{.shstk} @tab @samp{.gfni} @tab @samp{.vaes} @tab @samp{.vpclmulqdq}
@item @samp{.movdiri} @tab @samp{.movdir64b} @tab @samp{.enqcmd} @tab @samp{.tsxldtrk}
@@ -509,6 +509,7 @@ if [gas_32_check] then {
run_dump_test "sm4"
run_dump_test "sm4-intel"
run_list_test "pbndkb-inval"
+ run_list_test "user_msr-inval"
run_list_test "sg"
run_dump_test "clzero"
run_dump_test "invlpgb"
new file mode 100644
@@ -0,0 +1,3 @@
+.* Assembler messages:
+.*:6: Error: `urdmsr' is only supported in 64-bit mode
+.*:7: Error: `uwrmsr' is only supported in 64-bit mode
new file mode 100644
@@ -0,0 +1,7 @@
+# Check Illegal 32bit USER_MSR instructions
+
+ .allow_index_reg
+ .text
+_start:
+ urdmsr %r12, %r14
+ uwrmsr %r12, %r14
new file mode 100644
@@ -0,0 +1,34 @@
+#as:
+#objdump: -dw -Mintel
+#name: x86_64 USER_MSR insns (Intel disassembly)
+#source: x86-64-user_msr.s
+
+.*: +file format .*
+
+Disassembly of section \.text:
+
+0+ <_start>:
+\s*[a-f0-9]+:\s*f2 45 0f 38 f8 f4\s+urdmsr r12,r14
+\s*[a-f0-9]+:\s*f2 44 0f 38 f8 f0\s+urdmsr rax,r14
+\s*[a-f0-9]+:\s*f2 41 0f 38 f8 d4\s+urdmsr r12,rdx
+\s*[a-f0-9]+:\s*f2 0f 38 f8 d0\s+urdmsr rax,rdx
+\s*[a-f0-9]+:\s*c4 c7 7b f8 c4 0f 0f 12 03\s+urdmsr r12,0x3120f0f
+\s*[a-f0-9]+:\s*c4 e7 7b f8 c0 0f 0f 12 03\s+urdmsr rax,0x3120f0f
+\s*[a-f0-9]+:\s*f3 45 0f 38 f8 f4\s+uwrmsr r14,r12
+\s*[a-f0-9]+:\s*f3 44 0f 38 f8 f0\s+uwrmsr r14,rax
+\s*[a-f0-9]+:\s*f3 41 0f 38 f8 d4\s+uwrmsr rdx,r12
+\s*[a-f0-9]+:\s*f3 0f 38 f8 d0\s+uwrmsr rdx,rax
+\s*[a-f0-9]+:\s*c4 c7 7a f8 c4 0f 0f 12 03\s+uwrmsr 0x3120f0f,r12
+\s*[a-f0-9]+:\s*c4 e7 7a f8 c0 0f 0f 12 03\s+uwrmsr 0x3120f0f,rax
+\s*[a-f0-9]+:\s*f2 45 0f 38 f8 f4\s+urdmsr r12,r14
+\s*[a-f0-9]+:\s*f2 44 0f 38 f8 f0\s+urdmsr rax,r14
+\s*[a-f0-9]+:\s*f2 41 0f 38 f8 d4\s+urdmsr r12,rdx
+\s*[a-f0-9]+:\s*f2 0f 38 f8 d0\s+urdmsr rax,rdx
+\s*[a-f0-9]+:\s*c4 c7 7b f8 c4 0f 0f 12 03\s+urdmsr r12,0x3120f0f
+\s*[a-f0-9]+:\s*c4 e7 7b f8 c0 0f 0f 12 03\s+urdmsr rax,0x3120f0f
+\s*[a-f0-9]+:\s*f3 45 0f 38 f8 f4\s+uwrmsr r14,r12
+\s*[a-f0-9]+:\s*f3 44 0f 38 f8 f0\s+uwrmsr r14,rax
+\s*[a-f0-9]+:\s*f3 41 0f 38 f8 d4\s+uwrmsr rdx,r12
+\s*[a-f0-9]+:\s*f3 0f 38 f8 d0\s+uwrmsr rdx,rax
+\s*[a-f0-9]+:\s*c4 c7 7a f8 c4 0f 0f 12 03\s+uwrmsr 0x3120f0f,r12
+\s*[a-f0-9]+:\s*c4 e7 7a f8 c0 0f 0f 12 03\s+uwrmsr 0x3120f0f,rax
new file mode 100644
@@ -0,0 +1,34 @@
+#as:
+#objdump: -dw
+#name: x86_64 USER_MSR insns
+#source: x86-64-user_msr.s
+
+.*: +file format .*
+
+Disassembly of section \.text:
+
+0+ <_start>:
+\s*[a-f0-9]+:\s*f2 45 0f 38 f8 f4\s+urdmsr %r14,%r12
+\s*[a-f0-9]+:\s*f2 44 0f 38 f8 f0\s+urdmsr %r14,%rax
+\s*[a-f0-9]+:\s*f2 41 0f 38 f8 d4\s+urdmsr %rdx,%r12
+\s*[a-f0-9]+:\s*f2 0f 38 f8 d0\s+urdmsr %rdx,%rax
+\s*[a-f0-9]+:\s*c4 c7 7b f8 c4 0f 0f 12 03\s+urdmsr \$0x3120f0f,%r12
+\s*[a-f0-9]+:\s*c4 e7 7b f8 c0 0f 0f 12 03\s+urdmsr \$0x3120f0f,%rax
+\s*[a-f0-9]+:\s*f3 45 0f 38 f8 f4\s+uwrmsr %r12,%r14
+\s*[a-f0-9]+:\s*f3 44 0f 38 f8 f0\s+uwrmsr %rax,%r14
+\s*[a-f0-9]+:\s*f3 41 0f 38 f8 d4\s+uwrmsr %r12,%rdx
+\s*[a-f0-9]+:\s*f3 0f 38 f8 d0\s+uwrmsr %rax,%rdx
+\s*[a-f0-9]+:\s*c4 c7 7a f8 c4 0f 0f 12 03\s+uwrmsr %r12,\$0x3120f0f
+\s*[a-f0-9]+:\s*c4 e7 7a f8 c0 0f 0f 12 03\s+uwrmsr %rax,\$0x3120f0f
+\s*[a-f0-9]+:\s*f2 45 0f 38 f8 f4\s+urdmsr %r14,%r12
+\s*[a-f0-9]+:\s*f2 44 0f 38 f8 f0\s+urdmsr %r14,%rax
+\s*[a-f0-9]+:\s*f2 41 0f 38 f8 d4\s+urdmsr %rdx,%r12
+\s*[a-f0-9]+:\s*f2 0f 38 f8 d0\s+urdmsr %rdx,%rax
+\s*[a-f0-9]+:\s*c4 c7 7b f8 c4 0f 0f 12 03\s+urdmsr \$0x3120f0f,%r12
+\s*[a-f0-9]+:\s*c4 e7 7b f8 c0 0f 0f 12 03\s+urdmsr \$0x3120f0f,%rax
+\s*[a-f0-9]+:\s*f3 45 0f 38 f8 f4\s+uwrmsr %r12,%r14
+\s*[a-f0-9]+:\s*f3 44 0f 38 f8 f0\s+uwrmsr %rax,%r14
+\s*[a-f0-9]+:\s*f3 41 0f 38 f8 d4\s+uwrmsr %r12,%rdx
+\s*[a-f0-9]+:\s*f3 0f 38 f8 d0\s+uwrmsr %rax,%rdx
+\s*[a-f0-9]+:\s*c4 c7 7a f8 c4 0f 0f 12 03\s+uwrmsr %r12,\$0x3120f0f
+\s*[a-f0-9]+:\s*c4 e7 7a f8 c0 0f 0f 12 03\s+uwrmsr %rax,\$0x3120f0f
new file mode 100644
@@ -0,0 +1,31 @@
+# Check 64bit USER_MSR instructions
+
+ .allow_index_reg
+ .text
+_start:
+ urdmsr %r14, %r12
+ urdmsr %r14, %rax
+ urdmsr %rdx, %r12
+ urdmsr %rdx, %rax
+ urdmsr $51515151, %r12
+ urdmsr $51515151, %rax
+ uwrmsr %r12, %r14
+ uwrmsr %rax, %r14
+ uwrmsr %r12, %rdx
+ uwrmsr %rax, %rdx
+ uwrmsr %r12, $51515151
+ uwrmsr %rax, $51515151
+
+ .intel_syntax noprefix
+ urdmsr r12, r14
+ urdmsr rax, r14
+ urdmsr r12, rdx
+ urdmsr rax, rdx
+ urdmsr r12, 51515151
+ urdmsr rax, 51515151
+ uwrmsr r14, r12
+ uwrmsr r14, rax
+ uwrmsr rdx, r12
+ uwrmsr rdx, rax
+ uwrmsr 51515151, r12
+ uwrmsr 51515151, rax
@@ -450,6 +450,8 @@ run_dump_test "x86-64-sm4"
run_dump_test "x86-64-sm4-intel"
run_dump_test "x86-64-pbndkb"
run_dump_test "x86-64-pbndkb-intel"
+run_dump_test "x86-64-user_msr"
+run_dump_test "x86-64-user_msr-intel"
run_dump_test "x86-64-clzero"
run_dump_test "x86-64-mwaitx-bdver4"
run_list_test "x86-64-mwaitx-reg"
@@ -221,6 +221,9 @@ struct instr_info
/* Record whether EVEX masking is used incorrectly. */
bool illegal_masking;
+ /* Record whether the modrm byte has been skipped. */
+ bool has_skipped_modrm;
+
unsigned char op_ad;
signed char op_index[MAX_OPERANDS];
bool op_riprel[MAX_OPERANDS];
@@ -418,6 +421,7 @@ fetch_error (const instr_info *ins)
#define Gv { OP_G, v_mode }
#define Gd { OP_G, d_mode }
#define Gdq { OP_G, dq_mode }
+#define Gq { OP_G, q_mode }
#define Gm { OP_G, m_mode }
#define Gva { OP_G, va_mode }
#define Gw { OP_G, w_mode }
@@ -527,7 +531,8 @@ fetch_error (const instr_info *ins)
#define EXEvexXNoBcst { OP_EX, evex_x_nobcst_mode }
#define Rd { OP_R, d_mode }
#define Rdq { OP_R, dq_mode }
-#define Nq { OP_R, q_mode }
+#define Rq { OP_R, q_mode }
+#define Nq { OP_R, q_mm_mode }
#define Ux { OP_R, x_mode }
#define Uxmm { OP_R, xmm_mode }
#define Rxmmq { OP_R, xmmq_mode }
@@ -624,6 +629,8 @@ enum
d_swap_mode,
/* quad word operand */
q_mode,
+ /* 64-byte MM operand */
+ q_mm_mode,
/* quad word operand with operand swapped */
q_swap_mode,
/* ten-byte operand */
@@ -845,6 +852,7 @@ enum
REG_VEX_0FAE,
REG_VEX_0F3849_X86_64_L_0_W_0_M_1_P_0,
REG_VEX_0F38F3_L_0,
+ REG_VEX_MAP7_F8_L_0_W_0,
REG_XOP_09_01_L_0,
REG_XOP_09_02_L_0,
@@ -893,6 +901,7 @@ enum
MOD_0FC7_REG_6,
MOD_0FC7_REG_7,
MOD_0F38DC_PREFIX_1,
+ MOD_0F38F8,
MOD_VEX_0F3849_X86_64_L_0_W_0,
};
@@ -1010,7 +1019,8 @@ enum
PREFIX_0F38F0,
PREFIX_0F38F1,
PREFIX_0F38F6,
- PREFIX_0F38F8,
+ PREFIX_0F38F8_M_0,
+ PREFIX_0F38F8_M_1_X86_64,
PREFIX_0F38FA,
PREFIX_0F38FB,
PREFIX_0F38FC,
@@ -1073,6 +1083,7 @@ enum
PREFIX_VEX_0F38F6_L_0,
PREFIX_VEX_0F38F7_L_0,
PREFIX_VEX_0F3AF0_L_0,
+ PREFIX_VEX_MAP7_F8_L_0_W_0_R_0_X86_64,
PREFIX_EVEX_0F5B,
PREFIX_EVEX_0F6F,
@@ -1217,6 +1228,7 @@ enum
X86_64_0F18_REG_7_MOD_0,
X86_64_0F24,
X86_64_0F26,
+ X86_64_0F38F8_M_1,
X86_64_0FC7_REG_6_MOD_3_PREFIX_1,
X86_64_VEX_0F3849,
@@ -1240,6 +1252,7 @@ enum
X86_64_VEX_0F38ED,
X86_64_VEX_0F38EE,
X86_64_VEX_0F38EF,
+ X86_64_VEX_MAP7_F8_L_0_W_0_R_0,
};
enum
@@ -1259,7 +1272,8 @@ enum
{
VEX_0F = 0,
VEX_0F38,
- VEX_0F3A
+ VEX_0F3A,
+ VEX_MAP7
};
enum
@@ -1350,6 +1364,7 @@ enum
VEX_LEN_0F3ADE_W_0,
VEX_LEN_0F3ADF,
VEX_LEN_0F3AF0,
+ VEX_LEN_MAP7_F8,
VEX_LEN_XOP_08_85,
VEX_LEN_XOP_08_86,
VEX_LEN_XOP_08_87,
@@ -1510,6 +1525,7 @@ enum
VEX_W_0F3ACE,
VEX_W_0F3ACF,
VEX_W_0F3ADE,
+ VEX_W_MAP7_F8_L_0,
VEX_W_XOP_08_85_L_0,
VEX_W_XOP_08_86_L_0,
@@ -2849,6 +2865,10 @@ static const struct dis386 reg_table[][8] = {
{ "blsmskS", { VexGdq, Edq }, PREFIX_OPCODE },
{ "blsiS", { VexGdq, Edq }, PREFIX_OPCODE },
},
+ /* REG_VEX_MAP7_F8_L_0_W_0 */
+ {
+ { X86_64_TABLE (X86_64_VEX_MAP7_F8_L_0_W_0_R_0) },
+ },
/* REG_XOP_09_01_L_0 */
{
{ Bad_Opcode },
@@ -3555,13 +3575,22 @@ static const struct dis386 prefix_table[][4] = {
{ Bad_Opcode },
},
- /* PREFIX_0F38F8 */
+ /* PREFIX_0F38F8_M_0 */
{
{ Bad_Opcode },
{ "enqcmds", { Gva, M }, 0 },
{ "movdir64b", { Gva, M }, 0 },
{ "enqcmd", { Gva, M }, 0 },
},
+
+ /* PREFIX_0F38F8_M_1_X86_64 */
+ {
+ { Bad_Opcode },
+ { "uwrmsr", { Gq, Rq }, 0 },
+ { Bad_Opcode },
+ { "urdmsr", { Rq, Gq }, 0 },
+ },
+
/* PREFIX_0F38FA */
{
{ Bad_Opcode },
@@ -4014,6 +4043,14 @@ static const struct dis386 prefix_table[][4] = {
{ "rorxS", { Gdq, Edq, Ib }, 0 },
},
+ /* PREFIX_VEX_MAP7_F8_L_0_W_0_R_0_X86_64 */
+ {
+ { Bad_Opcode },
+ { "uwrmsr", { Skip_MODRM, Id, Rq }, 0 },
+ { Bad_Opcode },
+ { "urdmsr", { Rq, Id }, 0 },
+ },
+
#include "i386-dis-evex-prefix.h"
};
@@ -4322,6 +4359,12 @@ static const struct dis386 x86_64_table[][2] = {
{ "movZ", { Td, Em }, 0 },
},
+ {
+ /* X86_64_0F38F8_M_1 */
+ { Bad_Opcode },
+ { PREFIX_TABLE (PREFIX_0F38F8_M_1_X86_64) },
+ },
+
/* X86_64_0FC7_REG_6_MOD_3_PREFIX_1 */
{
{ Bad_Opcode },
@@ -4453,6 +4496,13 @@ static const struct dis386 x86_64_table[][2] = {
{ Bad_Opcode },
{ "cmpnlexadd", { Mdq, Gdq, VexGdq }, PREFIX_DATA },
},
+
+ /* X86_64_VEX_MAP7_F8_L_0_W_0_R_0 */
+ {
+ { Bad_Opcode },
+ { PREFIX_TABLE (PREFIX_VEX_MAP7_F8_L_0_W_0_R_0_X86_64) },
+ },
+
};
static const struct dis386 three_byte_table[][256] = {
@@ -4739,7 +4789,7 @@ static const struct dis386 three_byte_table[][256] = {
{ PREFIX_TABLE (PREFIX_0F38F6) },
{ Bad_Opcode },
/* f8 */
- { PREFIX_TABLE (PREFIX_0F38F8) },
+ { MOD_TABLE (MOD_0F38F8) },
{ "movdiri", { Mdq, Gdq }, PREFIX_OPCODE },
{ PREFIX_TABLE (PREFIX_0F38FA) },
{ PREFIX_TABLE (PREFIX_0F38FB) },
@@ -7205,6 +7255,11 @@ static const struct dis386 vex_len_table[][2] = {
{ PREFIX_TABLE (PREFIX_VEX_0F3AF0_L_0) },
},
+ /* VEX_LEN_MAP7_F8 */
+ {
+ { VEX_W_TABLE (VEX_W_MAP7_F8_L_0) },
+ },
+
/* VEX_LEN_XOP_08_85 */
{
{ VEX_W_TABLE (VEX_W_XOP_08_85_L_0) },
@@ -7811,6 +7866,10 @@ static const struct dis386 vex_w_table[][2] = {
/* VEX_W_0F3ADE */
{ VEX_LEN_TABLE (VEX_LEN_0F3ADE_W_0) },
},
+ {
+ /* VEX_W_MAP7_F8_L_0 */
+ { REG_TABLE (REG_VEX_MAP7_F8_L_0_W_0) },
+ },
/* VEX_W_XOP_08_85_L_0 */
{
{ "vpmacssww", { XM, Vex, EXx, XMVexI4 }, 0 },
@@ -8153,6 +8212,11 @@ static const struct dis386 mod_table[][2] = {
{ "aesenc128kl", { XM, M }, 0 },
{ "loadiwkey", { XM, EXx }, 0 },
},
+ /* MOD_0F38F8 */
+ {
+ { PREFIX_TABLE (PREFIX_0F38F8_M_0) },
+ { X86_64_TABLE (X86_64_0F38F8_M_1) },
+ },
{
/* MOD_VEX_0F3849_X86_64_L_0_W_0 */
{ PREFIX_TABLE (PREFIX_VEX_0F3849_X86_64_L_0_W_0_M_0) },
@@ -8527,6 +8591,8 @@ static const struct dis386 bad_opcode = { "(bad)", { XX }, 0 };
/* Fetch error indicator. */
static const struct dis386 err_opcode = { NULL, { XX }, 0 };
+static const struct dis386 map7_f8_opcode = { VEX_LEN_TABLE (VEX_LEN_MAP7_F8) };
+
/* Get a pointer to struct dis386 with a valid name. */
static const struct dis386 *
@@ -8769,6 +8835,9 @@ get_valid_dis386 (const struct dis386 *dp, instr_info *ins)
case 0x3:
vex_table_index = VEX_0F3A;
break;
+ case 0x7:
+ vex_table_index = VEX_MAP7;
+ break;
}
ins->codep++;
ins->vex.w = *ins->codep & 0x80;
@@ -8803,7 +8872,12 @@ get_valid_dis386 (const struct dis386 *dp, instr_info *ins)
ins->need_vex = 3;
ins->codep++;
vindex = *ins->codep++;
- dp = &vex_table[vex_table_index][vindex];
+ if (vex_table_index == VEX_MAP7 && vindex == 0xf8)
+ {
+ dp = &map7_f8_opcode;
+ }
+ else
+ dp = &vex_table[vex_table_index][vindex];
ins->end_codep = ins->codep;
/* There is no MODRM byte for VEX0F 77. */
if ((vex_table_index != VEX_0F || vindex != 0x77)
@@ -9130,6 +9204,7 @@ print_insn (bfd_vma pc, disassemble_info *info, int intel_syntax)
.last_rex_prefix = -1,
.last_seg_prefix = -1,
.fwait_prefix = -1,
+ .has_skipped_modrm = 0,
};
char op_out[MAX_OPERANDS][MAX_OPERAND_BUFFER_SIZE];
@@ -10017,7 +10092,11 @@ dofloat (instr_info *ins, int sizeflag)
}
/* Skip mod/rm byte. */
MODRM_CHECK;
- ins->codep++;
+ if (!ins->has_skipped_modrm)
+ {
+ ins->codep++;
+ ins->has_skipped_modrm = true;
+ }
dp = &float_reg[floatop - 0xd8][ins->modrm.reg];
if (dp->name == NULL)
@@ -11299,7 +11378,11 @@ OP_Skip_MODRM (instr_info *ins, int bytemode ATTRIBUTE_UNUSED,
/* Skip mod/rm byte. */
MODRM_CHECK;
- ins->codep++;
+ if (!ins->has_skipped_modrm)
+ {
+ ins->codep++;
+ ins->has_skipped_modrm = true;
+ }
return true;
}
@@ -11818,7 +11901,11 @@ OP_E (instr_info *ins, int bytemode, int sizeflag)
{
/* Skip mod/rm byte. */
MODRM_CHECK;
- ins->codep++;
+ if (!ins->has_skipped_modrm)
+ {
+ ins->codep++;
+ ins->has_skipped_modrm = true;
+ }
if (ins->modrm.mod == 3)
{
@@ -12522,7 +12609,11 @@ OP_EM (instr_info *ins, int bytemode, int sizeflag)
/* Skip mod/rm byte. */
MODRM_CHECK;
- ins->codep++;
+ if (!ins->has_skipped_modrm)
+ {
+ ins->codep++;
+ ins->has_skipped_modrm = true;
+ }
ins->used_prefixes |= (ins->prefixes & PREFIX_DATA);
reg = ins->modrm.rm;
if (ins->prefixes & PREFIX_DATA)
@@ -12558,7 +12649,11 @@ OP_EMC (instr_info *ins, int bytemode, int sizeflag)
/* Skip mod/rm byte. */
MODRM_CHECK;
- ins->codep++;
+ if (!ins->has_skipped_modrm)
+ {
+ ins->codep++;
+ ins->has_skipped_modrm = true;
+ }
ins->used_prefixes |= (ins->prefixes & PREFIX_DATA);
oappend_register (ins, att_names_mm[ins->modrm.rm]);
return true;
@@ -12580,7 +12675,11 @@ OP_EX (instr_info *ins, int bytemode, int sizeflag)
/* Skip mod/rm byte. */
MODRM_CHECK;
- ins->codep++;
+ if (!ins->has_skipped_modrm)
+ {
+ ins->codep++;
+ ins->has_skipped_modrm = true;
+ }
if (bytemode == dq_mode)
bytemode = ins->vex.w ? q_mode : d_mode;
@@ -12623,9 +12722,10 @@ OP_R (instr_info *ins, int bytemode, int sizeflag)
{
case d_mode:
case dq_mode:
+ case q_mode:
case mask_mode:
return OP_E (ins, bytemode, sizeflag);
- case q_mode:
+ case q_mm_mode:
return OP_EM (ins, x_mode, sizeflag);
case xmm_mode:
if (ins->vex.length <= 128)
@@ -12641,7 +12741,11 @@ OP_M (instr_info *ins, int bytemode, int sizeflag)
{
/* Skip mod/rm byte. */
MODRM_CHECK;
- ins->codep++;
+ if (!ins->has_skipped_modrm)
+ {
+ ins->codep++;
+ ins->has_skipped_modrm = true;
+ }
if (ins->modrm.mod == 3)
/* bad bound,lea,lds,les,lfs,lgs,lss,cmpxchg8b,vmptrst modrm */
@@ -12866,7 +12970,11 @@ OP_Mwait (instr_info *ins, int bytemode, int sizeflag ATTRIBUTE_UNUSED)
}
/* Skip mod/rm byte. */
MODRM_CHECK;
- ins->codep++;
+ if (!ins->has_skipped_modrm)
+ {
+ ins->codep++;
+ ins->has_skipped_modrm = true;
+ }
return true;
}
@@ -12897,7 +13005,11 @@ OP_Monitor (instr_info *ins, int bytemode ATTRIBUTE_UNUSED,
}
/* Skip mod/rm byte. */
MODRM_CHECK;
- ins->codep++;
+ if (!ins->has_skipped_modrm)
+ {
+ ins->codep++;
+ ins->has_skipped_modrm = true;
+ }
return true;
}
@@ -380,6 +380,7 @@ static bitfield cpu_flags[] =
BITFIELD (RAO_INT),
BITFIELD (FRED),
BITFIELD (LKGS),
+ BITFIELD (USER_MSR),
BITFIELD (MWAITX),
BITFIELD (CLZERO),
BITFIELD (OSPKE),
@@ -1023,6 +1024,7 @@ process_i386_opcode_modifier (FILE *table, char *mod, unsigned int space,
SPACE(0F3A),
SPACE(EVEXMAP5),
SPACE(EVEXMAP6),
+ SPACE(VEXMAP7),
SPACE(XOP08),
SPACE(XOP09),
SPACE(XOP0A),
@@ -223,6 +223,8 @@ enum i386_cpu
CpuFRED,
/* lkgs instruction required */
CpuLKGS,
+ /* Intel USER_MSR Instruction support required. */
+ CpuUSER_MSR,
/* mwaitx instruction required */
CpuMWAITX,
/* Clzero instruction required */
@@ -471,6 +473,7 @@ typedef union i386_cpu_flags
unsigned int cpurao_int:1;
unsigned int cpufred:1;
unsigned int cpulkgs:1;
+ unsigned int cpuuser_msr:1;
unsigned int cpumwaitx:1;
unsigned int cpuclzero:1;
unsigned int cpuospke:1;
@@ -966,6 +969,7 @@ typedef struct insn_template
3: 0F3A opcode prefix / space.
5: EVEXMAP5 opcode prefix / space.
6: EVEXMAP6 opcode prefix / space.
+ 7: VEXMAP7 opcode prefix / space.
8: XOP 08 opcode space.
9: XOP 09 opcode space.
A: XOP 0A opcode space.
@@ -976,6 +980,7 @@ typedef struct insn_template
#define SPACE_0F3A 3
#define SPACE_EVEXMAP5 5
#define SPACE_EVEXMAP6 6
+#define SPACE_VEXMAP7 7
#define SPACE_XOP08 8
#define SPACE_XOP09 9
#define SPACE_XOP0A 0xA
@@ -112,6 +112,8 @@
#define EVexMap5 OpcodeSpace=SPACE_EVEXMAP5
#define EVexMap6 OpcodeSpace=SPACE_EVEXMAP6
+#define VexMap7 OpcodeSpace=SPACE_VEXMAP7
+
#define VexW0 VexW=VEXW0
#define VexW1 VexW=VEXW1
#define VexWIG VexW=VEXWIG
@@ -3346,3 +3348,12 @@ erets, 0xf20f01ca, FRED|x64, NoSuf, {}
eretu, 0xf30f01ca, FRED|x64, NoSuf, {}
// FRED instructions end.
+
+// USER_MSR instructions.
+
+urdmsr, 0xf20f38f8, USER_MSR|x64, RegMem|NoSuf|NoRex64, { Reg64, Reg64 }
+urdmsr, 0xf2f8/0, USER_MSR|x64, Modrm|Vex128|VexMap7|VexW0|NoSuf, { Imm32, Reg64 }
+uwrmsr, 0xf30f38f8, USER_MSR|x64, Modrm|NoSuf|NoRex64, { Reg64, Reg64 }
+uwrmsr, 0xf3f8/0, USER_MSR|x64, Modrm|Vex128|VexMap7|VexW0|NoSuf, { Reg64, Imm32 }
+
+// USER_MSR instructions end.