[v3,5/7] ix86: don't recognize/derive Q suffix in the common case

Message ID 540678fc-8bff-ec68-c97c-478d2631998c@suse.com
State New, archived
Headers
Series x86: suffix handling changes |

Commit Message

Jan Beulich Oct. 5, 2022, 7:24 a.m. UTC
  Have use of the Q suffix, except where actually legitimate, result in the
same "only supported in 64-bit mode" diagnostic as emitted for other
64-bit-only insns. Also suppress deriving of the suffix in Intel mode
except in the legitimate cases. In exchange, this allows dropping the
respective code from match_template().

Oddly enough, despite gcc's preference towards FILDQ and FIST{,T}PQ, we
had no testcase whatsoever for these. Therefore such tests are being
added. Note that the removed line in the x86-64-lfence-load testcase
was redundant with the exact same one a few lines up.
---
With gcc's preference towards FILDQ / FIST{,T}PQ I wonder whether the
disassembler wouldn't better emit a Q suffix instead of the LL one.
---
v3: Re-base over changes to earlier patches.
  

Comments

H.J. Lu Oct. 11, 2022, 5:49 p.m. UTC | #1
On Wed, Oct 5, 2022 at 12:24 AM Jan Beulich <jbeulich@suse.com> wrote:
>
> Have its use, except where actually legitimate, result in the same "only
> supported in 64-bit mode" diagnostic as emitted for other 64-bit only
> insns. Also suppress deriving of the suffix in Intel mode except in the
> legitimate cases. This in exchange allows dropping the respective code
> from match_template().
>
> Oddly enough despite gcc's preference towards FILDQ and FIST{,T}Q we
> had no testcase whatsoever for these. Therefore such tests are being
> added. Note that the removed line in the x86-64-lfence-load testcase
> was redundant with the exact same one a few lines up.
> ---
> With gcc's preference towards FILDQ / FIST{,T}Q I wonder whether the
> disassembler wouldn't better emit a Q suffix instead of the LL one.

Since glibc uses fildll, I don't think the change is needed.

> ---
> v3: Re-base over changes to earlier patches.
>
> --- a/gas/config/tc-i386.c
> +++ b/gas/config/tc-i386.c
> @@ -4826,7 +4826,7 @@ void
>  md_assemble (char *line)
>  {
>    unsigned int j;
> -  char mnemonic[MAX_MNEM_SIZE], mnem_suffix, *copy = NULL;
> +  char mnemonic[MAX_MNEM_SIZE], mnem_suffix = 0, *copy = NULL;
>    const char *end, *pass1_mnem = NULL;
>    enum i386_error pass1_err = 0;
>    const insn_template *t;
> @@ -4858,6 +4858,7 @@ md_assemble (char *line)
>             goto no_match;
>           /* No point in trying a 2nd pass - it'll only find the same suffix
>              again.  */
> +         mnem_suffix = i.suffix;
>           goto match_error;
>         }
>        return;
> @@ -5013,9 +5014,15 @@ md_assemble (char *line)
>                   cpu_sub_arch_name ? cpu_sub_arch_name : "");
>           return;
>         case unsupported_64bit:
> -         as_bad (_("`%s' is %s supported in 64-bit mode"),
> -                 pass1_mnem ? pass1_mnem : current_templates->start->name,
> -                 flag_code == CODE_64BIT ? _("not") : _("only"));
> +         if (ISLOWER (mnem_suffix))
> +           as_bad (_("`%s%c' is %s supported in 64-bit mode"),
> +                   pass1_mnem ? pass1_mnem : current_templates->start->name,
> +                   mnem_suffix,
> +                   flag_code == CODE_64BIT ? _("not") : _("only"));
> +         else
> +           as_bad (_("`%s' is %s supported in 64-bit mode"),
> +                   pass1_mnem ? pass1_mnem : current_templates->start->name,
> +                   flag_code == CODE_64BIT ? _("not") : _("only"));
>           return;
>         case invalid_sib_address:
>           err_msg = _("invalid SIB address");
> @@ -5358,6 +5365,23 @@ md_assemble (char *line)
>      last_insn.kind = last_insn_other;
>  }
>
> +/* The Q suffix is generally valid only in 64-bit mode, with very few
> +   exceptions: fild, fistp, fisttp, and cmpxchg8b.  Note that for fild
> +   and fisttp only one of their two templates is matched below: That's
> +   sufficient since other relevant attributes are the same between both
> +   respective templates.  */
> +static INLINE bool q_suffix_allowed(const insn_template *t)
> +{
> +  return flag_code == CODE_64BIT
> +        || (t->opcode_modifier.opcodespace == SPACE_BASE
> +            && t->base_opcode == 0xdf
> +            && (t->extension_opcode & 1)) /* fild / fistp / fisttp */
> +        || (t->opcode_modifier.opcodespace == SPACE_0F
> +            && t->base_opcode == 0xc7
> +            && t->opcode_modifier.opcodeprefix == PREFIX_NONE
> +            && t->extension_opcode == 1) /* cmpxchg8b */;
> +}
> +
>  static const char *
>  parse_insn (const char *line, char *mnemonic)
>  {
> @@ -5628,6 +5652,10 @@ parse_insn (const char *line, char *mnem
>    for (t = current_templates->start; t < current_templates->end; ++t)
>      {
>        supported |= cpu_flags_match (t);
> +
> +      if (i.suffix == QWORD_MNEM_SUFFIX && !q_suffix_allowed (t))
> +       supported &= ~CPU_FLAGS_64BIT_MATCH;
> +
>        if (supported == CPU_FLAGS_PERFECT_MATCH)
>         return l;
>      }
> @@ -6663,20 +6691,12 @@ match_template (char mnem_suffix)
>        for (j = 0; j < MAX_OPERANDS; j++)
>         operand_types[j] = t->operand_types[j];
>
> -      /* In general, don't allow
> -        - 64-bit operands outside of 64-bit mode,
> -        - 32-bit operands on pre-386.  */
> +      /* In general, don't allow 32-bit operands on pre-386.  */
>        specific_error = progress (mnem_suffix ? invalid_instruction_suffix
>                                              : operand_size_mismatch);
>        j = i.imm_operands + (t->operands > i.imm_operands + 1);
> -      if (((i.suffix == QWORD_MNEM_SUFFIX
> -           && flag_code != CODE_64BIT
> -           && !(t->opcode_modifier.opcodespace == SPACE_0F
> -                && t->base_opcode == 0xc7
> -                && t->opcode_modifier.opcodeprefix == PREFIX_NONE
> -                && t->extension_opcode == 1) /* cmpxchg8b */)
> -          || (i.suffix == LONG_MNEM_SUFFIX
> -              && !cpu_arch_flags.bitfield.cpui386))
> +      if (i.suffix == LONG_MNEM_SUFFIX
> +         && !cpu_arch_flags.bitfield.cpui386
>           && (intel_syntax
>               ? (t->opcode_modifier.mnemonicsize != IGNORESIZE
>                  && !intel_float_operand (t->name))
> --- a/gas/config/tc-i386-intel.c
> +++ b/gas/config/tc-i386-intel.c
> @@ -824,7 +824,7 @@ i386_intel_operand (char *operand_string
>                     continue;
>                   break;
>                 case QWORD_MNEM_SUFFIX:
> -                 if (t->opcode_modifier.no_qsuf)
> +                 if (t->opcode_modifier.no_qsuf || !q_suffix_allowed (t))
>                     continue;
>                   break;
>                 case SHORT_MNEM_SUFFIX:
> --- a/gas/testsuite/gas/i386/opcode.d
> +++ b/gas/testsuite/gas/i386/opcode.d
> @@ -592,6 +592,10 @@ Disassembly of section .text:
>  [      ]*[a-f0-9]+:    0f 4b 90 90 90 90 90    cmovnp -0x6f6f6f70\(%eax\),%edx
>  [      ]*[a-f0-9]+:    66 0f 4a 90 90 90 90 90         cmovp  -0x6f6f6f70\(%eax\),%dx
>  [      ]*[a-f0-9]+:    66 0f 4b 90 90 90 90 90         cmovnp -0x6f6f6f70\(%eax\),%dx
> +[      ]*[a-f0-9]+:    df 28                   fildll \(%eax\)
> +[      ]*[a-f0-9]+:    df 28                   fildll \(%eax\)
> +[      ]*[a-f0-9]+:    df 38                   fistpll \(%eax\)
> +[      ]*[a-f0-9]+:    df 38                   fistpll \(%eax\)
>   +[a-f0-9]+:   82 c3 01                add    \$0x1,%bl
>   +[a-f0-9]+:   82 f3 01                xor    \$0x1,%bl
>   +[a-f0-9]+:   82 d3 01                adc    \$0x1,%bl
> --- a/gas/testsuite/gas/i386/opcode.s
> +++ b/gas/testsuite/gas/i386/opcode.s
> @@ -592,6 +592,11 @@ foo:
>   cmovpe  0x90909090(%eax),%dx
>   cmovpo 0x90909090(%eax),%dx
>
> + fildq  (%eax)
> + fildll (%eax)
> + fistpq (%eax)
> + fistpll (%eax)
> +
>         .byte 0x82, 0xc3, 0x01
>         .byte 0x82, 0xf3, 0x01
>         .byte 0x82, 0xd3, 0x01
> --- a/gas/testsuite/gas/i386/opcode-intel.d
> +++ b/gas/testsuite/gas/i386/opcode-intel.d
> @@ -593,6 +593,10 @@ Disassembly of section .text:
>  [      ]*[a-f0-9]+:    0f 4b 90 90 90 90 90    cmovnp edx,DWORD PTR \[eax-0x6f6f6f70\]
>  [      ]*[a-f0-9]+:    66 0f 4a 90 90 90 90 90         cmovp  dx,WORD PTR \[eax-0x6f6f6f70\]
>  [      ]*[a-f0-9]+:    66 0f 4b 90 90 90 90 90         cmovnp dx,WORD PTR \[eax-0x6f6f6f70\]
> +[      ]*[a-f0-9]+:    df 28                   fild   QWORD PTR \[eax\]
> +[      ]*[a-f0-9]+:    df 28                   fild   QWORD PTR \[eax\]
> +[      ]*[a-f0-9]+:    df 38                   fistp  QWORD PTR \[eax\]
> +[      ]*[a-f0-9]+:    df 38                   fistp  QWORD PTR \[eax\]
>   +[a-f0-9]+:   82 c3 01                add    bl,0x1
>   +[a-f0-9]+:   82 f3 01                xor    bl,0x1
>   +[a-f0-9]+:   82 d3 01                adc    bl,0x1
> --- a/gas/testsuite/gas/i386/opcode-suffix.d
> +++ b/gas/testsuite/gas/i386/opcode-suffix.d
> @@ -593,6 +593,10 @@ Disassembly of section .text:
>  [      ]*[a-f0-9]+:    0f 4b 90 90 90 90 90    cmovnpl -0x6f6f6f70\(%eax\),%edx
>  [      ]*[a-f0-9]+:    66 0f 4a 90 90 90 90 90         cmovpw -0x6f6f6f70\(%eax\),%dx
>  [      ]*[a-f0-9]+:    66 0f 4b 90 90 90 90 90         cmovnpw -0x6f6f6f70\(%eax\),%dx
> +[      ]*[a-f0-9]+:    df 28                   fildll \(%eax\)
> +[      ]*[a-f0-9]+:    df 28                   fildll \(%eax\)
> +[      ]*[a-f0-9]+:    df 38                   fistpll \(%eax\)
> +[      ]*[a-f0-9]+:    df 38                   fistpll \(%eax\)
>   +[a-f0-9]+:   82 c3 01                addb   \$0x1,%bl
>   +[a-f0-9]+:   82 f3 01                xorb   \$0x1,%bl
>   +[a-f0-9]+:   82 d3 01                adcb   \$0x1,%bl
> --- a/gas/testsuite/gas/i386/sse3.d
> +++ b/gas/testsuite/gas/i386/sse3.d
> @@ -13,29 +13,30 @@ Disassembly of section .text:
>    10:  df 88 90 90 90 90 [     ]*fisttps -0x6f6f6f70\(%eax\)
>    16:  db 88 90 90 90 90 [     ]*fisttpl -0x6f6f6f70\(%eax\)
>    1c:  dd 88 90 90 90 90 [     ]*fisttpll -0x6f6f6f70\(%eax\)
> -  22:  66 0f 7c 65 00 [        ]*haddpd 0x0\(%ebp\),%xmm4
> -  27:  66 0f 7c ee [   ]*haddpd %xmm6,%xmm5
> -  2b:  f2 0f 7c 37 [   ]*haddps \(%edi\),%xmm6
> -  2f:  f2 0f 7c f8 [   ]*haddps %xmm0,%xmm7
> -  33:  66 0f 7d c1 [   ]*hsubpd %xmm1,%xmm0
> -  37:  66 0f 7d 0a [   ]*hsubpd \(%edx\),%xmm1
> -  3b:  f2 0f 7d d2 [   ]*hsubps %xmm2,%xmm2
> -  3f:  f2 0f 7d 1c 24 [        ]*hsubps \(%esp\),%xmm3
> -  44:  f2 0f f0 2e [   ]*lddqu  \(%esi\),%xmm5
> -  48:  0f 01 c8 [      ]*monitor %eax,%ecx,%edx
> -  4b:  0f 01 c8 [      ]*monitor %eax,%ecx,%edx
> -  4e:  f2 0f 12 f7 [   ]*movddup %xmm7,%xmm6
> -  52:  f2 0f 12 38 [   ]*movddup \(%eax\),%xmm7
> -  56:  f3 0f 16 01 [   ]*movshdup \(%ecx\),%xmm0
> -  5a:  f3 0f 16 ca [   ]*movshdup %xmm2,%xmm1
> -  5e:  f3 0f 12 13 [   ]*movsldup \(%ebx\),%xmm2
> -  62:  f3 0f 12 dc [   ]*movsldup %xmm4,%xmm3
> -  66:  0f 01 c9 [      ]*mwait  %eax,%ecx
> -  69:  0f 01 c9 [      ]*mwait  %eax,%ecx
> -  6c:  67 0f 01 c8 [   ]*monitor %ax,%ecx,%edx
> -  70:  67 0f 01 c8 [   ]*monitor %ax,%ecx,%edx
> -  74:  f2 0f 12 38 [   ]*movddup \(%eax\),%xmm7
> -  78:  f2 0f 12 38 [   ]*movddup \(%eax\),%xmm7
> +[      ]*[0-9a-f]+:    dd 88 90 90 90 90 [     ]*fisttpll -0x6f6f6f70\(%eax\)
> +[      ]*[0-9a-f]+:    66 0f 7c 65 00 [        ]*haddpd 0x0\(%ebp\),%xmm4
> +[      ]*[0-9a-f]+:    66 0f 7c ee [   ]*haddpd %xmm6,%xmm5
> +[      ]*[0-9a-f]+:    f2 0f 7c 37 [   ]*haddps \(%edi\),%xmm6
> +[      ]*[0-9a-f]+:    f2 0f 7c f8 [   ]*haddps %xmm0,%xmm7
> +[      ]*[0-9a-f]+:    66 0f 7d c1 [   ]*hsubpd %xmm1,%xmm0
> +[      ]*[0-9a-f]+:    66 0f 7d 0a [   ]*hsubpd \(%edx\),%xmm1
> +[      ]*[0-9a-f]+:    f2 0f 7d d2 [   ]*hsubps %xmm2,%xmm2
> +[      ]*[0-9a-f]+:    f2 0f 7d 1c 24 [        ]*hsubps \(%esp\),%xmm3
> +[      ]*[0-9a-f]+:    f2 0f f0 2e [   ]*lddqu  \(%esi\),%xmm5
> +[      ]*[0-9a-f]+:    0f 01 c8 [      ]*monitor %eax,%ecx,%edx
> +[      ]*[0-9a-f]+:    0f 01 c8 [      ]*monitor %eax,%ecx,%edx
> +[      ]*[0-9a-f]+:    f2 0f 12 f7 [   ]*movddup %xmm7,%xmm6
> +[      ]*[0-9a-f]+:    f2 0f 12 38 [   ]*movddup \(%eax\),%xmm7
> +[      ]*[0-9a-f]+:    f3 0f 16 01 [   ]*movshdup \(%ecx\),%xmm0
> +[      ]*[0-9a-f]+:    f3 0f 16 ca [   ]*movshdup %xmm2,%xmm1
> +[      ]*[0-9a-f]+:    f3 0f 12 13 [   ]*movsldup \(%ebx\),%xmm2
> +[      ]*[0-9a-f]+:    f3 0f 12 dc [   ]*movsldup %xmm4,%xmm3
> +[      ]*[0-9a-f]+:    0f 01 c9 [      ]*mwait  %eax,%ecx
> +[      ]*[0-9a-f]+:    0f 01 c9 [      ]*mwait  %eax,%ecx
> +[      ]*[0-9a-f]+:    67 0f 01 c8 [   ]*monitor %ax,%ecx,%edx
> +[      ]*[0-9a-f]+:    67 0f 01 c8 [   ]*monitor %ax,%ecx,%edx
> +[      ]*[0-9a-f]+:    f2 0f 12 38 [   ]*movddup \(%eax\),%xmm7
> +[      ]*[0-9a-f]+:    f2 0f 12 38 [   ]*movddup \(%eax\),%xmm7
>  [      ]*[0-9a-f]+:    0f 01 c8[       ]+monitor %eax,%ecx,%edx
>  [      ]*[0-9a-f]+:    67 0f 01 c8[    ]+monitor %ax,%ecx,%edx
>  [      ]*[0-9a-f]+:    0f 01 c9[       ]+mwait  %eax,%ecx
> --- a/gas/testsuite/gas/i386/sse3.s
> +++ b/gas/testsuite/gas/i386/sse3.s
> @@ -8,6 +8,7 @@ foo:
>         addsubps        %xmm4,%xmm3
>         fisttps         0x90909090(%eax)
>         fisttpl         0x90909090(%eax)
> +       fisttpq         0x90909090(%eax)
>         fisttpll        0x90909090(%eax)
>         haddpd          0x0(%ebp),%xmm4
>         haddpd          %xmm6,%xmm5
> --- a/gas/testsuite/gas/i386/sse3-intel.d
> +++ b/gas/testsuite/gas/i386/sse3-intel.d
> @@ -14,6 +14,7 @@ Disassembly of section .text:
>  [      ]*[0-9a-f]+:    df 88 90 90 90 90[      ]+fisttp WORD PTR \[eax-0x6f6f6f70\]
>  [      ]*[0-9a-f]+:    db 88 90 90 90 90[      ]+fisttp DWORD PTR \[eax-0x6f6f6f70\]
>  [      ]*[0-9a-f]+:    dd 88 90 90 90 90[      ]+fisttp QWORD PTR \[eax-0x6f6f6f70\]
> +[      ]*[0-9a-f]+:    dd 88 90 90 90 90[      ]+fisttp QWORD PTR \[eax-0x6f6f6f70\]
>  [      ]*[0-9a-f]+:    66 0f 7c 65 00[         ]+haddpd xmm4,(XMMWORD PTR )?\[ebp(\+0x0)\]
>  [      ]*[0-9a-f]+:    66 0f 7c ee[    ]+haddpd xmm5,xmm6
>  [      ]*[0-9a-f]+:    f2 0f 7c 37[    ]+haddps xmm6,(XMMWORD PTR )?\[edi\]
> --- a/gas/testsuite/gas/i386/x86-64-lfence-load.d
> +++ b/gas/testsuite/gas/i386/x86-64-lfence-load.d
> @@ -44,16 +44,21 @@ Disassembly of section .text:
>   +[a-f0-9]+:   0f ae e8                lfence
>   +[a-f0-9]+:   db 55 00                fistl  0x0\(%rbp\)
>   +[a-f0-9]+:   df 55 00                fists  0x0\(%rbp\)
> + +[a-f0-9]+:   db 5d 00                fistpl 0x0\(%rbp\)
> + +[a-f0-9]+:   df 5d 00                fistps 0x0\(%rbp\)
> + +[a-f0-9]+:   df 7d 00                fistpll 0x0\(%rbp\)
>   +[a-f0-9]+:   db 45 00                fildl  0x0\(%rbp\)
>   +[a-f0-9]+:   0f ae e8                lfence
>   +[a-f0-9]+:   df 45 00                filds  0x0\(%rbp\)
>   +[a-f0-9]+:   0f ae e8                lfence
> + +[a-f0-9]+:   df 6d 00                fildll 0x0\(%rbp\)
> + +[a-f0-9]+:   0f ae e8                lfence
>   +[a-f0-9]+:   9b dd 75 00             fsave  0x0\(%rbp\)
>   +[a-f0-9]+:   dd 65 00                frstor 0x0\(%rbp\)
>   +[a-f0-9]+:   0f ae e8                lfence
> - +[a-f0-9]+:   df 45 00                filds  0x0\(%rbp\)
> - +[a-f0-9]+:   0f ae e8                lfence
> + +[a-f0-9]+:   db 4d 00                fisttpl 0x0\(%rbp\)
>   +[a-f0-9]+:   df 4d 00                fisttps 0x0\(%rbp\)
> + +[a-f0-9]+:   dd 4d 00                fisttpll 0x0\(%rbp\)
>   +[a-f0-9]+:   d9 65 00                fldenv 0x0\(%rbp\)
>   +[a-f0-9]+:   0f ae e8                lfence
>   +[a-f0-9]+:   9b d9 75 00             fstenv 0x0\(%rbp\)
> --- a/gas/testsuite/gas/i386/x86-64-lfence-load.s
> +++ b/gas/testsuite/gas/i386/x86-64-lfence-load.s
> @@ -27,12 +27,17 @@ _start:
>         flds (%rbp)
>         fistl (%rbp)
>         fists (%rbp)
> +       fistpl (%rbp)
> +       fistps (%rbp)
> +       fistpq (%rbp)
>         fildl (%rbp)
>         filds (%rbp)
> +       fildq (%rbp)
>         fsave (%rbp)
>         frstor (%rbp)
> -       filds (%rbp)
> +       fisttpl (%rbp)
>         fisttps (%rbp)
> +       fisttpq (%rbp)
>         fldenv (%rbp)
>         fstenv (%rbp)
>         fadds  (%rbp)
> --- a/gas/testsuite/gas/i386/x86-64-sse3.d
> +++ b/gas/testsuite/gas/i386/x86-64-sse3.d
> @@ -13,6 +13,7 @@ Disassembly of section .text:
>  [      ]*[a-f0-9]+:    df 88 90 90 90 00 [     ]*fisttps 0x909090\(%rax\)
>  [      ]*[a-f0-9]+:    db 88 90 90 90 00 [     ]*fisttpl 0x909090\(%rax\)
>  [      ]*[a-f0-9]+:    dd 88 90 90 90 00 [     ]*fisttpll 0x909090\(%rax\)
> +[      ]*[a-f0-9]+:    dd 88 90 90 90 00 [     ]*fisttpll 0x909090\(%rax\)
>  [      ]*[a-f0-9]+:    66 0f 7c 65 00 [        ]*haddpd 0x0\(%rbp\),%xmm4
>  [      ]*[a-f0-9]+:    66 0f 7c ee [   ]*haddpd %xmm6,%xmm5
>  [      ]*[a-f0-9]+:    f2 0f 7c 37 [   ]*haddps \(%rdi\),%xmm6
> --- a/gas/testsuite/gas/i386/x86-64-sse3.s
> +++ b/gas/testsuite/gas/i386/x86-64-sse3.s
> @@ -8,6 +8,7 @@ foo:
>         addsubps        %xmm4,%xmm3
>         fisttps         0x909090(%rax)
>         fisttpl         0x909090(%rax)
> +       fisttpq         0x909090(%rax)
>         fisttpll        0x909090(%rax)
>         haddpd          0x0(%rbp),%xmm4
>         haddpd          %xmm6,%xmm5
> --- a/gas/testsuite/gas/i386/x86-64-sse3-intel.d
> +++ b/gas/testsuite/gas/i386/x86-64-sse3-intel.d
> @@ -14,6 +14,7 @@ Disassembly of section .text:
>  [      ]*[a-f0-9]+:    df 88 90 90 90 00[      ]+fisttp WORD PTR \[rax\+0x909090\]
>  [      ]*[a-f0-9]+:    db 88 90 90 90 00[      ]+fisttp DWORD PTR \[rax\+0x909090\]
>  [      ]*[a-f0-9]+:    dd 88 90 90 90 00[      ]+fisttp QWORD PTR \[rax\+0x909090\]
> +[      ]*[a-f0-9]+:    dd 88 90 90 90 00[      ]+fisttp QWORD PTR \[rax\+0x909090\]
>  [      ]*[a-f0-9]+:    66 0f 7c 65 00[         ]+haddpd xmm4,(XMMWORD PTR )?\[rbp(\+0x0)\]
>  [      ]*[a-f0-9]+:    66 0f 7c ee[    ]+haddpd xmm5,xmm6
>  [      ]*[a-f0-9]+:    f2 0f 7c 37[    ]+haddps xmm6,(XMMWORD PTR )?\[rdi\]
>

OK.

Thanks.
  

Patch

--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -4826,7 +4826,7 @@  void
 md_assemble (char *line)
 {
   unsigned int j;
-  char mnemonic[MAX_MNEM_SIZE], mnem_suffix, *copy = NULL;
+  char mnemonic[MAX_MNEM_SIZE], mnem_suffix = 0, *copy = NULL;
   const char *end, *pass1_mnem = NULL;
   enum i386_error pass1_err = 0;
   const insn_template *t;
@@ -4858,6 +4858,7 @@  md_assemble (char *line)
 	    goto no_match;
 	  /* No point in trying a 2nd pass - it'll only find the same suffix
 	     again.  */
+	  mnem_suffix = i.suffix;
 	  goto match_error;
 	}
       return;
@@ -5013,9 +5014,15 @@  md_assemble (char *line)
 		  cpu_sub_arch_name ? cpu_sub_arch_name : "");
 	  return;
 	case unsupported_64bit:
-	  as_bad (_("`%s' is %s supported in 64-bit mode"),
-		  pass1_mnem ? pass1_mnem : current_templates->start->name,
-		  flag_code == CODE_64BIT ? _("not") : _("only"));
+	  if (ISLOWER (mnem_suffix))
+	    as_bad (_("`%s%c' is %s supported in 64-bit mode"),
+		    pass1_mnem ? pass1_mnem : current_templates->start->name,
+		    mnem_suffix,
+		    flag_code == CODE_64BIT ? _("not") : _("only"));
+	  else
+	    as_bad (_("`%s' is %s supported in 64-bit mode"),
+		    pass1_mnem ? pass1_mnem : current_templates->start->name,
+		    flag_code == CODE_64BIT ? _("not") : _("only"));
 	  return;
 	case invalid_sib_address:
 	  err_msg = _("invalid SIB address");
@@ -5358,6 +5365,23 @@  md_assemble (char *line)
     last_insn.kind = last_insn_other;
 }
 
+/* The Q suffix is generally valid only in 64-bit mode, with very few
+   exceptions: fild, fistp, fisttp, and cmpxchg8b.  Note that for fild
+   and fisttp only one of their two templates is matched below: That's
+   sufficient since other relevant attributes are the same between both
+   respective templates.  */
+static INLINE bool q_suffix_allowed(const insn_template *t)
+{
+  return flag_code == CODE_64BIT
+	 || (t->opcode_modifier.opcodespace == SPACE_BASE
+	     && t->base_opcode == 0xdf
+	     && (t->extension_opcode & 1)) /* fild / fistp / fisttp */
+	 || (t->opcode_modifier.opcodespace == SPACE_0F
+	     && t->base_opcode == 0xc7
+	     && t->opcode_modifier.opcodeprefix == PREFIX_NONE
+	     && t->extension_opcode == 1) /* cmpxchg8b */;
+}
+
 static const char *
 parse_insn (const char *line, char *mnemonic)
 {
@@ -5628,6 +5652,10 @@  parse_insn (const char *line, char *mnem
   for (t = current_templates->start; t < current_templates->end; ++t)
     {
       supported |= cpu_flags_match (t);
+
+      if (i.suffix == QWORD_MNEM_SUFFIX && !q_suffix_allowed (t))
+	supported &= ~CPU_FLAGS_64BIT_MATCH;
+
       if (supported == CPU_FLAGS_PERFECT_MATCH)
 	return l;
     }
@@ -6663,20 +6691,12 @@  match_template (char mnem_suffix)
       for (j = 0; j < MAX_OPERANDS; j++)
 	operand_types[j] = t->operand_types[j];
 
-      /* In general, don't allow
-	 - 64-bit operands outside of 64-bit mode,
-	 - 32-bit operands on pre-386.  */
+      /* In general, don't allow 32-bit operands on pre-386.  */
       specific_error = progress (mnem_suffix ? invalid_instruction_suffix
 					     : operand_size_mismatch);
       j = i.imm_operands + (t->operands > i.imm_operands + 1);
-      if (((i.suffix == QWORD_MNEM_SUFFIX
-	    && flag_code != CODE_64BIT
-	    && !(t->opcode_modifier.opcodespace == SPACE_0F
-		 && t->base_opcode == 0xc7
-		 && t->opcode_modifier.opcodeprefix == PREFIX_NONE
-		 && t->extension_opcode == 1) /* cmpxchg8b */)
-	   || (i.suffix == LONG_MNEM_SUFFIX
-	       && !cpu_arch_flags.bitfield.cpui386))
+      if (i.suffix == LONG_MNEM_SUFFIX
+	  && !cpu_arch_flags.bitfield.cpui386
 	  && (intel_syntax
 	      ? (t->opcode_modifier.mnemonicsize != IGNORESIZE
 		 && !intel_float_operand (t->name))
--- a/gas/config/tc-i386-intel.c
+++ b/gas/config/tc-i386-intel.c
@@ -824,7 +824,7 @@  i386_intel_operand (char *operand_string
 		    continue;
 		  break;
 		case QWORD_MNEM_SUFFIX:
-		  if (t->opcode_modifier.no_qsuf)
+		  if (t->opcode_modifier.no_qsuf || !q_suffix_allowed (t))
 		    continue;
 		  break;
 		case SHORT_MNEM_SUFFIX:
--- a/gas/testsuite/gas/i386/opcode.d
+++ b/gas/testsuite/gas/i386/opcode.d
@@ -592,6 +592,10 @@  Disassembly of section .text:
 [ 	]*[a-f0-9]+:	0f 4b 90 90 90 90 90 	cmovnp -0x6f6f6f70\(%eax\),%edx
 [ 	]*[a-f0-9]+:	66 0f 4a 90 90 90 90 90 	cmovp  -0x6f6f6f70\(%eax\),%dx
 [ 	]*[a-f0-9]+:	66 0f 4b 90 90 90 90 90 	cmovnp -0x6f6f6f70\(%eax\),%dx
+[ 	]*[a-f0-9]+:	df 28                	fildll \(%eax\)
+[ 	]*[a-f0-9]+:	df 28                	fildll \(%eax\)
+[ 	]*[a-f0-9]+:	df 38                	fistpll \(%eax\)
+[ 	]*[a-f0-9]+:	df 38                	fistpll \(%eax\)
  +[a-f0-9]+:	82 c3 01             	add    \$0x1,%bl
  +[a-f0-9]+:	82 f3 01             	xor    \$0x1,%bl
  +[a-f0-9]+:	82 d3 01             	adc    \$0x1,%bl
--- a/gas/testsuite/gas/i386/opcode.s
+++ b/gas/testsuite/gas/i386/opcode.s
@@ -592,6 +592,11 @@  foo:
  cmovpe  0x90909090(%eax),%dx
  cmovpo 0x90909090(%eax),%dx
 
+ fildq  (%eax)
+ fildll (%eax)
+ fistpq (%eax)
+ fistpll (%eax)
+
 	.byte 0x82, 0xc3, 0x01
 	.byte 0x82, 0xf3, 0x01
 	.byte 0x82, 0xd3, 0x01
--- a/gas/testsuite/gas/i386/opcode-intel.d
+++ b/gas/testsuite/gas/i386/opcode-intel.d
@@ -593,6 +593,10 @@  Disassembly of section .text:
 [ 	]*[a-f0-9]+:	0f 4b 90 90 90 90 90 	cmovnp edx,DWORD PTR \[eax-0x6f6f6f70\]
 [ 	]*[a-f0-9]+:	66 0f 4a 90 90 90 90 90 	cmovp  dx,WORD PTR \[eax-0x6f6f6f70\]
 [ 	]*[a-f0-9]+:	66 0f 4b 90 90 90 90 90 	cmovnp dx,WORD PTR \[eax-0x6f6f6f70\]
+[ 	]*[a-f0-9]+:	df 28                	fild   QWORD PTR \[eax\]
+[ 	]*[a-f0-9]+:	df 28                	fild   QWORD PTR \[eax\]
+[ 	]*[a-f0-9]+:	df 38                	fistp  QWORD PTR \[eax\]
+[ 	]*[a-f0-9]+:	df 38                	fistp  QWORD PTR \[eax\]
  +[a-f0-9]+:	82 c3 01             	add    bl,0x1
  +[a-f0-9]+:	82 f3 01             	xor    bl,0x1
  +[a-f0-9]+:	82 d3 01             	adc    bl,0x1
--- a/gas/testsuite/gas/i386/opcode-suffix.d
+++ b/gas/testsuite/gas/i386/opcode-suffix.d
@@ -593,6 +593,10 @@  Disassembly of section .text:
 [ 	]*[a-f0-9]+:	0f 4b 90 90 90 90 90 	cmovnpl -0x6f6f6f70\(%eax\),%edx
 [ 	]*[a-f0-9]+:	66 0f 4a 90 90 90 90 90 	cmovpw -0x6f6f6f70\(%eax\),%dx
 [ 	]*[a-f0-9]+:	66 0f 4b 90 90 90 90 90 	cmovnpw -0x6f6f6f70\(%eax\),%dx
+[ 	]*[a-f0-9]+:	df 28                	fildll \(%eax\)
+[ 	]*[a-f0-9]+:	df 28                	fildll \(%eax\)
+[ 	]*[a-f0-9]+:	df 38                	fistpll \(%eax\)
+[ 	]*[a-f0-9]+:	df 38                	fistpll \(%eax\)
  +[a-f0-9]+:	82 c3 01             	addb   \$0x1,%bl
  +[a-f0-9]+:	82 f3 01             	xorb   \$0x1,%bl
  +[a-f0-9]+:	82 d3 01             	adcb   \$0x1,%bl
--- a/gas/testsuite/gas/i386/sse3.d
+++ b/gas/testsuite/gas/i386/sse3.d
@@ -13,29 +13,30 @@  Disassembly of section .text:
   10:	df 88 90 90 90 90 [ 	]*fisttps -0x6f6f6f70\(%eax\)
   16:	db 88 90 90 90 90 [ 	]*fisttpl -0x6f6f6f70\(%eax\)
   1c:	dd 88 90 90 90 90 [ 	]*fisttpll -0x6f6f6f70\(%eax\)
-  22:	66 0f 7c 65 00 [ 	]*haddpd 0x0\(%ebp\),%xmm4
-  27:	66 0f 7c ee [ 	]*haddpd %xmm6,%xmm5
-  2b:	f2 0f 7c 37 [ 	]*haddps \(%edi\),%xmm6
-  2f:	f2 0f 7c f8 [ 	]*haddps %xmm0,%xmm7
-  33:	66 0f 7d c1 [ 	]*hsubpd %xmm1,%xmm0
-  37:	66 0f 7d 0a [ 	]*hsubpd \(%edx\),%xmm1
-  3b:	f2 0f 7d d2 [ 	]*hsubps %xmm2,%xmm2
-  3f:	f2 0f 7d 1c 24 [ 	]*hsubps \(%esp\),%xmm3
-  44:	f2 0f f0 2e [ 	]*lddqu  \(%esi\),%xmm5
-  48:	0f 01 c8 [ 	]*monitor %eax,%ecx,%edx
-  4b:	0f 01 c8 [ 	]*monitor %eax,%ecx,%edx
-  4e:	f2 0f 12 f7 [ 	]*movddup %xmm7,%xmm6
-  52:	f2 0f 12 38 [ 	]*movddup \(%eax\),%xmm7
-  56:	f3 0f 16 01 [ 	]*movshdup \(%ecx\),%xmm0
-  5a:	f3 0f 16 ca [ 	]*movshdup %xmm2,%xmm1
-  5e:	f3 0f 12 13 [ 	]*movsldup \(%ebx\),%xmm2
-  62:	f3 0f 12 dc [ 	]*movsldup %xmm4,%xmm3
-  66:	0f 01 c9 [ 	]*mwait  %eax,%ecx
-  69:	0f 01 c9 [ 	]*mwait  %eax,%ecx
-  6c:	67 0f 01 c8 [ 	]*monitor %ax,%ecx,%edx
-  70:	67 0f 01 c8 [ 	]*monitor %ax,%ecx,%edx
-  74:	f2 0f 12 38 [ 	]*movddup \(%eax\),%xmm7
-  78:	f2 0f 12 38 [ 	]*movddup \(%eax\),%xmm7
+[ 	]*[0-9a-f]+:	dd 88 90 90 90 90 [ 	]*fisttpll -0x6f6f6f70\(%eax\)
+[ 	]*[0-9a-f]+:	66 0f 7c 65 00 [ 	]*haddpd 0x0\(%ebp\),%xmm4
+[ 	]*[0-9a-f]+:	66 0f 7c ee [ 	]*haddpd %xmm6,%xmm5
+[ 	]*[0-9a-f]+:	f2 0f 7c 37 [ 	]*haddps \(%edi\),%xmm6
+[ 	]*[0-9a-f]+:	f2 0f 7c f8 [ 	]*haddps %xmm0,%xmm7
+[ 	]*[0-9a-f]+:	66 0f 7d c1 [ 	]*hsubpd %xmm1,%xmm0
+[ 	]*[0-9a-f]+:	66 0f 7d 0a [ 	]*hsubpd \(%edx\),%xmm1
+[ 	]*[0-9a-f]+:	f2 0f 7d d2 [ 	]*hsubps %xmm2,%xmm2
+[ 	]*[0-9a-f]+:	f2 0f 7d 1c 24 [ 	]*hsubps \(%esp\),%xmm3
+[ 	]*[0-9a-f]+:	f2 0f f0 2e [ 	]*lddqu  \(%esi\),%xmm5
+[ 	]*[0-9a-f]+:	0f 01 c8 [ 	]*monitor %eax,%ecx,%edx
+[ 	]*[0-9a-f]+:	0f 01 c8 [ 	]*monitor %eax,%ecx,%edx
+[ 	]*[0-9a-f]+:	f2 0f 12 f7 [ 	]*movddup %xmm7,%xmm6
+[ 	]*[0-9a-f]+:	f2 0f 12 38 [ 	]*movddup \(%eax\),%xmm7
+[ 	]*[0-9a-f]+:	f3 0f 16 01 [ 	]*movshdup \(%ecx\),%xmm0
+[ 	]*[0-9a-f]+:	f3 0f 16 ca [ 	]*movshdup %xmm2,%xmm1
+[ 	]*[0-9a-f]+:	f3 0f 12 13 [ 	]*movsldup \(%ebx\),%xmm2
+[ 	]*[0-9a-f]+:	f3 0f 12 dc [ 	]*movsldup %xmm4,%xmm3
+[ 	]*[0-9a-f]+:	0f 01 c9 [ 	]*mwait  %eax,%ecx
+[ 	]*[0-9a-f]+:	0f 01 c9 [ 	]*mwait  %eax,%ecx
+[ 	]*[0-9a-f]+:	67 0f 01 c8 [ 	]*monitor %ax,%ecx,%edx
+[ 	]*[0-9a-f]+:	67 0f 01 c8 [ 	]*monitor %ax,%ecx,%edx
+[ 	]*[0-9a-f]+:	f2 0f 12 38 [ 	]*movddup \(%eax\),%xmm7
+[ 	]*[0-9a-f]+:	f2 0f 12 38 [ 	]*movddup \(%eax\),%xmm7
 [ 	]*[0-9a-f]+:	0f 01 c8[ 	]+monitor %eax,%ecx,%edx
 [ 	]*[0-9a-f]+:	67 0f 01 c8[ 	]+monitor %ax,%ecx,%edx
 [ 	]*[0-9a-f]+:	0f 01 c9[ 	]+mwait  %eax,%ecx
--- a/gas/testsuite/gas/i386/sse3.s
+++ b/gas/testsuite/gas/i386/sse3.s
@@ -8,6 +8,7 @@  foo:
 	addsubps	%xmm4,%xmm3
 	fisttps		0x90909090(%eax)
 	fisttpl		0x90909090(%eax)
+	fisttpq		0x90909090(%eax)
 	fisttpll	0x90909090(%eax)
 	haddpd		0x0(%ebp),%xmm4
 	haddpd		%xmm6,%xmm5
--- a/gas/testsuite/gas/i386/sse3-intel.d
+++ b/gas/testsuite/gas/i386/sse3-intel.d
@@ -14,6 +14,7 @@  Disassembly of section .text:
 [ 	]*[0-9a-f]+:	df 88 90 90 90 90[ 	]+fisttp WORD PTR \[eax-0x6f6f6f70\]
 [ 	]*[0-9a-f]+:	db 88 90 90 90 90[ 	]+fisttp DWORD PTR \[eax-0x6f6f6f70\]
 [ 	]*[0-9a-f]+:	dd 88 90 90 90 90[ 	]+fisttp QWORD PTR \[eax-0x6f6f6f70\]
+[ 	]*[0-9a-f]+:	dd 88 90 90 90 90[ 	]+fisttp QWORD PTR \[eax-0x6f6f6f70\]
 [ 	]*[0-9a-f]+:	66 0f 7c 65 00[ 	]+haddpd xmm4,(XMMWORD PTR )?\[ebp(\+0x0)\]
 [ 	]*[0-9a-f]+:	66 0f 7c ee[ 	]+haddpd xmm5,xmm6
 [ 	]*[0-9a-f]+:	f2 0f 7c 37[ 	]+haddps xmm6,(XMMWORD PTR )?\[edi\]
--- a/gas/testsuite/gas/i386/x86-64-lfence-load.d
+++ b/gas/testsuite/gas/i386/x86-64-lfence-load.d
@@ -44,16 +44,21 @@  Disassembly of section .text:
  +[a-f0-9]+:	0f ae e8             	lfence
  +[a-f0-9]+:	db 55 00             	fistl  0x0\(%rbp\)
  +[a-f0-9]+:	df 55 00             	fists  0x0\(%rbp\)
+ +[a-f0-9]+:	db 5d 00             	fistpl 0x0\(%rbp\)
+ +[a-f0-9]+:	df 5d 00             	fistps 0x0\(%rbp\)
+ +[a-f0-9]+:	df 7d 00             	fistpll 0x0\(%rbp\)
  +[a-f0-9]+:	db 45 00             	fildl  0x0\(%rbp\)
  +[a-f0-9]+:	0f ae e8             	lfence
  +[a-f0-9]+:	df 45 00             	filds  0x0\(%rbp\)
  +[a-f0-9]+:	0f ae e8             	lfence
+ +[a-f0-9]+:	df 6d 00             	fildll 0x0\(%rbp\)
+ +[a-f0-9]+:	0f ae e8             	lfence
  +[a-f0-9]+:	9b dd 75 00          	fsave  0x0\(%rbp\)
  +[a-f0-9]+:	dd 65 00             	frstor 0x0\(%rbp\)
  +[a-f0-9]+:	0f ae e8             	lfence
- +[a-f0-9]+:	df 45 00             	filds  0x0\(%rbp\)
- +[a-f0-9]+:	0f ae e8             	lfence
+ +[a-f0-9]+:	db 4d 00             	fisttpl 0x0\(%rbp\)
  +[a-f0-9]+:	df 4d 00             	fisttps 0x0\(%rbp\)
+ +[a-f0-9]+:	dd 4d 00             	fisttpll 0x0\(%rbp\)
  +[a-f0-9]+:	d9 65 00             	fldenv 0x0\(%rbp\)
  +[a-f0-9]+:	0f ae e8             	lfence
  +[a-f0-9]+:	9b d9 75 00          	fstenv 0x0\(%rbp\)
--- a/gas/testsuite/gas/i386/x86-64-lfence-load.s
+++ b/gas/testsuite/gas/i386/x86-64-lfence-load.s
@@ -27,12 +27,17 @@  _start:
 	flds (%rbp)
 	fistl (%rbp)
 	fists (%rbp)
+	fistpl (%rbp)
+	fistps (%rbp)
+	fistpq (%rbp)
 	fildl (%rbp)
 	filds (%rbp)
+	fildq (%rbp)
 	fsave (%rbp)
 	frstor (%rbp)
-	filds (%rbp)
+	fisttpl (%rbp)
 	fisttps (%rbp)
+	fisttpq (%rbp)
 	fldenv (%rbp)
 	fstenv (%rbp)
 	fadds  (%rbp)
--- a/gas/testsuite/gas/i386/x86-64-sse3.d
+++ b/gas/testsuite/gas/i386/x86-64-sse3.d
@@ -13,6 +13,7 @@  Disassembly of section .text:
 [ 	]*[a-f0-9]+:	df 88 90 90 90 00 [ 	]*fisttps 0x909090\(%rax\)
 [ 	]*[a-f0-9]+:	db 88 90 90 90 00 [ 	]*fisttpl 0x909090\(%rax\)
 [ 	]*[a-f0-9]+:	dd 88 90 90 90 00 [ 	]*fisttpll 0x909090\(%rax\)
+[ 	]*[a-f0-9]+:	dd 88 90 90 90 00 [ 	]*fisttpll 0x909090\(%rax\)
 [ 	]*[a-f0-9]+:	66 0f 7c 65 00 [ 	]*haddpd 0x0\(%rbp\),%xmm4
 [ 	]*[a-f0-9]+:	66 0f 7c ee [ 	]*haddpd %xmm6,%xmm5
 [ 	]*[a-f0-9]+:	f2 0f 7c 37 [ 	]*haddps \(%rdi\),%xmm6
--- a/gas/testsuite/gas/i386/x86-64-sse3.s
+++ b/gas/testsuite/gas/i386/x86-64-sse3.s
@@ -8,6 +8,7 @@  foo:
 	addsubps	%xmm4,%xmm3
 	fisttps		0x909090(%rax)
 	fisttpl		0x909090(%rax)
+	fisttpq		0x909090(%rax)
 	fisttpll	0x909090(%rax)
 	haddpd		0x0(%rbp),%xmm4
 	haddpd		%xmm6,%xmm5
--- a/gas/testsuite/gas/i386/x86-64-sse3-intel.d
+++ b/gas/testsuite/gas/i386/x86-64-sse3-intel.d
@@ -14,6 +14,7 @@  Disassembly of section .text:
 [ 	]*[a-f0-9]+:	df 88 90 90 90 00[ 	]+fisttp WORD PTR \[rax\+0x909090\]
 [ 	]*[a-f0-9]+:	db 88 90 90 90 00[ 	]+fisttp DWORD PTR \[rax\+0x909090\]
 [ 	]*[a-f0-9]+:	dd 88 90 90 90 00[ 	]+fisttp QWORD PTR \[rax\+0x909090\]
+[ 	]*[a-f0-9]+:	dd 88 90 90 90 00[ 	]+fisttp QWORD PTR \[rax\+0x909090\]
 [ 	]*[a-f0-9]+:	66 0f 7c 65 00[ 	]+haddpd xmm4,(XMMWORD PTR )?\[rbp(\+0x0)\]
 [ 	]*[a-f0-9]+:	66 0f 7c ee[ 	]+haddpd xmm5,xmm6
 [ 	]*[a-f0-9]+:	f2 0f 7c 37[ 	]+haddps xmm6,(XMMWORD PTR )?\[rdi\]