[V2] Support -m[no-]gather -m[no-]scatter to enable/disable vectorization for all gather/scatter instructions
Checks
Commit Message
Rename the original use_gather to use_gather_8parts. Support
-mtune-ctrl={,^}use_gather to set/clear the tune features
use_gather_{2parts, 4parts, 8parts}. Support the new option -mgather
as an alias of -mtune-ctrl=use_gather, and -mno-gather as an alias of
-mtune-ctrl=^use_gather.
Similarly for use_scatter.
How about this version?
gcc/ChangeLog:
* config/i386/i386-builtins.cc
(ix86_vectorize_builtin_gather): Adjust for use_gather_8parts.
* config/i386/i386-options.cc (parse_mtune_ctrl_str):
Set/Clear tune features use_{gather,scatter}_{2parts, 4parts,
8parts} for -mtune-ctrl={,^}{use_gather,use_scatter}.
* config/i386/i386.cc (ix86_vectorize_builtin_scatter): Adjust
for use_scatter_8parts.
* config/i386/i386.h (TARGET_USE_GATHER): Rename to ..
(TARGET_USE_GATHER_8PARTS): .. this.
(TARGET_USE_SCATTER): Rename to ..
(TARGET_USE_SCATTER_8PARTS): .. this.
* config/i386/x86-tune.def (X86_TUNE_USE_GATHER): Rename to
(X86_TUNE_USE_GATHER_8PARTS): .. this.
(X86_TUNE_USE_SCATTER): Rename to
(X86_TUNE_USE_SCATTER_8PARTS): .. this.
* config/i386/i386.opt: Add new options mgather, mscatter.
---
gcc/config/i386/i386-builtins.cc | 2 +-
gcc/config/i386/i386-options.cc | 54 +++++++++++++++++++++++---------
gcc/config/i386/i386.cc | 2 +-
gcc/config/i386/i386.h | 8 ++---
gcc/config/i386/i386.opt | 8 +++++
gcc/config/i386/x86-tune.def | 4 +--
6 files changed, 56 insertions(+), 22 deletions(-)
Comments
On Fri, Aug 11, 2023 at 2:02 PM liuhongt via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Rename original use_gather to use_gather_8parts, Support
> -mtune-ctrl={,^}use_gather to set/clear tune features
> use_gather_{2parts, 4parts, 8parts}. Support the new option -mgather
> as alias of -mtune-ctrl=, use_gather, ^use_gather.
>
> Similar for use_scatter.
>
> How about this version?
I'll commit the patch if there are no objections in the next 24 hours.
>
> gcc/ChangeLog:
>
> * config/i386/i386-builtins.cc
> (ix86_vectorize_builtin_gather): Adjust for use_gather_8parts.
> * config/i386/i386-options.cc (parse_mtune_ctrl_str):
> Set/Clear tune features use_{gather,scatter}_{2parts, 4parts,
> 8parts} for -mtune-crtl={,^}{use_gather,use_scatter}.
> * config/i386/i386.cc (ix86_vectorize_builtin_scatter): Adjust
> for use_scatter_8parts
> * config/i386/i386.h (TARGET_USE_GATHER): Rename to ..
> (TARGET_USE_GATHER_8PARTS): .. this.
> (TARGET_USE_SCATTER): Rename to ..
> (TARGET_USE_SCATTER_8PARTS): .. this.
> * config/i386/x86-tune.def (X86_TUNE_USE_GATHER): Rename to
> (X86_TUNE_USE_GATHER_8PARTS): .. this.
> (X86_TUNE_USE_SCATTER): Rename to
> (X86_TUNE_USE_SCATTER_8PARTS): .. this.
> * config/i386/i386.opt: Add new options mgather, mscatter.
> ---
> gcc/config/i386/i386-builtins.cc | 2 +-
> gcc/config/i386/i386-options.cc | 54 +++++++++++++++++++++++---------
> gcc/config/i386/i386.cc | 2 +-
> gcc/config/i386/i386.h | 8 ++---
> gcc/config/i386/i386.opt | 8 +++++
> gcc/config/i386/x86-tune.def | 4 +--
> 6 files changed, 56 insertions(+), 22 deletions(-)
>
> diff --git a/gcc/config/i386/i386-builtins.cc b/gcc/config/i386/i386-builtins.cc
> index 356b6dfd5fb..8a0b8dfe073 100644
> --- a/gcc/config/i386/i386-builtins.cc
> +++ b/gcc/config/i386/i386-builtins.cc
> @@ -1657,7 +1657,7 @@ ix86_vectorize_builtin_gather (const_tree mem_vectype,
> ? !TARGET_USE_GATHER_2PARTS
> : (known_eq (TYPE_VECTOR_SUBPARTS (mem_vectype), 4u)
> ? !TARGET_USE_GATHER_4PARTS
> - : !TARGET_USE_GATHER)))
> + : !TARGET_USE_GATHER_8PARTS)))
> return NULL_TREE;
>
> if ((TREE_CODE (index_type) != INTEGER_TYPE
> diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
> index 127ee24203c..b8d038af69d 100644
> --- a/gcc/config/i386/i386-options.cc
> +++ b/gcc/config/i386/i386-options.cc
> @@ -1731,20 +1731,46 @@ parse_mtune_ctrl_str (struct gcc_options *opts, bool dump)
> curr_feature_string++;
> clear = true;
> }
> - for (i = 0; i < X86_TUNE_LAST; i++)
> - {
> - if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
> - {
> - ix86_tune_features[i] = !clear;
> - if (dump)
> - fprintf (stderr, "Explicitly %s feature %s\n",
> - clear ? "clear" : "set", ix86_tune_feature_names[i]);
> - break;
> - }
> - }
> - if (i == X86_TUNE_LAST)
> - error ("unknown parameter to option %<-mtune-ctrl%>: %s",
> - clear ? curr_feature_string - 1 : curr_feature_string);
> +
> + if (!strcmp (curr_feature_string, "use_gather"))
> + {
> + ix86_tune_features[X86_TUNE_USE_GATHER_2PARTS] = !clear;
> + ix86_tune_features[X86_TUNE_USE_GATHER_4PARTS] = !clear;
> + ix86_tune_features[X86_TUNE_USE_GATHER_8PARTS] = !clear;
> + if (dump)
> + fprintf (stderr, "Explicitly %s features use_gather_2parts,"
> + " use_gather_4parts, use_gather_8parts\n",
> + clear ? "clear" : "set");
> +
> + }
> + else if (!strcmp (curr_feature_string, "use_scatter"))
> + {
> + ix86_tune_features[X86_TUNE_USE_SCATTER_2PARTS] = !clear;
> + ix86_tune_features[X86_TUNE_USE_SCATTER_4PARTS] = !clear;
> + ix86_tune_features[X86_TUNE_USE_SCATTER_8PARTS] = !clear;
> + if (dump)
> + fprintf (stderr, "Explicitly %s features use_scatter_2parts,"
> + " use_scatter_4parts, use_scatter_8parts\n",
> + clear ? "clear" : "set");
> + }
> + else
> + {
> + for (i = 0; i < X86_TUNE_LAST; i++)
> + {
> + if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
> + {
> + ix86_tune_features[i] = !clear;
> + if (dump)
> + fprintf (stderr, "Explicitly %s feature %s\n",
> + clear ? "clear" : "set", ix86_tune_feature_names[i]);
> + break;
> + }
> + }
> +
> + if (i == X86_TUNE_LAST)
> + error ("unknown parameter to option %<-mtune-ctrl%>: %s",
> + clear ? curr_feature_string - 1 : curr_feature_string);
> + }
> curr_feature_string = next_feature_string;
> }
> while (curr_feature_string);
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index d592ece700a..cd49fb9e47a 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -19193,7 +19193,7 @@ ix86_vectorize_builtin_scatter (const_tree vectype,
> ? !TARGET_USE_SCATTER_2PARTS
> : (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 4u)
> ? !TARGET_USE_SCATTER_4PARTS
> - : !TARGET_USE_SCATTER))
> + : !TARGET_USE_SCATTER_8PARTS))
> return NULL_TREE;
>
> if ((TREE_CODE (index_type) != INTEGER_TYPE
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index ef342fcee9b..f7330e818e7 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -403,10 +403,10 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
> ix86_tune_features[X86_TUNE_USE_GATHER_4PARTS]
> #define TARGET_USE_SCATTER_4PARTS \
> ix86_tune_features[X86_TUNE_USE_SCATTER_4PARTS]
> -#define TARGET_USE_GATHER \
> - ix86_tune_features[X86_TUNE_USE_GATHER]
> -#define TARGET_USE_SCATTER \
> - ix86_tune_features[X86_TUNE_USE_SCATTER]
> +#define TARGET_USE_GATHER_8PARTS \
> + ix86_tune_features[X86_TUNE_USE_GATHER_8PARTS]
> +#define TARGET_USE_SCATTER_8PARTS \
> + ix86_tune_features[X86_TUNE_USE_SCATTER_8PARTS]
> #define TARGET_FUSE_CMP_AND_BRANCH_32 \
> ix86_tune_features[X86_TUNE_FUSE_CMP_AND_BRANCH_32]
> #define TARGET_FUSE_CMP_AND_BRANCH_64 \
> diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
> index 8a43187f703..78b499304a4 100644
> --- a/gcc/config/i386/i386.opt
> +++ b/gcc/config/i386/i386.opt
> @@ -1302,3 +1302,11 @@ msm4
> Target Mask(ISA2_SM4) Var(ix86_isa_flags2) Save
> Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and
> SM4 built-in functions and code generation.
> +
> +mgather
> +Target Alias(mtune-ctrl=, use_gather, ^use_gather)
> +Enable vectorization for gather instruction.
> +
> +mscatter
> +Target Alias(mtune-ctrl=, use_scatter, ^use_scatter)
> +Enable vectorization for scatter instruction.
> diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
> index 40e04ecddbf..d7f20d3a118 100644
> --- a/gcc/config/i386/x86-tune.def
> +++ b/gcc/config/i386/x86-tune.def
> @@ -511,13 +511,13 @@ DEF_TUNE (X86_TUNE_USE_SCATTER_4PARTS, "use_scatter_4parts",
>
> /* X86_TUNE_USE_GATHER: Use gather instructions for vectors with 8 or more
> elements. */
> -DEF_TUNE (X86_TUNE_USE_GATHER, "use_gather",
> +DEF_TUNE (X86_TUNE_USE_GATHER_8PARTS, "use_gather_8parts",
> ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER4 | m_ALDERLAKE | m_ARROWLAKE
> | m_CORE_ATOM | m_GENERIC))
>
> /* X86_TUNE_USE_SCATTER: Use scater instructions for vectors with 8 or more
> elements. */
> -DEF_TUNE (X86_TUNE_USE_SCATTER, "use_scatter",
> +DEF_TUNE (X86_TUNE_USE_SCATTER_8PARTS, "use_scatter_8parts",
> ~(m_ZNVER4))
>
> /* X86_TUNE_AVOID_128FMA_CHAINS: Avoid creating loops with tight 128bit or
> --
> 2.31.1
>
On Mon, Aug 14, 2023 at 10:40 AM Hongtao Liu <crazylht@gmail.com> wrote:
>
> On Fri, Aug 11, 2023 at 2:02 PM liuhongt via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > Rename original use_gather to use_gather_8parts, Support
> > -mtune-ctrl={,^}use_gather to set/clear tune features
> > use_gather_{2parts, 4parts, 8parts}. Support the new option -mgather
> > as alias of -mtune-ctrl=, use_gather, ^use_gather.
> >
> > Similar for use_scatter.
> >
> > How about this version?
> I'll commit the patch if there's no objections in the next 24 hours.
Pushed to trunk and backported to release/gcc-{13,12,11}.
Note for GCC 11: the backport patch only supports -m{no-,}gather, since
the branch doesn't have scatter tunings.
For GCC 12/GCC 13, both -m{no-,}gather and -m{no-,}scatter are supported.
> >
> > gcc/ChangeLog:
> >
> > * config/i386/i386-builtins.cc
> > (ix86_vectorize_builtin_gather): Adjust for use_gather_8parts.
> > * config/i386/i386-options.cc (parse_mtune_ctrl_str):
> > Set/Clear tune features use_{gather,scatter}_{2parts, 4parts,
> > 8parts} for -mtune-crtl={,^}{use_gather,use_scatter}.
> > * config/i386/i386.cc (ix86_vectorize_builtin_scatter): Adjust
> > for use_scatter_8parts
> > * config/i386/i386.h (TARGET_USE_GATHER): Rename to ..
> > (TARGET_USE_GATHER_8PARTS): .. this.
> > (TARGET_USE_SCATTER): Rename to ..
> > (TARGET_USE_SCATTER_8PARTS): .. this.
> > * config/i386/x86-tune.def (X86_TUNE_USE_GATHER): Rename to
> > (X86_TUNE_USE_GATHER_8PARTS): .. this.
> > (X86_TUNE_USE_SCATTER): Rename to
> > (X86_TUNE_USE_SCATTER_8PARTS): .. this.
> > * config/i386/i386.opt: Add new options mgather, mscatter.
> > ---
> > gcc/config/i386/i386-builtins.cc | 2 +-
> > gcc/config/i386/i386-options.cc | 54 +++++++++++++++++++++++---------
> > gcc/config/i386/i386.cc | 2 +-
> > gcc/config/i386/i386.h | 8 ++---
> > gcc/config/i386/i386.opt | 8 +++++
> > gcc/config/i386/x86-tune.def | 4 +--
> > 6 files changed, 56 insertions(+), 22 deletions(-)
> >
> > diff --git a/gcc/config/i386/i386-builtins.cc b/gcc/config/i386/i386-builtins.cc
> > index 356b6dfd5fb..8a0b8dfe073 100644
> > --- a/gcc/config/i386/i386-builtins.cc
> > +++ b/gcc/config/i386/i386-builtins.cc
> > @@ -1657,7 +1657,7 @@ ix86_vectorize_builtin_gather (const_tree mem_vectype,
> > ? !TARGET_USE_GATHER_2PARTS
> > : (known_eq (TYPE_VECTOR_SUBPARTS (mem_vectype), 4u)
> > ? !TARGET_USE_GATHER_4PARTS
> > - : !TARGET_USE_GATHER)))
> > + : !TARGET_USE_GATHER_8PARTS)))
> > return NULL_TREE;
> >
> > if ((TREE_CODE (index_type) != INTEGER_TYPE
> > diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
> > index 127ee24203c..b8d038af69d 100644
> > --- a/gcc/config/i386/i386-options.cc
> > +++ b/gcc/config/i386/i386-options.cc
> > @@ -1731,20 +1731,46 @@ parse_mtune_ctrl_str (struct gcc_options *opts, bool dump)
> > curr_feature_string++;
> > clear = true;
> > }
> > - for (i = 0; i < X86_TUNE_LAST; i++)
> > - {
> > - if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
> > - {
> > - ix86_tune_features[i] = !clear;
> > - if (dump)
> > - fprintf (stderr, "Explicitly %s feature %s\n",
> > - clear ? "clear" : "set", ix86_tune_feature_names[i]);
> > - break;
> > - }
> > - }
> > - if (i == X86_TUNE_LAST)
> > - error ("unknown parameter to option %<-mtune-ctrl%>: %s",
> > - clear ? curr_feature_string - 1 : curr_feature_string);
> > +
> > + if (!strcmp (curr_feature_string, "use_gather"))
> > + {
> > + ix86_tune_features[X86_TUNE_USE_GATHER_2PARTS] = !clear;
> > + ix86_tune_features[X86_TUNE_USE_GATHER_4PARTS] = !clear;
> > + ix86_tune_features[X86_TUNE_USE_GATHER_8PARTS] = !clear;
> > + if (dump)
> > + fprintf (stderr, "Explicitly %s features use_gather_2parts,"
> > + " use_gather_4parts, use_gather_8parts\n",
> > + clear ? "clear" : "set");
> > +
> > + }
> > + else if (!strcmp (curr_feature_string, "use_scatter"))
> > + {
> > + ix86_tune_features[X86_TUNE_USE_SCATTER_2PARTS] = !clear;
> > + ix86_tune_features[X86_TUNE_USE_SCATTER_4PARTS] = !clear;
> > + ix86_tune_features[X86_TUNE_USE_SCATTER_8PARTS] = !clear;
> > + if (dump)
> > + fprintf (stderr, "Explicitly %s features use_scatter_2parts,"
> > + " use_scatter_4parts, use_scatter_8parts\n",
> > + clear ? "clear" : "set");
> > + }
> > + else
> > + {
> > + for (i = 0; i < X86_TUNE_LAST; i++)
> > + {
> > + if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
> > + {
> > + ix86_tune_features[i] = !clear;
> > + if (dump)
> > + fprintf (stderr, "Explicitly %s feature %s\n",
> > + clear ? "clear" : "set", ix86_tune_feature_names[i]);
> > + break;
> > + }
> > + }
> > +
> > + if (i == X86_TUNE_LAST)
> > + error ("unknown parameter to option %<-mtune-ctrl%>: %s",
> > + clear ? curr_feature_string - 1 : curr_feature_string);
> > + }
> > curr_feature_string = next_feature_string;
> > }
> > while (curr_feature_string);
> > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> > index d592ece700a..cd49fb9e47a 100644
> > --- a/gcc/config/i386/i386.cc
> > +++ b/gcc/config/i386/i386.cc
> > @@ -19193,7 +19193,7 @@ ix86_vectorize_builtin_scatter (const_tree vectype,
> > ? !TARGET_USE_SCATTER_2PARTS
> > : (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 4u)
> > ? !TARGET_USE_SCATTER_4PARTS
> > - : !TARGET_USE_SCATTER))
> > + : !TARGET_USE_SCATTER_8PARTS))
> > return NULL_TREE;
> >
> > if ((TREE_CODE (index_type) != INTEGER_TYPE
> > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> > index ef342fcee9b..f7330e818e7 100644
> > --- a/gcc/config/i386/i386.h
> > +++ b/gcc/config/i386/i386.h
> > @@ -403,10 +403,10 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
> > ix86_tune_features[X86_TUNE_USE_GATHER_4PARTS]
> > #define TARGET_USE_SCATTER_4PARTS \
> > ix86_tune_features[X86_TUNE_USE_SCATTER_4PARTS]
> > -#define TARGET_USE_GATHER \
> > - ix86_tune_features[X86_TUNE_USE_GATHER]
> > -#define TARGET_USE_SCATTER \
> > - ix86_tune_features[X86_TUNE_USE_SCATTER]
> > +#define TARGET_USE_GATHER_8PARTS \
> > + ix86_tune_features[X86_TUNE_USE_GATHER_8PARTS]
> > +#define TARGET_USE_SCATTER_8PARTS \
> > + ix86_tune_features[X86_TUNE_USE_SCATTER_8PARTS]
> > #define TARGET_FUSE_CMP_AND_BRANCH_32 \
> > ix86_tune_features[X86_TUNE_FUSE_CMP_AND_BRANCH_32]
> > #define TARGET_FUSE_CMP_AND_BRANCH_64 \
> > diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
> > index 8a43187f703..78b499304a4 100644
> > --- a/gcc/config/i386/i386.opt
> > +++ b/gcc/config/i386/i386.opt
> > @@ -1302,3 +1302,11 @@ msm4
> > Target Mask(ISA2_SM4) Var(ix86_isa_flags2) Save
> > Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and
> > SM4 built-in functions and code generation.
> > +
> > +mgather
> > +Target Alias(mtune-ctrl=, use_gather, ^use_gather)
> > +Enable vectorization for gather instruction.
> > +
> > +mscatter
> > +Target Alias(mtune-ctrl=, use_scatter, ^use_scatter)
> > +Enable vectorization for scatter instruction.
> > diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
> > index 40e04ecddbf..d7f20d3a118 100644
> > --- a/gcc/config/i386/x86-tune.def
> > +++ b/gcc/config/i386/x86-tune.def
> > @@ -511,13 +511,13 @@ DEF_TUNE (X86_TUNE_USE_SCATTER_4PARTS, "use_scatter_4parts",
> >
> > /* X86_TUNE_USE_GATHER: Use gather instructions for vectors with 8 or more
> > elements. */
> > -DEF_TUNE (X86_TUNE_USE_GATHER, "use_gather",
> > +DEF_TUNE (X86_TUNE_USE_GATHER_8PARTS, "use_gather_8parts",
> > ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER4 | m_ALDERLAKE | m_ARROWLAKE
> > | m_CORE_ATOM | m_GENERIC))
> >
> > /* X86_TUNE_USE_SCATTER: Use scater instructions for vectors with 8 or more
> > elements. */
> > -DEF_TUNE (X86_TUNE_USE_SCATTER, "use_scatter",
> > +DEF_TUNE (X86_TUNE_USE_SCATTER_8PARTS, "use_scatter_8parts",
> > ~(m_ZNVER4))
> >
> > /* X86_TUNE_AVOID_128FMA_CHAINS: Avoid creating loops with tight 128bit or
> > --
> > 2.31.1
> >
>
>
> --
> BR,
> Hongtao
@@ -1657,7 +1657,7 @@ ix86_vectorize_builtin_gather (const_tree mem_vectype,
? !TARGET_USE_GATHER_2PARTS
: (known_eq (TYPE_VECTOR_SUBPARTS (mem_vectype), 4u)
? !TARGET_USE_GATHER_4PARTS
- : !TARGET_USE_GATHER)))
+ : !TARGET_USE_GATHER_8PARTS)))
return NULL_TREE;
if ((TREE_CODE (index_type) != INTEGER_TYPE
@@ -1731,20 +1731,46 @@ parse_mtune_ctrl_str (struct gcc_options *opts, bool dump)
curr_feature_string++;
clear = true;
}
- for (i = 0; i < X86_TUNE_LAST; i++)
- {
- if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
- {
- ix86_tune_features[i] = !clear;
- if (dump)
- fprintf (stderr, "Explicitly %s feature %s\n",
- clear ? "clear" : "set", ix86_tune_feature_names[i]);
- break;
- }
- }
- if (i == X86_TUNE_LAST)
- error ("unknown parameter to option %<-mtune-ctrl%>: %s",
- clear ? curr_feature_string - 1 : curr_feature_string);
+
+ if (!strcmp (curr_feature_string, "use_gather"))
+ {
+ ix86_tune_features[X86_TUNE_USE_GATHER_2PARTS] = !clear;
+ ix86_tune_features[X86_TUNE_USE_GATHER_4PARTS] = !clear;
+ ix86_tune_features[X86_TUNE_USE_GATHER_8PARTS] = !clear;
+ if (dump)
+ fprintf (stderr, "Explicitly %s features use_gather_2parts,"
+ " use_gather_4parts, use_gather_8parts\n",
+ clear ? "clear" : "set");
+
+ }
+ else if (!strcmp (curr_feature_string, "use_scatter"))
+ {
+ ix86_tune_features[X86_TUNE_USE_SCATTER_2PARTS] = !clear;
+ ix86_tune_features[X86_TUNE_USE_SCATTER_4PARTS] = !clear;
+ ix86_tune_features[X86_TUNE_USE_SCATTER_8PARTS] = !clear;
+ if (dump)
+ fprintf (stderr, "Explicitly %s features use_scatter_2parts,"
+ " use_scatter_4parts, use_scatter_8parts\n",
+ clear ? "clear" : "set");
+ }
+ else
+ {
+ for (i = 0; i < X86_TUNE_LAST; i++)
+ {
+ if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
+ {
+ ix86_tune_features[i] = !clear;
+ if (dump)
+ fprintf (stderr, "Explicitly %s feature %s\n",
+ clear ? "clear" : "set", ix86_tune_feature_names[i]);
+ break;
+ }
+ }
+
+ if (i == X86_TUNE_LAST)
+ error ("unknown parameter to option %<-mtune-ctrl%>: %s",
+ clear ? curr_feature_string - 1 : curr_feature_string);
+ }
curr_feature_string = next_feature_string;
}
while (curr_feature_string);
@@ -19193,7 +19193,7 @@ ix86_vectorize_builtin_scatter (const_tree vectype,
? !TARGET_USE_SCATTER_2PARTS
: (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 4u)
? !TARGET_USE_SCATTER_4PARTS
- : !TARGET_USE_SCATTER))
+ : !TARGET_USE_SCATTER_8PARTS))
return NULL_TREE;
if ((TREE_CODE (index_type) != INTEGER_TYPE
@@ -403,10 +403,10 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
ix86_tune_features[X86_TUNE_USE_GATHER_4PARTS]
#define TARGET_USE_SCATTER_4PARTS \
ix86_tune_features[X86_TUNE_USE_SCATTER_4PARTS]
-#define TARGET_USE_GATHER \
- ix86_tune_features[X86_TUNE_USE_GATHER]
-#define TARGET_USE_SCATTER \
- ix86_tune_features[X86_TUNE_USE_SCATTER]
+#define TARGET_USE_GATHER_8PARTS \
+ ix86_tune_features[X86_TUNE_USE_GATHER_8PARTS]
+#define TARGET_USE_SCATTER_8PARTS \
+ ix86_tune_features[X86_TUNE_USE_SCATTER_8PARTS]
#define TARGET_FUSE_CMP_AND_BRANCH_32 \
ix86_tune_features[X86_TUNE_FUSE_CMP_AND_BRANCH_32]
#define TARGET_FUSE_CMP_AND_BRANCH_64 \
@@ -1302,3 +1302,11 @@ msm4
Target Mask(ISA2_SM4) Var(ix86_isa_flags2) Save
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and
SM4 built-in functions and code generation.
+
+mgather
+Target Alias(mtune-ctrl=, use_gather, ^use_gather)
+Enable vectorization for gather instruction.
+
+mscatter
+Target Alias(mtune-ctrl=, use_scatter, ^use_scatter)
+Enable vectorization for scatter instruction.
@@ -511,13 +511,13 @@ DEF_TUNE (X86_TUNE_USE_SCATTER_4PARTS, "use_scatter_4parts",
/* X86_TUNE_USE_GATHER: Use gather instructions for vectors with 8 or more
elements. */
-DEF_TUNE (X86_TUNE_USE_GATHER, "use_gather",
+DEF_TUNE (X86_TUNE_USE_GATHER_8PARTS, "use_gather_8parts",
~(m_ZNVER1 | m_ZNVER2 | m_ZNVER4 | m_ALDERLAKE | m_ARROWLAKE
| m_CORE_ATOM | m_GENERIC))
/* X86_TUNE_USE_SCATTER: Use scater instructions for vectors with 8 or more
elements. */
-DEF_TUNE (X86_TUNE_USE_SCATTER, "use_scatter",
+DEF_TUNE (X86_TUNE_USE_SCATTER_8PARTS, "use_scatter_8parts",
~(m_ZNVER4))
/* X86_TUNE_AVOID_128FMA_CHAINS: Avoid creating loops with tight 128bit or