@@ -797,6 +797,8 @@ get_available_features (struct __processor_model *cpu_model,
set_feature (FEATURE_HRESET);
if (eax & bit_CMPCCXADD)
set_feature(FEATURE_CMPCCXADD);
+ if (edx & bit_PREFETCHI)
+ set_feature (FEATURE_PREFETCHI);
if (avx_usable)
{
if (eax & bit_AVXVNNI)
@@ -112,6 +112,7 @@ along with GCC; see the file COPYING3. If not see
#define OPTION_MASK_ISA2_AVXNECONVERT_SET OPTION_MASK_ISA2_AVXNECONVERT
#define OPTION_MASK_ISA2_CMPCCXADD_SET OPTION_MASK_ISA2_CMPCCXADD
#define OPTION_MASK_ISA2_AMX_FP16_SET OPTION_MASK_ISA2_AMX_FP16
+#define OPTION_MASK_ISA2_PREFETCHI_SET OPTION_MASK_ISA2_PREFETCHI
/* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
as -msse4.2. */
@@ -287,6 +288,7 @@ along with GCC; see the file COPYING3. If not see
#define OPTION_MASK_ISA2_AVXNECONVERT_UNSET OPTION_MASK_ISA2_AVXNECONVERT
#define OPTION_MASK_ISA2_CMPCCXADD_UNSET OPTION_MASK_ISA2_CMPCCXADD
#define OPTION_MASK_ISA2_AMX_FP16_UNSET OPTION_MASK_ISA2_AMX_FP16
+#define OPTION_MASK_ISA2_PREFETCHI_UNSET OPTION_MASK_ISA2_PREFETCHI
/* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same
as -mno-sse4.1. */
@@ -1211,6 +1213,19 @@ ix86_handle_option (struct gcc_options *opts,
}
return true;
+ case OPT_mprefetchi:
+ if (value)
+ {
+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_PREFETCHI_SET;
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_PREFETCHI_SET;
+ }
+ else
+ {
+ opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_PREFETCHI_UNSET;
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_PREFETCHI_UNSET;
+ }
+ return true;
+
case OPT_mfma:
if (value)
{
@@ -246,6 +246,7 @@ enum processor_features
FEATURE_AVXNECONVERT,
FEATURE_CMPCCXADD,
FEATURE_AMX_FP16,
+ FEATURE_PREFETCHI,
CPU_FEATURE_MAX
};
@@ -182,4 +182,5 @@ ISA_NAMES_TABLE_START
P_NONE, "-mavxneconvert")
ISA_NAMES_TABLE_ENTRY("cmpccxadd", FEATURE_CMPCCXADD, P_NONE, "-mcmpccxadd")
ISA_NAMES_TABLE_ENTRY("amx-fp16", FEATURE_AMX_FP16, P_NONE, "-mamx-fp16")
+ ISA_NAMES_TABLE_ENTRY("prefetchi", FEATURE_PREFETCHI, P_NONE, "-mprefetchi")
ISA_NAMES_TABLE_END
@@ -423,7 +423,7 @@ i[34567]86-*-* | x86_64-*-*)
hresetintrin.h keylockerintrin.h avxvnniintrin.h
mwaitintrin.h avx512fp16intrin.h avx512fp16vlintrin.h
avxifmaintrin.h avxvnniint8intrin.h avxneconvertintrin.h
- cmpccxaddintrin.h amxfp16intrin.h"
+ cmpccxaddintrin.h amxfp16intrin.h prfchiintrin.h"
;;
ia64-*-*)
extra_headers=ia64intrin.h
@@ -54,6 +54,7 @@
#define bit_AVXVNNIINT8 (1 << 4)
#define bit_AVXNECONVERT (1 << 5)
#define bit_CMPXCHG8B (1 << 8)
+#define bit_PREFETCHI (1 << 14)
#define bit_CMOV (1 << 15)
#define bit_MMX (1 << 23)
#define bit_FXSAVE (1 << 24)
@@ -650,6 +650,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
def_or_undef (parse_in, "__CMPCCXADD__");
if (isa_flag2 & OPTION_MASK_ISA2_AMX_FP16)
def_or_undef (parse_in, "__AMX_FP16__");
+ if (isa_flag2 & OPTION_MASK_ISA2_PREFETCHI)
+ def_or_undef (parse_in, "__PREFETCHI__");
if (TARGET_IAMCU)
{
def_or_undef (parse_in, "__iamcu");
@@ -114,3 +114,4 @@ DEF_PTA(AVXVNNIINT8)
DEF_PTA(AVXNECONVERT)
DEF_PTA(CMPCCXADD)
DEF_PTA(AMX_FP16)
+DEF_PTA(PREFETCHI)
@@ -232,7 +232,8 @@ static struct ix86_target_opts isa2_opts[] =
{ "-mavxvnniint8", OPTION_MASK_ISA2_AVXVNNIINT8 },
{ "-mavxneconvert", OPTION_MASK_ISA2_AVXNECONVERT },
{ "-mcmpccxadd", OPTION_MASK_ISA2_CMPCCXADD },
- { "-mamx-fp16", OPTION_MASK_ISA2_AMX_FP16 }
+ { "-mamx-fp16", OPTION_MASK_ISA2_AMX_FP16 },
+ { "-mprefetchi", OPTION_MASK_ISA2_PREFETCHI }
};
static struct ix86_target_opts isa_opts[] =
{
@@ -1084,6 +1085,7 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
IX86_ATTR_ISA ("avxneconvert", OPT_mavxneconvert),
IX86_ATTR_ISA ("cmpccxadd", OPT_mcmpccxadd),
IX86_ATTR_ISA ("amx-fp16", OPT_mamx_fp16),
+ IX86_ATTR_ISA ("prefetchi", OPT_mprefetchi),
/* enum options */
IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
@@ -23637,47 +23637,65 @@
(match_operand:SI 1 "const_int_operand")
(match_operand:SI 2 "const_int_operand")
(match_operand:SI 3 "const_int_operand"))]
- "TARGET_3DNOW || TARGET_PREFETCH_SSE || TARGET_PRFCHW || TARGET_PREFETCHWT1"
+ "TARGET_3DNOW || TARGET_PREFETCH_SSE || TARGET_PRFCHW || TARGET_PREFETCHWT1
+ || TARGET_PREFETCHI"
{
- if (INTVAL (operands[3]) == 0)
- {
- warning (0, "instruction prefetch is not supported; using data prefetch");
- operands[3] = const1_rtx;
- }
bool write = operands[1] != const0_rtx;
int locality = INTVAL (operands[2]);
+ bool data = operands[3] != const0_rtx;
gcc_assert (IN_RANGE (locality, 0, 3));
- /* Use 3dNOW prefetch in case we are asking for write prefetch not
- supported by SSE counterpart (non-SSE2 athlon machines) or the
- SSE prefetch is not available (K6 machines). Otherwise use SSE
- prefetch as it allows specifying of locality. */
-
- if (write)
+ if (data)
{
- if (TARGET_PREFETCHWT1)
- operands[2] = GEN_INT (MAX (locality, 2));
- else if (TARGET_PRFCHW)
- operands[2] = GEN_INT (3);
- else if (TARGET_3DNOW && !TARGET_SSE2)
- operands[2] = GEN_INT (3);
- else if (TARGET_PREFETCH_SSE)
- operands[1] = const0_rtx;
+ /* Use 3dNOW prefetch in case we are asking for write prefetch not
+ supported by SSE counterpart (non-SSE2 athlon machines) or the
+ SSE prefetch is not available (K6 machines). Otherwise use SSE
+ prefetch as it allows specifying of locality. */
+
+ if (write)
+ {
+ if (TARGET_PREFETCHWT1)
+ operands[2] = GEN_INT (MAX (locality, 2));
+ else if (TARGET_PRFCHW)
+ operands[2] = GEN_INT (3);
+ else if (TARGET_3DNOW && !TARGET_SSE2)
+ operands[2] = GEN_INT (3);
+ else if (TARGET_PREFETCH_SSE)
+ operands[1] = const0_rtx;
+ else
+ {
+ gcc_assert (TARGET_3DNOW);
+ operands[2] = GEN_INT (3);
+ }
+ }
else
{
- gcc_assert (TARGET_3DNOW);
- operands[2] = GEN_INT (3);
+ if (TARGET_PREFETCH_SSE)
+ ;
+ else
+ {
+ gcc_assert (TARGET_3DNOW);
+ operands[2] = GEN_INT (3);
+ }
}
}
else
{
- if (TARGET_PREFETCH_SSE)
+ /* GOT/PLT_PIC should not be available for instruction prefetch.
+ It must be real instruction address. */
+ if (TARGET_PREFETCHI && TARGET_64BIT
+ && local_func_symbolic_operand (operands[0], GET_MODE (operands[0])))
;
else
{
- gcc_assert (TARGET_3DNOW);
- operands[2] = GEN_INT (3);
+ /* Ignore the hint. */
+ warning (0, "instruction prefetch applies when in 64-bit mode"
+ " with RIP-relative addressing and"
+ " option %<-mprefetchi%>;"
+ " they stay NOPs otherwise");
+ emit_insn (gen_nop ());
+ DONE;
}
}
})
@@ -23733,6 +23751,28 @@
(symbol_ref "memory_address_length (operands[0], false)"))
(set_attr "memory" "none")])
+(define_insn "*prefetch_i"
+ [(prefetch (match_operand 0 "local_func_symbolic_operand" "p")
+ (const_int 0)
+ (match_operand:SI 1 "const_int_operand")
+ (const_int 0))]
+ "TARGET_PREFETCHI"
+{
+ static const char * const patterns[2] = {
+ "prefetchit1\t%a0", "prefetchit0\t%a0"
+ };
+
+ int locality = INTVAL (operands[1]);
+ gcc_assert (IN_RANGE (locality, 2, 3));
+
+ return patterns[locality - 2];
+}
+ [(set_attr "type" "sse")
+ (set_attr "atom_sse_attr" "prefetch")
+ (set (attr "length_address")
+ (symbol_ref "memory_address_length (operands[0], false)"))
+ (set_attr "memory" "none")])
+
(define_expand "stack_protect_set"
[(match_operand 0 "memory_operand")
(match_operand 1 "memory_operand")]
@@ -1238,3 +1238,7 @@ CMPCCXADD build-in functions and code generation.
mamx-fp16
Target Mask(ISA2_AMX_FP16) Var(ix86_isa_flags2) Save
Support AMX-FP16 built-in functions and code generation.
+
+mprefetchi
+Target Mask(ISA2_PREFETCHI) Var(ix86_isa_flags2) Save
+Support PREFETCHI built-in functions and code generation.
@@ -134,6 +134,8 @@
#include <amxbf16intrin.h>
+#include <prfchiintrin.h>
+
#include <prfchwintrin.h>
#include <keylockerintrin.h>
@@ -610,6 +610,21 @@
return false;
})
+(define_predicate "local_func_symbolic_operand"
+ (match_operand 0 "local_symbolic_operand")
+{
+ if (GET_CODE (op) == CONST
+ && GET_CODE (XEXP (op, 0)) == PLUS
+ && CONST_INT_P (XEXP (XEXP (op, 0), 1)))
+ op = XEXP (XEXP (op, 0), 0);
+
+ if (GET_CODE (op) == SYMBOL_REF
+ && !SYMBOL_REF_FUNCTION_P (op))
+ return false;
+
+ return true;
+})
+
;; Test for a legitimate @GOTOFF operand.
;;
;; VxWorks does not impose a fixed gap between segments; the run-time
new file mode 100644
@@ -0,0 +1,39 @@
+/* Copyright (C) 2022 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if !defined _IMMINTRIN_H_INCLUDED
+# error "Never use <prfchiintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _PRFCHIINTRIN_H_INCLUDED
+#define _PRFCHIINTRIN_H_INCLUDED
+
+#ifdef __x86_64__
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_prefetchi (void* __P)
+{
+ __builtin_prefetch (__P, 0, 3, 0 /* _MM_HINT_IT0 */);
+}
+#endif
+
+#endif /* _PRFCHIINTRIN_H_INCLUDED */
@@ -36,6 +36,8 @@
/* Constants for use with _mm_prefetch. */
enum _mm_hint
{
+ _MM_HINT_IT0 = 19,
+ _MM_HINT_IT1 = 18,
/* _MM_HINT_ET is _MM_HINT_T with set 3rd bit. */
_MM_HINT_ET0 = 7,
_MM_HINT_ET1 = 6,
@@ -51,11 +53,11 @@ enum _mm_hint
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_prefetch (const void *__P, enum _mm_hint __I)
{
- __builtin_prefetch (__P, (__I & 0x4) >> 2, __I & 0x3);
+ __builtin_prefetch (__P, (__I & 0x4) >> 2, __I & 0x3, ((__I & 0x10) >> 4) ^ 0x1);
}
#else
#define _mm_prefetch(P, I) \
- __builtin_prefetch ((P), ((I & 0x4) >> 2), (I & 0x3))
+ __builtin_prefetch ((P), ((I & 0x4) >> 2), (I & 0x3), (((I & 0x10) >> 4) ^ 0x1))
#endif
#ifndef __SSE__
@@ -7085,6 +7085,11 @@ Enable/disable the generation of the CMPccXADD instructions.
@cindex @code{target("amx-fp16")} function attribute, x86
Enable/disable the generation of the AMX-FP16 instructions.
+@item prefetchi
+@itemx no-prefetchi
+@cindex @code{target("prefetchi")} function attribute, x86
+Enable/disable the generation of the PREFETCHI instructions.
+
@item cld
@itemx no-cld
@cindex @code{target("cld")} function attribute, x86
@@ -1437,6 +1437,7 @@ See RS/6000 and PowerPC Options.
-mrdseed -msgx -mavx512vp2intersect -mserialize -mtsxldtrk@gol
-mamx-tile -mamx-int8 -mamx-bf16 -muintr -mhreset -mavxvnni@gol
-mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 @gol
+-mprefetchi @gol
-mcldemote -mms-bitfields -mno-align-stringops -minline-all-stringops @gol
-minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol
-mkl -mwidekl @gol
@@ -32916,6 +32917,9 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
@need 200
@itemx -mamx-fp16
@opindex mamx-fp16
+@need 200
+@itemx -mprefetchi
+@opindex mprefetchi
These switches enable the use of instructions in the MMX, SSE,
SSE2, SSE3, SSSE3, SSE4, SSE4A, SSE4.1, SSE4.2, AVX, AVX2, AVX512F, AVX512PF,
AVX512ER, AVX512CD, AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA, AVX512VBMI, SHA,
@@ -32926,9 +32930,9 @@ XSAVEOPT, XSAVEC, XSAVES, RTM, HLE, TBM, MWAITX, CLZERO, PKU, AVX512VBMI2,
GFNI, VAES, WAITPKG, VPCLMULQDQ, AVX512BITALG, MOVDIRI, MOVDIR64B, AVX512BF16,
ENQCMD, AVX512VPOPCNTDQ, AVX5124FMAPS, AVX512VNNI, AVX5124VNNIW, SERIALIZE,
UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL, AVXVNNI, AVX512FP16,
-AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, AMX-FP16 or CLDEMOTE extended
-instruction sets. Each has a corresponding @option{-mno-} option to disable
-use of these instructions.
+AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, AMX-FP16, PREFETCHI or CLDEMOTE
+extended instruction sets. Each has a corresponding @option{-mno-} option to
+disable use of these instructions.
These extensions are also available as built-in functions: see
@ref{x86 Built-in Functions}, for details of the functions enabled and
@@ -2535,6 +2535,9 @@ Target does not require strict alignment.
@item pie_copyreloc
The x86-64 target linker supports PIE with copy reloc.
+@item prefetchi
+Target supports the execution of @code{prefetchi} instructions.
+
@item rdrand
Target supports x86 @code{rdrand} instruction.
@@ -1,5 +1,5 @@
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16" } */
+/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi" } */
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
@@ -1,5 +1,5 @@
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16" } */
+/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi" } */
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
@@ -29,3 +29,5 @@ int main ()
/* { dg-final { scan-assembler "prefetchw" } } */
/* { dg-final { scan-assembler-not "prefetchnta" } } */
/* { dg-final { scan-assembler-not "prefetcht" } } */
+/* { dg-final { scan-assembler-not "prefetchit0" } } */
+/* { dg-final { scan-assembler-not "prefetchit1" } } */
@@ -29,3 +29,5 @@ int main ()
/* { dg-final { scan-assembler "prefetchw" } } */
/* { dg-final { scan-assembler "prefetchnta" } } */
/* { dg-final { scan-assembler "prefetcht" } } */
+/* { dg-final { scan-assembler-not "prefetchit0" } } */
+/* { dg-final { scan-assembler-not "prefetchit1" } } */
@@ -30,3 +30,5 @@ int main ()
/* { dg-final { scan-assembler "prefetcht1" } } */
/* { dg-final { scan-assembler "prefetcht2" } } */
/* { dg-final { scan-assembler-not "prefetchw" } } */
+/* { dg-final { scan-assembler-not "prefetchit0" } } */
+/* { dg-final { scan-assembler-not "prefetchit1" } } */
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mavx2 -maes -mpclmul -mgfni -mavx512bw -mavx512fp16 -mavx512vl" } */
+/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mavx2 -maes -mpclmul -mgfni -mavx512bw -mavx512fp16 -mavx512vl -mprefetchi" } */
/* { dg-add-options bind_pic_locally } */
#include <mm_malloc.h>
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mavx2 -msse4a -maes -mpclmul -mavx512bw -mavx512fp16 -mavx512vl" } */
+/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mavx2 -msse4a -maes -mpclmul -mavx512bw -mavx512fp16 -mavx512vl -mprefetchi" } */
/* { dg-add-options bind_pic_locally } */
#include <mm_malloc.h>
@@ -85,6 +85,7 @@ extern void test_avxvnniint8 (void) __attribute__((__target__("avxvnniint8")));
extern void test_avxneconvert (void) __attribute__((__target__("avxneconvert")));
extern void test_cmpccxadd (void) __attribute__((__target__("cmpccxadd")));
extern void test_amx_fp16 (void) __attribute__((__target__("amx-fp16")));
+extern void test_prefetchi (void) __attribute__((__target__("prefetchi")));
extern void test_no_sgx (void) __attribute__((__target__("no-sgx")));
extern void test_no_avx5124fmaps(void) __attribute__((__target__("no-avx5124fmaps")));
@@ -171,6 +172,7 @@ extern void test_no_avxvnniint8 (void) __attribute__((__target__("no-avxvnniint
extern void test_no_avxneconvert (void) __attribute__((__target__("no-avxneconvert")));
extern void test_no_cmpccxadd (void) __attribute__((__target__("no-cmpccxadd")));
extern void test_no_amx_fp16 (void) __attribute__((__target__("no-amx-fp16")));
+extern void test_no_prefetchi (void) __attribute__((__target__("no-prefetchi")));
extern void test_arch_nocona (void) __attribute__((__target__("arch=nocona")));
extern void test_arch_core2 (void) __attribute__((__target__("arch=core2")));
@@ -1,11 +1,33 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -msse" } */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mprefetchi -O2" } */
+/* { dg-final { scan-assembler-times "\[ \\t\]+prefetchit0\[ \\t\]+" 2 } } */
+/* { dg-final { scan-assembler "\[ \\t\]+prefetchit1\[ \\t\]+" } } */
-/* Remind users that instruction prefetch is not supported yet. */
+#include <x86intrin.h>
-void
-bad(const int* p)
+int
+bar (int a)
{
- __builtin_prefetch(p, 0, 3, 0); /* { dg-warning "instruction prefetch is not supported; using data prefetch" } */
- __builtin_prefetch(p, 0, 2, 0); /* { dg-warning "instruction prefetch is not supported; using data prefetch" } */
+ return a + 1;
+}
+
+int
+foo1 (int b)
+{
+ _mm_prefetch (bar, _MM_HINT_IT0);
+ return bar (b) + 1;
+}
+
+int
+foo2 (int b)
+{
+ _mm_prefetch (bar, _MM_HINT_IT1);
+ return bar (b) + 1;
+}
+
+int
+foo3 (int b)
+{
+ _m_prefetchi (bar);
+ return bar (b) + 1;
}
new file mode 100644
@@ -0,0 +1,26 @@
+/* { dg-do compile { target { ia32 } } } */
+/* { dg-options "-mprefetchi -fpie -O2" } */
+/* { dg-final { scan-assembler-not "\[ \\t\]+prefetchit0" } } */
+/* { dg-final { scan-assembler-not "\[ \\t\]+prefetchit1" } } */
+
+#include <x86intrin.h>
+
+int
+bar (int a)
+{
+ return a + 1;
+}
+
+int
+foo1 (int b)
+{
+ __builtin_prefetch (bar, 0, 3, 0); /* { dg-warning "instruction prefetch applies when in 64-bit mode with RIP-relative addressing and option '-mprefetchi'; they stay NOPs otherwise" } */
+ return bar (b) + 1;
+}
+
+int
+foo2 (int b)
+{
+ __builtin_prefetch (bar, 0, 2, 0); /* { dg-warning "instruction prefetch applies when in 64-bit mode with RIP-relative addressing and option '-mprefetchi'; they stay NOPs otherwise" } */
+ return bar (b) + 1;
+}
new file mode 100644
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-mprefetchi -O2" } */
+/* { dg-final { scan-assembler-not "prefetchit0" } } */
+/* { dg-final { scan-assembler-not "prefetchit1" } } */
+
+#include <x86intrin.h>
+
+void* p;
+
+void extern
+prefetchi_test (void)
+{
+ __builtin_prefetch (p, 0, 3, 0); /* { dg-warning "instruction prefetch applies when in 64-bit mode with RIP-relative addressing and option '-mprefetchi'; they stay NOPs otherwise" } */
+ __builtin_prefetch (p, 0, 2, 0); /* { dg-warning "instruction prefetch applies when in 64-bit mode with RIP-relative addressing and option '-mprefetchi'; they stay NOPs otherwise" } */
+}
@@ -3,7 +3,7 @@
popcntintrin.h gfniintrin.h and mm_malloc.h are usable
with -O -std=c89 -pedantic-errors. */
/* { dg-do compile } */
-/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16" } */
+/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16 -mprefetchi" } */
#include <x86intrin.h>
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16" } */
+/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi" } */
/* { dg-add-options bind_pic_locally } */
#include <mm_malloc.h>
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16" } */
+/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16 -mprefetchi" } */
/* { dg-add-options bind_pic_locally } */
#include <mm_malloc.h>
@@ -103,7 +103,7 @@
#ifndef DIFFERENT_PRAGMAS
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8,avxneconvert,amx-fp16")
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8,avxneconvert,amx-fp16,prefetchi")
#endif
/* Following intrinsics require immediate arguments. They
@@ -220,7 +220,7 @@ test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1)
/* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA) */
#ifdef DIFFERENT_PRAGMAS
-#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8,avxneconvert,amx-fp16")
+#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8,avxneconvert,amx-fp16,prefetchi")
#endif
#include <immintrin.h>
test_1 (_cvtss_sh, unsigned short, float, 1)
@@ -847,6 +847,6 @@
#define __builtin_ia32_cmpccxadd(A, B, C, D) __builtin_ia32_cmpccxadd(A, B, C, 1)
#define __builtin_ia32_cmpccxadd64(A, B, C, D) __builtin_ia32_cmpccxadd64(A, B, C, 1)
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8,avxneconvert,cmpccxadd,amx-fp16")
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8,avxneconvert,cmpccxadd,amx-fp16,prefetchi")
#include <x86intrin.h>