new file mode 100644
@@ -0,0 +1,13 @@
+/* Test C2x (= Unicode) rules for characters in identifiers. */
+/* { dg-do preprocess } */
+/* { dg-options "-std=c2x -pedantic-errors" } */
+
+¨
+
+/* The requirement for NFC only applies in identifiers, not pp-numbers. */
+
+À /* { dg-error "not in NFC" } */
+ÿÀ /* { dg-error "not in NFC" } */
+
+0À /* { dg-warning "not in NFC" } */
+.1À /* { dg-warning "not in NFC" } */
new file mode 100644
@@ -0,0 +1,13 @@
+/* Test C2x (= Unicode) rules for characters in identifiers. */
+/* { dg-do preprocess } */
+/* { dg-options "-std=c2x -pedantic-errors" } */
+
+\u00A8 /* { dg-error "is not valid in an identifier" } */
+
+/* The requirement for NFC only applies in identifiers, not pp-numbers. */
+
+A\u0300 /* { dg-error "not in NFC" } */
+\u00ffA\u0300 /* { dg-error "not in NFC" } */
+
+0A\u0300 /* { dg-warning "not in NFC" } */
+.1A\u0300 /* { dg-warning "not in NFC" } */
@@ -1291,7 +1291,7 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c,
valid_flags = C99 | CXX | C11 | CXX23;
if (CPP_PEDANTIC (pfile))
{
- if (CPP_OPTION (pfile, cplusplus))
+ if (CPP_OPTION (pfile, xid_identifiers))
valid_flags = CXX23;
else if (CPP_OPTION (pfile, c11_identifiers))
valid_flags = C11;
@@ -1355,7 +1355,7 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c,
return 2;
}
- if (CPP_OPTION (pfile, cplusplus))
+ if (CPP_OPTION (pfile, xid_identifiers))
invalid_start_flags = NXX23;
else if (CPP_OPTION (pfile, c11_identifiers))
invalid_start_flags = N11;
@@ -496,6 +496,10 @@ struct cpp_options
in C11. */
unsigned char c11_identifiers;
+ /* Nonzero means extended identifiers allow the characters specified
+ by Unicode XID_Start and XID_Continue properties. */
+ unsigned char xid_identifiers;
+
/* Nonzero for C++ 2014 Standard binary constants. */
unsigned char binary_constants;
@@ -82,6 +82,7 @@ struct lang_flags
char extended_numbers;
char extended_identifiers;
char c11_identifiers;
+ char xid_identifiers;
char std;
char digraphs;
char uliterals;
@@ -102,31 +103,31 @@ struct lang_flags
};
static const struct lang_flags lang_defaults[] =
-{ /* c99 c++ xnum xid c11 std digr ulit rlit udlit bincst digsep trig u8chlit vaopt scope dfp szlit elifdef warndir delim trufal */
- /* GNUC89 */ { 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0 },
- /* GNUC99 */ { 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0 },
- /* GNUC11 */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0 },
- /* GNUC17 */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0 },
- /* GNUC2X */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1 },
- /* STDC89 */ { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
- /* STDC94 */ { 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
- /* STDC99 */ { 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
- /* STDC11 */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
- /* STDC17 */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
- /* STDC2X */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1 },
- /* GNUCXX */ { 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1 },
- /* CXX98 */ { 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1 },
- /* GNUCXX11 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1 },
- /* CXX11 */ { 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1 },
- /* GNUCXX14 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1 },
- /* CXX14 */ { 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1 },
- /* GNUCXX17 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1 },
- /* CXX17 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1 },
- /* GNUCXX20 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1 },
- /* CXX20 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1 },
- /* GNUCXX23 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1 },
- /* CXX23 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1 },
- /* ASM */ { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+{ /* c99 c++ xnum xid c11 xidid std digr ulit rlit udlit bincst digsep trig u8chlit vaopt scope dfp szlit elifdef warndir delim trufal */
+ /* GNUC89 */ { 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0 },
+ /* GNUC99 */ { 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0 },
+ /* GNUC11 */ { 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0 },
+ /* GNUC17 */ { 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0 },
+ /* GNUC2X */ { 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1 },
+ /* STDC89 */ { 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ /* STDC94 */ { 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ /* STDC99 */ { 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ /* STDC11 */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ /* STDC17 */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ /* STDC2X */ { 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1 },
+ /* GNUCXX */ { 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1 },
+ /* CXX98 */ { 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1 },
+ /* GNUCXX11 */ { 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1 },
+ /* CXX11 */ { 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1 },
+ /* GNUCXX14 */ { 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1 },
+ /* CXX14 */ { 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1 },
+ /* GNUCXX17 */ { 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1 },
+ /* CXX17 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1 },
+ /* GNUCXX20 */ { 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1 },
+ /* CXX20 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1 },
+ /* GNUCXX23 */ { 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1 },
+ /* CXX23 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1 },
+ /* ASM */ { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
};
/* Sets internal flags correctly for a given language. */
@@ -142,6 +143,7 @@ cpp_set_lang (cpp_reader *pfile, enum c_lang lang)
CPP_OPTION (pfile, extended_numbers) = l->extended_numbers;
CPP_OPTION (pfile, extended_identifiers) = l->extended_identifiers;
CPP_OPTION (pfile, c11_identifiers) = l->c11_identifiers;
+ CPP_OPTION (pfile, xid_identifiers) = l->xid_identifiers;
CPP_OPTION (pfile, std) = l->std;
CPP_OPTION (pfile, digraphs) = l->digraphs;
CPP_OPTION (pfile, uliterals) = l->uliterals;
@@ -2007,7 +2007,8 @@ name_p (cpp_reader *pfile, const cpp_string *string)
static void
warn_about_normalization (cpp_reader *pfile,
const cpp_token *token,
- const struct normalize_state *s)
+ const struct normalize_state *s,
+ bool identifier)
{
if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
&& !pfile->state.skipping)
@@ -2043,7 +2044,7 @@ warn_about_normalization (cpp_reader *pfile,
if (NORMALIZE_STATE_RESULT (s) == normalized_C)
cpp_warning_at (pfile, CPP_W_NORMALIZE, &rich_loc,
"`%.*s' is not in NFKC", (int) sz, buf);
- else if (CPP_OPTION (pfile, cplusplus))
+ else if (identifier && CPP_OPTION (pfile, xid_identifiers))
cpp_pedwarning_at (pfile, CPP_W_NORMALIZE, &rich_loc,
"`%.*s' is not in NFC", (int) sz, buf);
else
@@ -3839,7 +3840,7 @@ _cpp_lex_direct (cpp_reader *pfile)
struct normalize_state nst = INITIAL_NORMALIZE_STATE;
result->type = CPP_NUMBER;
lex_number (pfile, &result->val.str, &nst);
- warn_about_normalization (pfile, result, &nst);
+ warn_about_normalization (pfile, result, &nst, false);
break;
}
@@ -3888,7 +3889,7 @@ _cpp_lex_direct (cpp_reader *pfile)
result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false,
&nst,
&result->val.node.spelling);
- warn_about_normalization (pfile, result, &nst);
+ warn_about_normalization (pfile, result, &nst, true);
}
/* Convert named operators to their proper types. */
@@ -4101,7 +4102,7 @@ _cpp_lex_direct (cpp_reader *pfile)
struct normalize_state nst = INITIAL_NORMALIZE_STATE;
result->type = CPP_NUMBER;
lex_number (pfile, &result->val.str, &nst);
- warn_about_normalization (pfile, result, &nst);
+ warn_about_normalization (pfile, result, &nst, false);
}
else if (*buffer->cur == '.' && buffer->cur[1] == '.')
buffer->cur += 2, result->type = CPP_ELLIPSIS;
@@ -4192,7 +4193,7 @@ _cpp_lex_direct (cpp_reader *pfile)
result->type = CPP_NAME;
result->val.node.node = lex_identifier (pfile, base, true, &nst,
&result->val.node.spelling);
- warn_about_normalization (pfile, result, &nst);
+ warn_about_normalization (pfile, result, &nst, true);
break;
}