libcpp, v2: Implement C++23 P2290R3 - Delimited escape sequences [PR106645]
Commit Message
On Wed, Aug 17, 2022 at 10:22:03PM -0400, Jason Merrill wrote:
> OK, a comment mentioning this should be sufficient.
Here is an updated patch with those changes in.
So far successfully tested with
GXX_TESTSUITE_STDS=98,11,14,17,20,2b make -j32 -k check-gcc check-g++ RUNTESTFLAGS="dg.exp='Wbidi* cpp/*' cpp.exp"
ok if it passes full bootstrap/regtest tonight?
2022-08-18 Jakub Jelinek <jakub@redhat.com>
PR c++/106645
libcpp/
* include/cpplib.h (struct cpp_options): Implement
P2290R3 - Delimited escape sequences. Add delimited_escape_seqs
member.
* init.cc (struct lang_flags): Likewise.
(lang_defaults): Add delim column.
(cpp_set_lang): Copy over delimited_escape_seqs.
* charset.cc (extend_char_range): New function.
(_cpp_valid_ucn): Use it. Handle delimited escape sequences.
(convert_hex): Likewise.
(convert_oct): Likewise.
(convert_ucn): Use extend_char_range.
(convert_escape): Call convert_oct even for \o.
(_cpp_interpret_identifier): Handle delimited escape sequences.
* lex.cc (get_bidi_ucn_1): Likewise. Add end argument, fill it in.
(get_bidi_ucn): Adjust get_bidi_ucn_1 caller. Use end argument to
compute num_bytes.
gcc/testsuite/
* c-c++-common/cpp/delimited-escape-seq-1.c: New test.
* c-c++-common/cpp/delimited-escape-seq-2.c: New test.
* c-c++-common/cpp/delimited-escape-seq-3.c: New test.
* c-c++-common/Wbidi-chars-24.c: New test.
* gcc.dg/cpp/delimited-escape-seq-1.c: New test.
* gcc.dg/cpp/delimited-escape-seq-2.c: New test.
* g++.dg/cpp/delimited-escape-seq-1.C: New test.
* g++.dg/cpp/delimited-escape-seq-2.C: New test.
Jakub
Comments
On 8/18/22 01:17, Jakub Jelinek wrote:
> On Wed, Aug 17, 2022 at 10:22:03PM -0400, Jason Merrill wrote:
>> OK, a comment mentioning this should be sufficient.
>
> Here is an updated patch with those changes in.
> So far successfully tested with
> GXX_TESTSUITE_STDS=98,11,14,17,20,2b make -j32 -k check-gcc check-g++ RUNTESTFLAGS="dg.exp='Wbidi* cpp/*' cpp.exp"
> ok if it passes full bootstrap/regtest tonight?
OK.
> 2022-08-18 Jakub Jelinek <jakub@redhat.com>
>
> PR c++/106645
> libcpp/
> * include/cpplib.h (struct cpp_options): Implement
> P2290R3 - Delimited escape sequences. Add delimited_escape_seqs
> member.
> * init.cc (struct lang_flags): Likewise.
> (lang_defaults): Add delim column.
> (cpp_set_lang): Copy over delimited_escape_seqs.
> * charset.cc (extend_char_range): New function.
> (_cpp_valid_ucn): Use it. Handle delimited escape sequences.
> (convert_hex): Likewise.
> (convert_oct): Likewise.
> (convert_ucn): Use extend_char_range.
> (convert_escape): Call convert_oct even for \o.
> (_cpp_interpret_identifier): Handle delimited escape sequences.
> * lex.cc (get_bidi_ucn_1): Likewise. Add end argument, fill it in.
> (get_bidi_ucn): Adjust get_bidi_ucn_1 caller. Use end argument to
> compute num_bytes.
> gcc/testsuite/
> * c-c++-common/cpp/delimited-escape-seq-1.c: New test.
> * c-c++-common/cpp/delimited-escape-seq-2.c: New test.
> * c-c++-common/cpp/delimited-escape-seq-3.c: New test.
> * c-c++-common/Wbidi-chars-24.c: New test.
> * gcc.dg/cpp/delimited-escape-seq-1.c: New test.
> * gcc.dg/cpp/delimited-escape-seq-2.c: New test.
> * g++.dg/cpp/delimited-escape-seq-1.C: New test.
> * g++.dg/cpp/delimited-escape-seq-2.C: New test.
>
> --- libcpp/include/cpplib.h.jj 2022-08-10 09:06:53.268209449 +0200
> +++ libcpp/include/cpplib.h 2022-08-15 19:32:53.743213474 +0200
> @@ -519,6 +519,9 @@ struct cpp_options
> /* Nonzero for C++23 size_t literals. */
> unsigned char size_t_literals;
>
> + /* Nonzero for C++23 delimited escape sequences. */
> + unsigned char delimited_escape_seqs;
> +
> /* Holds the name of the target (execution) character set. */
> const char *narrow_charset;
>
> --- libcpp/init.cc.jj 2022-08-10 09:06:53.268209449 +0200
> +++ libcpp/init.cc 2022-08-15 16:09:01.403020485 +0200
> @@ -96,34 +96,35 @@ struct lang_flags
> char dfp_constants;
> char size_t_literals;
> char elifdef;
> + char delimited_escape_seqs;
> };
>
> static const struct lang_flags lang_defaults[] =
> -{ /* c99 c++ xnum xid c11 std digr ulit rlit udlit bincst digsep trig u8chlit vaopt scope dfp szlit elifdef */
> - /* GNUC89 */ { 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0 },
> - /* GNUC99 */ { 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0 },
> - /* GNUC11 */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0 },
> - /* GNUC17 */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0 },
> - /* GNUC2X */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1 },
> - /* STDC89 */ { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 },
> - /* STDC94 */ { 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 },
> - /* STDC99 */ { 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 },
> - /* STDC11 */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 },
> - /* STDC17 */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 },
> - /* STDC2X */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1 },
> - /* GNUCXX */ { 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0 },
> - /* CXX98 */ { 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0 },
> - /* GNUCXX11 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0 },
> - /* CXX11 */ { 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0 },
> - /* GNUCXX14 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0 },
> - /* CXX14 */ { 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0 },
> - /* GNUCXX17 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0 },
> - /* CXX17 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0 },
> - /* GNUCXX20 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0 },
> - /* CXX20 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0 },
> - /* GNUCXX23 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1 },
> - /* CXX23 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1 },
> - /* ASM */ { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
> +{ /* c99 c++ xnum xid c11 std digr ulit rlit udlit bincst digsep trig u8chlit vaopt scope dfp szlit elifdef delim */
> + /* GNUC89 */ { 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0 },
> + /* GNUC99 */ { 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0 },
> + /* GNUC11 */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0 },
> + /* GNUC17 */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0 },
> + /* GNUC2X */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0 },
> + /* STDC89 */ { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 },
> + /* STDC94 */ { 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 },
> + /* STDC99 */ { 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 },
> + /* STDC11 */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 },
> + /* STDC17 */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 },
> + /* STDC2X */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0 },
> + /* GNUCXX */ { 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0 },
> + /* CXX98 */ { 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0 },
> + /* GNUCXX11 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0 },
> + /* CXX11 */ { 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0 },
> + /* GNUCXX14 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0 },
> + /* CXX14 */ { 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0 },
> + /* GNUCXX17 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0 },
> + /* CXX17 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0 },
> + /* GNUCXX20 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0 },
> + /* CXX20 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0 },
> + /* GNUCXX23 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1 },
> + /* CXX23 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1 },
> + /* ASM */ { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
> };
>
> /* Sets internal flags correctly for a given language. */
> @@ -153,6 +154,7 @@ cpp_set_lang (cpp_reader *pfile, enum c_
> CPP_OPTION (pfile, dfp_constants) = l->dfp_constants;
> CPP_OPTION (pfile, size_t_literals) = l->size_t_literals;
> CPP_OPTION (pfile, elifdef) = l->elifdef;
> + CPP_OPTION (pfile, delimited_escape_seqs) = l->delimited_escape_seqs;
> }
>
> /* Initialize library global state. */
> --- libcpp/charset.cc.jj 2022-08-15 12:52:43.213902801 +0200
> +++ libcpp/charset.cc 2022-08-18 10:01:22.569112418 +0200
> @@ -1036,6 +1036,19 @@ ucn_valid_in_identifier (cpp_reader *pfi
> return 1;
> }
>
> +/* Increment char_range->m_finish by a single character. */
> +
> +static void
> +extend_char_range (source_range *char_range,
> + cpp_string_location_reader *loc_reader)
> +{
> + if (loc_reader)
> + {
> + gcc_assert (char_range);
> + char_range->m_finish = loc_reader->get_next ().m_finish;
> + }
> +}
> +
> /* [lex.charset]: The character designated by the universal character
> name \UNNNNNNNN is that character whose character short name in
> ISO/IEC 10646 is NNNNNNNN; the character designated by the
> @@ -1081,6 +1094,7 @@ _cpp_valid_ucn (cpp_reader *pfile, const
> unsigned int length;
> const uchar *str = *pstr;
> const uchar *base = str - 2;
> + bool delimited = false;
>
> if (!CPP_OPTION (pfile, cplusplus) && !CPP_OPTION (pfile, c99))
> cpp_error (pfile, CPP_DL_WARNING,
> @@ -1095,7 +1109,17 @@ _cpp_valid_ucn (cpp_reader *pfile, const
> (int) str[-1]);
>
> if (str[-1] == 'u')
> - length = 4;
> + {
> + length = 4;
> + if (str < limit && *str == '{')
> + {
> + str++;
> + /* Magic value to indicate no digits seen. */
> + length = 32;
> + delimited = true;
> + extend_char_range (char_range, loc_reader);
> + }
> + }
> else if (str[-1] == 'U')
> length = 8;
> else
> @@ -1107,18 +1131,53 @@ _cpp_valid_ucn (cpp_reader *pfile, const
> result = 0;
> do
> {
> + if (str == limit)
> + break;
> c = *str;
> if (!ISXDIGIT (c))
> break;
> str++;
> - if (loc_reader)
> + extend_char_range (char_range, loc_reader);
> + if (delimited)
> {
> - gcc_assert (char_range);
> - char_range->m_finish = loc_reader->get_next ().m_finish;
> + if (!result)
> + /* Accept arbitrary number of leading zeros.
> + 16 is another magic value, smaller than 32 above
> + and bigger than 8, so that upon encountering first
> + non-zero digit we can count 8 digits and after that
> + or in overflow bit and ensure length doesn't decrease
> + to 0, as delimited escape sequence doesn't have upper
> + bound on the number of hex digits. */
> + length = 16;
> + else if (length == 16 - 8)
> + {
> + /* Make sure we detect overflows. */
> + result |= 0x8000000;
> + ++length;
> + }
> }
> +
> result = (result << 4) + hex_value (c);
> }
> - while (--length && str < limit);
> + while (--length);
> +
> + if (delimited
> + && str < limit
> + && *str == '}'
> + && (length != 32 || !identifier_pos))
> + {
> + if (length == 32)
> + cpp_error (pfile, CPP_DL_ERROR,
> + "empty delimited escape sequence");
> + else if (!CPP_OPTION (pfile, delimited_escape_seqs)
> + && CPP_OPTION (pfile, cpp_pedantic))
> + cpp_error (pfile, CPP_DL_PEDWARN,
> + "delimited escape sequences are only valid in C++23");
> + str++;
> + length = 0;
> + delimited = false;
> + extend_char_range (char_range, loc_reader);
> + }
>
> /* Partial UCNs are not valid in strings, but decompose into
> multiple tokens in identifiers, so we can't give a helpful
> @@ -1132,9 +1191,14 @@ _cpp_valid_ucn (cpp_reader *pfile, const
> *pstr = str;
> if (length)
> {
> - cpp_error (pfile, CPP_DL_ERROR,
> - "incomplete universal character name %.*s",
> - (int) (str - base), base);
> + if (!delimited)
> + cpp_error (pfile, CPP_DL_ERROR,
> + "incomplete universal character name %.*s",
> + (int) (str - base), base);
> + else
> + cpp_error (pfile, CPP_DL_ERROR,
> + "'\\u{' not terminated with '}' after %.*s",
> + (int) (str - base), base);
> result = 1;
> }
> /* The C99 standard permits $, @ and ` to be specified as UCNs. We use
> @@ -1212,9 +1276,8 @@ convert_ucn (cpp_reader *pfile, const uc
>
> from++; /* Skip u/U. */
>
> - if (loc_reader)
> - /* The u/U is part of the spelling of this character. */
> - char_range.m_finish = loc_reader->get_next ().m_finish;
> + /* The u/U is part of the spelling of this character. */
> + extend_char_range (&char_range, loc_reader);
>
> _cpp_valid_ucn (pfile, &from, limit, 0, &nst,
> &ucn, &char_range, loc_reader);
> @@ -1392,6 +1455,8 @@ convert_hex (cpp_reader *pfile, const uc
> int digits_found = 0;
> size_t width = cvt.width;
> size_t mask = width_to_mask (width);
> + bool delimited = false;
> + const uchar *base = from - 1;
>
> /* loc_reader and ranges must either be both NULL, or both be non-NULL. */
> gcc_assert ((loc_reader != NULL) == (ranges != NULL));
> @@ -1404,8 +1469,14 @@ convert_hex (cpp_reader *pfile, const uc
> from++;
>
> /* The 'x' is part of the spelling of this character. */
> - if (loc_reader)
> - char_range.m_finish = loc_reader->get_next ().m_finish;
> + extend_char_range (&char_range, loc_reader);
> +
> + if (from < limit && *from == '{')
> + {
> + delimited = true;
> + from++;
> + extend_char_range (&char_range, loc_reader);
> + }
>
> while (from < limit)
> {
> @@ -1413,19 +1484,42 @@ convert_hex (cpp_reader *pfile, const uc
> if (! hex_p (c))
> break;
> from++;
> - if (loc_reader)
> - char_range.m_finish = loc_reader->get_next ().m_finish;
> + extend_char_range (&char_range, loc_reader);
> overflow |= n ^ (n << 4 >> 4);
> n = (n << 4) + hex_value (c);
> digits_found = 1;
> }
>
> + if (delimited && from < limit && *from == '}')
> + {
> + from++;
> + if (!digits_found)
> + {
> + cpp_error (pfile, CPP_DL_ERROR,
> + "empty delimited escape sequence");
> + return from;
> + }
> + else if (!CPP_OPTION (pfile, delimited_escape_seqs)
> + && CPP_OPTION (pfile, cpp_pedantic))
> + cpp_error (pfile, CPP_DL_PEDWARN,
> + "delimited escape sequences are only valid in C++23");
> + delimited = false;
> + extend_char_range (&char_range, loc_reader);
> + }
> +
> if (!digits_found)
> {
> cpp_error (pfile, CPP_DL_ERROR,
> "\\x used with no following hex digits");
> return from;
> }
> + else if (delimited)
> + {
> + cpp_error (pfile, CPP_DL_ERROR,
> + "'\\x{' not terminated with '}' after %.*s",
> + (int) (from - base), base);
> + return from;
> + }
>
> if (overflow | (n != (n & mask)))
> {
> @@ -1459,25 +1553,71 @@ convert_oct (cpp_reader *pfile, const uc
> cpp_substring_ranges *ranges)
> {
> size_t count = 0;
> - cppchar_t c, n = 0;
> + cppchar_t c, n = 0, overflow = 0;
> size_t width = cvt.width;
> size_t mask = width_to_mask (width);
> + bool delimited = false;
> + const uchar *base = from - 1;
>
> /* loc_reader and ranges must either be both NULL, or both be non-NULL. */
> gcc_assert ((loc_reader != NULL) == (ranges != NULL));
>
> + if (from < limit && *from == 'o')
> + {
> + from++;
> + extend_char_range (&char_range, loc_reader);
> + if (from == limit || *from != '{')
> + cpp_error (pfile, CPP_DL_ERROR, "'\\o' not followed by '{'");
> + else
> + {
> + from++;
> + extend_char_range (&char_range, loc_reader);
> + delimited = true;
> + }
> + }
> +
> while (from < limit && count++ < 3)
> {
> c = *from;
> if (c < '0' || c > '7')
> break;
> from++;
> - if (loc_reader)
> - char_range.m_finish = loc_reader->get_next ().m_finish;
> + extend_char_range (&char_range, loc_reader);
> + if (delimited)
> + {
> + count = 2;
> + overflow |= n ^ (n << 3 >> 3);
> + }
> n = (n << 3) + c - '0';
> }
>
> - if (n != (n & mask))
> + if (delimited)
> + {
> + if (from < limit && *from == '}')
> + {
> + from++;
> + if (count == 1)
> + {
> + cpp_error (pfile, CPP_DL_ERROR,
> + "empty delimited escape sequence");
> + return from;
> + }
> + else if (!CPP_OPTION (pfile, delimited_escape_seqs)
> + && CPP_OPTION (pfile, cpp_pedantic))
> + cpp_error (pfile, CPP_DL_PEDWARN,
> + "delimited escape sequences are only valid in C++23");
> + extend_char_range (&char_range, loc_reader);
> + }
> + else
> + {
> + cpp_error (pfile, CPP_DL_ERROR,
> + "'\\o{' not terminated with '}' after %.*s",
> + (int) (from - base), base);
> + return from;
> + }
> + }
> +
> + if (overflow | (n != (n & mask)))
> {
> cpp_error (pfile, CPP_DL_PEDWARN,
> "octal escape sequence out of range");
> @@ -1535,6 +1675,7 @@ convert_escape (cpp_reader *pfile, const
>
> case '0': case '1': case '2': case '3':
> case '4': case '5': case '6': case '7':
> + case 'o':
> return convert_oct (pfile, from, limit, tbuf, cvt,
> char_range, loc_reader, ranges);
>
> @@ -2119,15 +2260,27 @@ _cpp_interpret_identifier (cpp_reader *p
> cppchar_t value = 0;
> size_t bufleft = len - (bufp - buf);
> int rval;
> + bool delimited = false;
>
> idp += 2;
> + if (length == 4 && id[idp] == '{')
> + {
> + delimited = true;
> + idp++;
> + }
> while (length && idp < len && ISXDIGIT (id[idp]))
> {
> value = (value << 4) + hex_value (id[idp]);
> idp++;
> - length--;
> + if (!delimited)
> + length--;
> }
> - idp--;
> + if (!delimited)
> + idp--;
> + /* else
> + assert (id[idp] == '}');
> + As the caller ensures it is a valid identifier, if it is
> + delimited escape sequence, it must be terminated by }. */
>
> /* Special case for EBCDIC: if the identifier contains
> a '$' specified using a UCN, translate it to EBCDIC. */
> --- libcpp/lex.cc.jj 2022-05-23 10:59:06.235591348 +0200
> +++ libcpp/lex.cc 2022-08-16 11:57:53.772823661 +0200
> @@ -1426,19 +1426,35 @@ get_bidi_utf8 (cpp_reader *pfile, const
> /* Parse a UCN where P points just past \u or \U and return its bidi code. */
>
> static bidi::kind
> -get_bidi_ucn_1 (const unsigned char *p, bool is_U)
> +get_bidi_ucn_1 (const unsigned char *p, bool is_U, const unsigned char **end)
> {
> /* 6.4.3 Universal Character Names
> \u hex-quad
> \U hex-quad hex-quad
> + \u { simple-hexadecimal-digit-sequence }
> where \unnnn means \U0000nnnn. */
>
> + *end = p + 4;
> if (is_U)
> {
> if (p[0] != '0' || p[1] != '0' || p[2] != '0' || p[3] != '0')
> return bidi::kind::NONE;
> /* Skip 4B so we can treat \u and \U the same below. */
> p += 4;
> + *end += 4;
> + }
> + else if (p[0] == '{')
> + {
> + p++;
> + while (*p == '0')
> + p++;
> + if (p[0] != '2'
> + || p[1] != '0'
> + || !ISXDIGIT (p[2])
> + || !ISXDIGIT (p[3])
> + || p[4] != '}')
> + return bidi::kind::NONE;
> + *end = p + 5;
> }
>
> /* All code points we are looking for start with 20xx. */
> @@ -1499,14 +1515,15 @@ get_bidi_ucn_1 (const unsigned char *p,
> If the kind is not NONE, write the location to *OUT.*/
>
> static bidi::kind
> -get_bidi_ucn (cpp_reader *pfile, const unsigned char *p, bool is_U,
> +get_bidi_ucn (cpp_reader *pfile, const unsigned char *p, bool is_U,
> location_t *out)
> {
> - bidi::kind result = get_bidi_ucn_1 (p, is_U);
> + const unsigned char *end;
> + bidi::kind result = get_bidi_ucn_1 (p, is_U, &end);
> if (result != bidi::kind::NONE)
> {
> const unsigned char *start = p - 2;
> - size_t num_bytes = 2 + (is_U ? 8 : 4);
> + size_t num_bytes = end - start;
> *out = get_location_for_byte_range_in_cur_line (pfile, start, num_bytes);
> }
> return result;
> --- gcc/testsuite/c-c++-common/cpp/delimited-escape-seq-1.c.jj 2022-08-16 10:47:38.693022740 +0200
> +++ gcc/testsuite/c-c++-common/cpp/delimited-escape-seq-1.c 2022-08-16 12:18:42.235477632 +0200
> @@ -0,0 +1,92 @@
> +/* P2290R3 - Delimited escape sequences */
> +/* { dg-do run } */
> +/* { dg-require-effective-target wchar } */
> +/* { dg-options "-std=gnu99 -Wno-c++-compat" { target c } } */
> +/* { dg-options "-std=c++23" { target c++ } } */
> +
> +#ifndef __cplusplus
> +#include <wchar.h>
> +typedef __CHAR16_TYPE__ char16_t;
> +typedef __CHAR32_TYPE__ char32_t;
> +#endif
> +
> +const char32_t *a = U"\u{1234}\u{10fffd}\u{000000000000000000000000000000000000000000000000000000000001234}\u{10FFFD}";
> +const char32_t *b = U"\x{1234}\x{10fffd}\x{000000000000000000000000000000000000000000000000000000000001234}";
> +const char32_t *c = U"\o{1234}\o{4177775}\o{000000000000000000000000000000000000000000000000000000000000000000000000004177775}";
> +const char16_t *d = u"\u{1234}\u{bFFd}\u{00000000000000000000000000000001234}";
> +const char16_t *e = u"\x{1234}\x{BffD}\x{000001234}";
> +const char16_t *f = u"\o{1234}\o{137775}\o{000000000000000137775}";
> +const wchar_t *g = L"\u{1234}\u{bFFd}\u{00000000000000000000000000000001234}";
> +const wchar_t *h = L"\x{1234}\x{bFFd}\x{000001234}";
> +const wchar_t *i = L"\o{1234}\o{137775}\o{000000000000000137775}";
> +#ifdef __cplusplus
> +const char *j = "\u{34}\u{000000000000000003D}";
> +#endif
> +const char *k = "\x{34}\x{000000000000000003D}";
> +const char *l = "\o{34}\o{000000000000000176}";
> +
> +#if U'\u{1234}' != U'\u1234' || U'\u{10fffd}' != U'\U0010FFFD' \
> + || U'\x{00000001234}' != U'\x1234' || U'\x{010fffd}' != U'\x10FFFD' \
> + || U'\o{1234}' != U'\x29c' || U'\o{004177775}' != U'\x10FFFD' \
> + || u'\u{1234}' != u'\u1234' || u'\u{0bffd}' != u'\uBFFD' \
> + || u'\x{00000001234}' != u'\x1234' || u'\x{0Bffd}' != u'\x0bFFD' \
> + || u'\o{1234}' != u'\x29c' || u'\o{00137775}' != u'\xBFFD' \
> + || L'\u{1234}' != L'\u1234' || L'\u{0bffd}' != L'\uBFFD' \
> + || L'\x{00000001234}' != L'\x1234' || L'\x{0bffd}' != L'\x0bFFD' \
> + || L'\o{1234}' != L'\x29c' || L'\o{00137775}' != L'\xBFFD' \
> + || '\x{34}' != '\x034' || '\x{0003d}' != '\x003D' \
> + || '\o{34}' != '\x1C' || '\o{176}' != '\x007E'
> +#error Bad
> +#endif
> +#ifdef __cplusplus
> +#if '\u{0000000034}' != '\u0034' || '\u{3d}' != '\u003D'
> +#error Bad
> +#endif
> +#endif
> +
> +int
> +main ()
> +{
> + if (a[0] != U'\u1234' || a[0] != U'\u{1234}'
> + || a[1] != U'\U0010FFFD' || a[1] != U'\u{000010fFfD}'
> + || a[2] != a[0]
> + || a[3] != a[1]
> + || b[0] != U'\x1234' || b[0] != U'\x{001234}'
> + || b[1] != U'\x10FFFD' || b[1] != U'\x{0010fFfD}'
> + || b[2] != b[0]
> + || c[0] != U'\x29c' || c[0] != U'\o{001234}'
> + || c[1] != U'\x10FFFD' || c[1] != U'\o{4177775}'
> + || c[2] != c[1])
> + __builtin_abort ();
> + if (d[0] != u'\u1234' || d[0] != u'\u{1234}'
> + || d[1] != u'\U0000BFFD' || d[1] != u'\u{00000bFfD}'
> + || d[2] != d[0]
> + || e[0] != u'\x1234' || e[0] != u'\x{001234}'
> + || e[1] != u'\xBFFD' || e[1] != u'\x{00bFfD}'
> + || e[2] != e[0]
> + || f[0] != u'\x29c' || f[0] != u'\o{001234}'
> + || f[1] != u'\xbFFD' || f[1] != u'\o{137775}'
> + || f[2] != f[1])
> + __builtin_abort ();
> + if (g[0] != L'\u1234' || g[0] != L'\u{1234}'
> + || g[1] != L'\U0000BFFD' || g[1] != L'\u{00000bFfD}'
> + || g[2] != g[0]
> + || h[0] != L'\x1234' || h[0] != L'\x{001234}'
> + || h[1] != L'\xBFFD' || h[1] != L'\x{00bFfD}'
> + || h[2] != h[0]
> + || i[0] != L'\x29c' || i[0] != L'\o{001234}'
> + || i[1] != L'\xbFFD' || i[1] != L'\o{137775}'
> + || i[2] != i[1])
> + __builtin_abort ();
> +#ifdef __cplusplus
> + if (j[0] != '\u0034' || j[0] != '\u{034}'
> + || j[1] != '\U0000003D' || j[1] != '\u{000003d}')
> + __builtin_abort ();
> +#endif
> + if (k[0] != '\x034' || k[0] != '\x{0034}'
> + || k[1] != '\x3D' || k[1] != '\x{3d}'
> + || l[0] != '\x1c' || l[0] != '\o{0034}'
> + || l[1] != '\x07e' || l[1] != '\o{176}' || l[1] != '\176')
> + __builtin_abort ();
> + return 0;
> +}
> --- gcc/testsuite/c-c++-common/cpp/delimited-escape-seq-2.c.jj 2022-08-16 10:47:41.846981390 +0200
> +++ gcc/testsuite/c-c++-common/cpp/delimited-escape-seq-2.c 2022-08-16 12:18:58.807260607 +0200
> @@ -0,0 +1,18 @@
> +/* P2290R3 - Delimited escape sequences */
> +/* { dg-do compile } */
> +/* { dg-options "-std=gnu99 -Wno-c++-compat" { target c } } */
> +/* { dg-options "-std=c++23" { target c++ } } */
> +
> +int jalape\u{f1}o = 42;
> +
> +int
> +caf\u{000e9} (void)
> +{
> + return jalape\u00F1o;
> +}
> +
> +int
> +test (void)
> +{
> + return caf\u00e9 ();
> +}
> --- gcc/testsuite/c-c++-common/cpp/delimited-escape-seq-3.c.jj 2022-08-16 12:18:19.308777922 +0200
> +++ gcc/testsuite/c-c++-common/cpp/delimited-escape-seq-3.c 2022-08-16 12:41:23.693648138 +0200
> @@ -0,0 +1,33 @@
> +/* P2290R3 - Delimited escape sequences */
> +/* { dg-do compile } */
> +/* { dg-require-effective-target wchar } */
> +/* { dg-options "-std=gnu99 -Wno-c++-compat" { target c } } */
> +/* { dg-options "-std=c++23" { target c++ } } */
> +
> +#ifndef __cplusplus
> +typedef __CHAR32_TYPE__ char32_t;
> +#endif
> +
> +const char32_t *a = U"\u{}"; /* { dg-error "empty delimited escape sequence" } */
> + /* { dg-error "is not a valid universal character" "" { target c } .-1 } */
> +const char32_t *b = U"\u{12" "34}"; /* { dg-error "'\\\\u\\{' not terminated with '\\}' after" } */
> +const char32_t *c = U"\u{0000ffffffff}"; /* { dg-error "is not a valid universal character" } */
> +const char32_t *d = U"\u{010000edcb}"; /* { dg-error "is not a valid universal character" } */
> +const char32_t *e = U"\u{02000000000000000000edcb}"; /* { dg-error "is not a valid universal character" } */
> +const char32_t *f = U"\u{123ghij}"; /* { dg-error "'\\\\u\\{' not terminated with '\\}' after" } */
> +const char32_t *g = U"\u{123.}"; /* { dg-error "'\\\\u\\{' not terminated with '\\}' after" } */
> +const char32_t *h = U"\u{.}"; /* { dg-error "'\\\\u\\{' not terminated with '\\}' after" } */
> +const char32_t *i = U"\x{}"; /* { dg-error "empty delimited escape sequence" } */
> +const char32_t *j = U"\x{12" "34}"; /* { dg-error "'\\\\x\\{' not terminated with '\\}' after" } */
> +const char32_t *k = U"\x{0000ffffffff}";
> +const char32_t *l = U"\x{010000edcb}"; /* { dg-warning "hex escape sequence out of range" } */
> +const char32_t *m = U"\x{02000000000000000000edcb}"; /* { dg-warning "hex escape sequence out of range" } */
> +const char32_t *n = U"\x{123ghij}"; /* { dg-error "'\\\\x\\{' not terminated with '\\}' after" } */
> +const char32_t *o = U"\x{123.}"; /* { dg-error "'\\\\x\\{' not terminated with '\\}' after" } */
> +const char32_t *p = U"\o{}"; /* { dg-error "empty delimited escape sequence" } */
> +const char32_t *q = U"\o{12" "34}"; /* { dg-error "'\\\\o\\{' not terminated with '\\}' after" } */
> +const char32_t *r = U"\o{0000037777777777}";
> +const char32_t *s = U"\o{040000166713}"; /* { dg-warning "octal escape sequence out of range" } */
> +const char32_t *t = U"\o{02000000000000000000000166713}";/* { dg-warning "octal escape sequence out of range" } */
> +const char32_t *u = U"\o{1238}"; /* { dg-error "'\\\\o\\{' not terminated with '\\}' after" } */
> +const char32_t *v = U"\o{.}"; /* { dg-error "'\\\\o\\{' not terminated with '\\}' after" } */
> --- gcc/testsuite/c-c++-common/Wbidi-chars-24.c.jj 2022-08-16 12:03:19.350561676 +0200
> +++ gcc/testsuite/c-c++-common/Wbidi-chars-24.c 2022-08-16 12:06:46.381851525 +0200
> @@ -0,0 +1,28 @@
> +/* PR preprocessor/103026 */
> +/* { dg-do compile } */
> +/* { dg-options "-Wbidi-chars=ucn,unpaired" } */
> +/* Test nesting of bidi chars in various contexts. */
> +
> +void
> +g1 ()
> +{
> + const char *s1 = "a b c LRE\u{202a} 1 2 3 PDI\u{00000000000000000000000002069} x y z";
> +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
> + const char *s2 = "a b c RLE\u{00202b} 1 2 3 PDI\u{2069} x y z";
> +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
> + const char *s3 = "a b c LRO\u{000000202d} 1 2 3 PDI\u{02069} x y z";
> +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
> + const char *s4 = "a b c RLO\u{202e} 1 2 3 PDI\u{00000002069} x y z";
> +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
> + const char *s5 = "a b c LRI\u{002066} 1 2 3 PDF\u{202C} x y z";
> +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
> + const char *s6 = "a b c RLI\u{02067} 1 2 3 PDF\u{202c} x y z";
> +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
> + const char *s7 = "a b c FSI\u{0002068} 1 2 3 PDF\u{0202c} x y z";
> +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
> +}
> +
> +int A\u{202a}B\u{2069}C;
> +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
> +int a\u{00000202b}B\u{000000002069}c;
> +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
> --- gcc/testsuite/gcc.dg/cpp/delimited-escape-seq-1.c.jj 2022-08-16 10:47:38.693022740 +0200
> +++ gcc/testsuite/gcc.dg/cpp/delimited-escape-seq-1.c 2022-08-16 12:46:56.508291006 +0200
> @@ -0,0 +1,10 @@
> +/* P2290R3 - Delimited escape sequences */
> +/* { dg-do compile } */
> +/* { dg-require-effective-target wchar } */
> +/* { dg-options "-std=gnu99 -Wno-c++-compat -pedantic" } */
> +
> +typedef __CHAR32_TYPE__ char32_t;
> +
> +const char32_t *a = U"\u{1234}"; /* { dg-warning "delimited escape sequences are only valid in" } */
> +const char32_t *b = U"\x{1234}"; /* { dg-warning "delimited escape sequences are only valid in" } */
> +const char32_t *c = U"\o{1234}"; /* { dg-warning "delimited escape sequences are only valid in" } */
> --- gcc/testsuite/gcc.dg/cpp/delimited-escape-seq-2.c.jj 2022-08-16 10:47:41.846981390 +0200
> +++ gcc/testsuite/gcc.dg/cpp/delimited-escape-seq-2.c 2022-08-16 12:47:05.955167423 +0200
> @@ -0,0 +1,10 @@
> +/* P2290R3 - Delimited escape sequences */
> +/* { dg-do compile } */
> +/* { dg-require-effective-target wchar } */
> +/* { dg-options "-std=gnu99 -Wno-c++-compat -pedantic-errors" } */
> +
> +typedef __CHAR32_TYPE__ char32_t;
> +
> +const char32_t *a = U"\u{1234}"; /* { dg-error "delimited escape sequences are only valid in" } */
> +const char32_t *b = U"\x{1234}"; /* { dg-error "delimited escape sequences are only valid in" } */
> +const char32_t *c = U"\o{1234}"; /* { dg-error "delimited escape sequences are only valid in" } */
> --- gcc/testsuite/g++.dg/cpp/delimited-escape-seq-1.C.jj 2022-08-16 12:46:43.368462901 +0200
> +++ gcc/testsuite/g++.dg/cpp/delimited-escape-seq-1.C 2022-08-16 12:49:21.532393786 +0200
> @@ -0,0 +1,8 @@
> +// P2290R3 - Delimited escape sequences
> +// { dg-do compile { target c++11 } }
> +// { dg-require-effective-target wchar }
> +// { dg-options "-pedantic" }
> +
> +const char32_t *a = U"\u{1234}"; // { dg-warning "delimited escape sequences are only valid in" "" { target c++20_down } }
> +const char32_t *b = U"\x{1234}"; // { dg-warning "delimited escape sequences are only valid in" "" { target c++20_down } }
> +const char32_t *c = U"\o{1234}"; // { dg-warning "delimited escape sequences are only valid in" "" { target c++20_down } }
> --- gcc/testsuite/g++.dg/cpp/delimited-escape-seq-2.C.jj 2022-08-16 12:46:46.281424798 +0200
> +++ gcc/testsuite/g++.dg/cpp/delimited-escape-seq-2.C 2022-08-16 12:49:33.761233803 +0200
> @@ -0,0 +1,8 @@
> +// P2290R3 - Delimited escape sequences
> +// { dg-do compile { target c++11 } }
> +// { dg-require-effective-target wchar }
> +// { dg-options "-pedantic-errors" }
> +
> +const char32_t *a = U"\u{1234}"; // { dg-error "delimited escape sequences are only valid in" "" { target c++20_down } }
> +const char32_t *b = U"\x{1234}"; // { dg-error "delimited escape sequences are only valid in" "" { target c++20_down } }
> +const char32_t *c = U"\o{1234}"; // { dg-error "delimited escape sequences are only valid in" "" { target c++20_down } }
>
>
> Jakub
>
@@ -519,6 +519,9 @@ struct cpp_options
/* Nonzero for C++23 size_t literals. */
unsigned char size_t_literals;
+ /* Nonzero for C++23 delimited escape sequences. */
+ unsigned char delimited_escape_seqs;
+
/* Holds the name of the target (execution) character set. */
const char *narrow_charset;
@@ -96,34 +96,35 @@ struct lang_flags
char dfp_constants;
char size_t_literals;
char elifdef;
+ char delimited_escape_seqs;
};
static const struct lang_flags lang_defaults[] =
-{ /* c99 c++ xnum xid c11 std digr ulit rlit udlit bincst digsep trig u8chlit vaopt scope dfp szlit elifdef */
- /* GNUC89 */ { 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0 },
- /* GNUC99 */ { 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0 },
- /* GNUC11 */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0 },
- /* GNUC17 */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0 },
- /* GNUC2X */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1 },
- /* STDC89 */ { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 },
- /* STDC94 */ { 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 },
- /* STDC99 */ { 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 },
- /* STDC11 */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 },
- /* STDC17 */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 },
- /* STDC2X */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1 },
- /* GNUCXX */ { 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0 },
- /* CXX98 */ { 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0 },
- /* GNUCXX11 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0 },
- /* CXX11 */ { 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0 },
- /* GNUCXX14 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0 },
- /* CXX14 */ { 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0 },
- /* GNUCXX17 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0 },
- /* CXX17 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0 },
- /* GNUCXX20 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0 },
- /* CXX20 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0 },
- /* GNUCXX23 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1 },
- /* CXX23 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1 },
- /* ASM */ { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+{ /* c99 c++ xnum xid c11 std digr ulit rlit udlit bincst digsep trig u8chlit vaopt scope dfp szlit elifdef delim */
+ /* GNUC89 */ { 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0 },
+ /* GNUC99 */ { 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0 },
+ /* GNUC11 */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0 },
+ /* GNUC17 */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0 },
+ /* GNUC2X */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0 },
+ /* STDC89 */ { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 },
+ /* STDC94 */ { 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 },
+ /* STDC99 */ { 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 },
+ /* STDC11 */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 },
+ /* STDC17 */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 },
+ /* STDC2X */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0 },
+ /* GNUCXX */ { 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0 },
+ /* CXX98 */ { 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0 },
+ /* GNUCXX11 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0 },
+ /* CXX11 */ { 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0 },
+ /* GNUCXX14 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0 },
+ /* CXX14 */ { 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0 },
+ /* GNUCXX17 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0 },
+ /* CXX17 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0 },
+ /* GNUCXX20 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0 },
+ /* CXX20 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0 },
+ /* GNUCXX23 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1 },
+ /* CXX23 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1 },
+ /* ASM */ { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
};
/* Sets internal flags correctly for a given language. */
@@ -153,6 +154,7 @@ cpp_set_lang (cpp_reader *pfile, enum c_
CPP_OPTION (pfile, dfp_constants) = l->dfp_constants;
CPP_OPTION (pfile, size_t_literals) = l->size_t_literals;
CPP_OPTION (pfile, elifdef) = l->elifdef;
+ CPP_OPTION (pfile, delimited_escape_seqs) = l->delimited_escape_seqs;
}
/* Initialize library global state. */
@@ -1036,6 +1036,19 @@ ucn_valid_in_identifier (cpp_reader *pfi
return 1;
}
+/* Increment char_range->m_finish by a single character. */
+
+static void
+extend_char_range (source_range *char_range,
+ cpp_string_location_reader *loc_reader)
+{
+ if (loc_reader)
+ {
+ gcc_assert (char_range);
+ char_range->m_finish = loc_reader->get_next ().m_finish;
+ }
+}
+
/* [lex.charset]: The character designated by the universal character
name \UNNNNNNNN is that character whose character short name in
ISO/IEC 10646 is NNNNNNNN; the character designated by the
@@ -1081,6 +1094,7 @@ _cpp_valid_ucn (cpp_reader *pfile, const
unsigned int length;
const uchar *str = *pstr;
const uchar *base = str - 2;
+ bool delimited = false;
if (!CPP_OPTION (pfile, cplusplus) && !CPP_OPTION (pfile, c99))
cpp_error (pfile, CPP_DL_WARNING,
@@ -1095,7 +1109,17 @@ _cpp_valid_ucn (cpp_reader *pfile, const
(int) str[-1]);
if (str[-1] == 'u')
- length = 4;
+ {
+ length = 4;
+ if (str < limit && *str == '{')
+ {
+ str++;
+ /* Magic value to indicate no digits seen. */
+ length = 32;
+ delimited = true;
+ extend_char_range (char_range, loc_reader);
+ }
+ }
else if (str[-1] == 'U')
length = 8;
else
@@ -1107,18 +1131,53 @@ _cpp_valid_ucn (cpp_reader *pfile, const
result = 0;
do
{
+ if (str == limit)
+ break;
c = *str;
if (!ISXDIGIT (c))
break;
str++;
- if (loc_reader)
+ extend_char_range (char_range, loc_reader);
+ if (delimited)
{
- gcc_assert (char_range);
- char_range->m_finish = loc_reader->get_next ().m_finish;
+ if (!result)
+ /* Accept an arbitrary number of leading zeros.
+ 16 is another magic value, smaller than 32 above
+ and bigger than 8, so that upon encountering the first
+ non-zero digit we can count 8 digits, and after that
+ OR in an overflow bit and ensure length doesn't decrease
+ to 0, as a delimited escape sequence doesn't have an upper
+ bound on the number of hex digits. */
+ length = 16;
+ else if (length == 16 - 8)
+ {
+ /* Make sure we detect overflows. */
+ result |= 0x8000000;
+ ++length;
+ }
}
+
result = (result << 4) + hex_value (c);
}
- while (--length && str < limit);
+ while (--length);
+
+ if (delimited
+ && str < limit
+ && *str == '}'
+ && (length != 32 || !identifier_pos))
+ {
+ if (length == 32)
+ cpp_error (pfile, CPP_DL_ERROR,
+ "empty delimited escape sequence");
+ else if (!CPP_OPTION (pfile, delimited_escape_seqs)
+ && CPP_OPTION (pfile, cpp_pedantic))
+ cpp_error (pfile, CPP_DL_PEDWARN,
+ "delimited escape sequences are only valid in C++23");
+ str++;
+ length = 0;
+ delimited = false;
+ extend_char_range (char_range, loc_reader);
+ }
/* Partial UCNs are not valid in strings, but decompose into
multiple tokens in identifiers, so we can't give a helpful
@@ -1132,9 +1191,14 @@ _cpp_valid_ucn (cpp_reader *pfile, const
*pstr = str;
if (length)
{
- cpp_error (pfile, CPP_DL_ERROR,
- "incomplete universal character name %.*s",
- (int) (str - base), base);
+ if (!delimited)
+ cpp_error (pfile, CPP_DL_ERROR,
+ "incomplete universal character name %.*s",
+ (int) (str - base), base);
+ else
+ cpp_error (pfile, CPP_DL_ERROR,
+ "'\\u{' not terminated with '}' after %.*s",
+ (int) (str - base), base);
result = 1;
}
/* The C99 standard permits $, @ and ` to be specified as UCNs. We use
@@ -1212,9 +1276,8 @@ convert_ucn (cpp_reader *pfile, const uc
from++; /* Skip u/U. */
- if (loc_reader)
- /* The u/U is part of the spelling of this character. */
- char_range.m_finish = loc_reader->get_next ().m_finish;
+ /* The u/U is part of the spelling of this character. */
+ extend_char_range (&char_range, loc_reader);
_cpp_valid_ucn (pfile, &from, limit, 0, &nst,
&ucn, &char_range, loc_reader);
@@ -1392,6 +1455,8 @@ convert_hex (cpp_reader *pfile, const uc
int digits_found = 0;
size_t width = cvt.width;
size_t mask = width_to_mask (width);
+ bool delimited = false;
+ const uchar *base = from - 1;
/* loc_reader and ranges must either be both NULL, or both be non-NULL. */
gcc_assert ((loc_reader != NULL) == (ranges != NULL));
@@ -1404,8 +1469,14 @@ convert_hex (cpp_reader *pfile, const uc
from++;
/* The 'x' is part of the spelling of this character. */
- if (loc_reader)
- char_range.m_finish = loc_reader->get_next ().m_finish;
+ extend_char_range (&char_range, loc_reader);
+
+ if (from < limit && *from == '{')
+ {
+ delimited = true;
+ from++;
+ extend_char_range (&char_range, loc_reader);
+ }
while (from < limit)
{
@@ -1413,19 +1484,42 @@ convert_hex (cpp_reader *pfile, const uc
if (! hex_p (c))
break;
from++;
- if (loc_reader)
- char_range.m_finish = loc_reader->get_next ().m_finish;
+ extend_char_range (&char_range, loc_reader);
overflow |= n ^ (n << 4 >> 4);
n = (n << 4) + hex_value (c);
digits_found = 1;
}
+ if (delimited && from < limit && *from == '}')
+ {
+ from++;
+ if (!digits_found)
+ {
+ cpp_error (pfile, CPP_DL_ERROR,
+ "empty delimited escape sequence");
+ return from;
+ }
+ else if (!CPP_OPTION (pfile, delimited_escape_seqs)
+ && CPP_OPTION (pfile, cpp_pedantic))
+ cpp_error (pfile, CPP_DL_PEDWARN,
+ "delimited escape sequences are only valid in C++23");
+ delimited = false;
+ extend_char_range (&char_range, loc_reader);
+ }
+
if (!digits_found)
{
cpp_error (pfile, CPP_DL_ERROR,
"\\x used with no following hex digits");
return from;
}
+ else if (delimited)
+ {
+ cpp_error (pfile, CPP_DL_ERROR,
+ "'\\x{' not terminated with '}' after %.*s",
+ (int) (from - base), base);
+ return from;
+ }
if (overflow | (n != (n & mask)))
{
@@ -1459,25 +1553,71 @@ convert_oct (cpp_reader *pfile, const uc
cpp_substring_ranges *ranges)
{
size_t count = 0;
- cppchar_t c, n = 0;
+ cppchar_t c, n = 0, overflow = 0;
size_t width = cvt.width;
size_t mask = width_to_mask (width);
+ bool delimited = false;
+ const uchar *base = from - 1;
/* loc_reader and ranges must either be both NULL, or both be non-NULL. */
gcc_assert ((loc_reader != NULL) == (ranges != NULL));
+ if (from < limit && *from == 'o')
+ {
+ from++;
+ extend_char_range (&char_range, loc_reader);
+ if (from == limit || *from != '{')
+ cpp_error (pfile, CPP_DL_ERROR, "'\\o' not followed by '{'");
+ else
+ {
+ from++;
+ extend_char_range (&char_range, loc_reader);
+ delimited = true;
+ }
+ }
+
while (from < limit && count++ < 3)
{
c = *from;
if (c < '0' || c > '7')
break;
from++;
- if (loc_reader)
- char_range.m_finish = loc_reader->get_next ().m_finish;
+ extend_char_range (&char_range, loc_reader);
+ if (delimited)
+ {
+ count = 2;
+ overflow |= n ^ (n << 3 >> 3);
+ }
n = (n << 3) + c - '0';
}
- if (n != (n & mask))
+ if (delimited)
+ {
+ if (from < limit && *from == '}')
+ {
+ from++;
+ if (count == 1)
+ {
+ cpp_error (pfile, CPP_DL_ERROR,
+ "empty delimited escape sequence");
+ return from;
+ }
+ else if (!CPP_OPTION (pfile, delimited_escape_seqs)
+ && CPP_OPTION (pfile, cpp_pedantic))
+ cpp_error (pfile, CPP_DL_PEDWARN,
+ "delimited escape sequences are only valid in C++23");
+ extend_char_range (&char_range, loc_reader);
+ }
+ else
+ {
+ cpp_error (pfile, CPP_DL_ERROR,
+ "'\\o{' not terminated with '}' after %.*s",
+ (int) (from - base), base);
+ return from;
+ }
+ }
+
+ if (overflow | (n != (n & mask)))
{
cpp_error (pfile, CPP_DL_PEDWARN,
"octal escape sequence out of range");
@@ -1535,6 +1675,7 @@ convert_escape (cpp_reader *pfile, const
case '0': case '1': case '2': case '3':
case '4': case '5': case '6': case '7':
+ case 'o':
return convert_oct (pfile, from, limit, tbuf, cvt,
char_range, loc_reader, ranges);
@@ -2119,15 +2260,27 @@ _cpp_interpret_identifier (cpp_reader *p
cppchar_t value = 0;
size_t bufleft = len - (bufp - buf);
int rval;
+ bool delimited = false;
idp += 2;
+ if (length == 4 && id[idp] == '{')
+ {
+ delimited = true;
+ idp++;
+ }
while (length && idp < len && ISXDIGIT (id[idp]))
{
value = (value << 4) + hex_value (id[idp]);
idp++;
- length--;
+ if (!delimited)
+ length--;
}
- idp--;
+ if (!delimited)
+ idp--;
+ /* else
+ assert (id[idp] == '}');
+ As the caller ensures it is a valid identifier, if it is
+ a delimited escape sequence, it must be terminated by '}'. */
/* Special case for EBCDIC: if the identifier contains
a '$' specified using a UCN, translate it to EBCDIC. */
@@ -1426,19 +1426,35 @@ get_bidi_utf8 (cpp_reader *pfile, const
/* Parse a UCN where P points just past \u or \U and return its bidi code. */
static bidi::kind
-get_bidi_ucn_1 (const unsigned char *p, bool is_U)
+get_bidi_ucn_1 (const unsigned char *p, bool is_U, const unsigned char **end)
{
/* 6.4.3 Universal Character Names
\u hex-quad
\U hex-quad hex-quad
+ \u { simple-hexadecimal-digit-sequence }
where \unnnn means \U0000nnnn. */
+ *end = p + 4;
if (is_U)
{
if (p[0] != '0' || p[1] != '0' || p[2] != '0' || p[3] != '0')
return bidi::kind::NONE;
/* Skip 4B so we can treat \u and \U the same below. */
p += 4;
+ *end += 4;
+ }
+ else if (p[0] == '{')
+ {
+ p++;
+ while (*p == '0')
+ p++;
+ if (p[0] != '2'
+ || p[1] != '0'
+ || !ISXDIGIT (p[2])
+ || !ISXDIGIT (p[3])
+ || p[4] != '}')
+ return bidi::kind::NONE;
+ *end = p + 5;
}
/* All code points we are looking for start with 20xx. */
@@ -1499,14 +1515,15 @@ get_bidi_ucn_1 (const unsigned char *p,
If the kind is not NONE, write the location to *OUT.*/
static bidi::kind
-get_bidi_ucn (cpp_reader *pfile, const unsigned char *p, bool is_U,
+get_bidi_ucn (cpp_reader *pfile, const unsigned char *p, bool is_U,
location_t *out)
{
- bidi::kind result = get_bidi_ucn_1 (p, is_U);
+ const unsigned char *end;
+ bidi::kind result = get_bidi_ucn_1 (p, is_U, &end);
if (result != bidi::kind::NONE)
{
const unsigned char *start = p - 2;
- size_t num_bytes = 2 + (is_U ? 8 : 4);
+ size_t num_bytes = end - start;
*out = get_location_for_byte_range_in_cur_line (pfile, start, num_bytes);
}
return result;
@@ -0,0 +1,92 @@
+/* P2290R3 - Delimited escape sequences */
+/* { dg-do run } */
+/* { dg-require-effective-target wchar } */
+/* { dg-options "-std=gnu99 -Wno-c++-compat" { target c } } */
+/* { dg-options "-std=c++23" { target c++ } } */
+
+#ifndef __cplusplus
+#include <wchar.h>
+typedef __CHAR16_TYPE__ char16_t;
+typedef __CHAR32_TYPE__ char32_t;
+#endif
+
+const char32_t *a = U"\u{1234}\u{10fffd}\u{000000000000000000000000000000000000000000000000000000000001234}\u{10FFFD}";
+const char32_t *b = U"\x{1234}\x{10fffd}\x{000000000000000000000000000000000000000000000000000000000001234}";
+const char32_t *c = U"\o{1234}\o{4177775}\o{000000000000000000000000000000000000000000000000000000000000000000000000004177775}";
+const char16_t *d = u"\u{1234}\u{bFFd}\u{00000000000000000000000000000001234}";
+const char16_t *e = u"\x{1234}\x{BffD}\x{000001234}";
+const char16_t *f = u"\o{1234}\o{137775}\o{000000000000000137775}";
+const wchar_t *g = L"\u{1234}\u{bFFd}\u{00000000000000000000000000000001234}";
+const wchar_t *h = L"\x{1234}\x{bFFd}\x{000001234}";
+const wchar_t *i = L"\o{1234}\o{137775}\o{000000000000000137775}";
+#ifdef __cplusplus
+const char *j = "\u{34}\u{000000000000000003D}";
+#endif
+const char *k = "\x{34}\x{000000000000000003D}";
+const char *l = "\o{34}\o{000000000000000176}";
+
+#if U'\u{1234}' != U'\u1234' || U'\u{10fffd}' != U'\U0010FFFD' \
+ || U'\x{00000001234}' != U'\x1234' || U'\x{010fffd}' != U'\x10FFFD' \
+ || U'\o{1234}' != U'\x29c' || U'\o{004177775}' != U'\x10FFFD' \
+ || u'\u{1234}' != u'\u1234' || u'\u{0bffd}' != u'\uBFFD' \
+ || u'\x{00000001234}' != u'\x1234' || u'\x{0Bffd}' != u'\x0bFFD' \
+ || u'\o{1234}' != u'\x29c' || u'\o{00137775}' != u'\xBFFD' \
+ || L'\u{1234}' != L'\u1234' || L'\u{0bffd}' != L'\uBFFD' \
+ || L'\x{00000001234}' != L'\x1234' || L'\x{0bffd}' != L'\x0bFFD' \
+ || L'\o{1234}' != L'\x29c' || L'\o{00137775}' != L'\xBFFD' \
+ || '\x{34}' != '\x034' || '\x{0003d}' != '\x003D' \
+ || '\o{34}' != '\x1C' || '\o{176}' != '\x007E'
+#error Bad
+#endif
+#ifdef __cplusplus
+#if '\u{0000000034}' != '\u0034' || '\u{3d}' != '\u003D'
+#error Bad
+#endif
+#endif
+
+int
+main ()
+{
+ if (a[0] != U'\u1234' || a[0] != U'\u{1234}'
+ || a[1] != U'\U0010FFFD' || a[1] != U'\u{000010fFfD}'
+ || a[2] != a[0]
+ || a[3] != a[1]
+ || b[0] != U'\x1234' || b[0] != U'\x{001234}'
+ || b[1] != U'\x10FFFD' || b[1] != U'\x{0010fFfD}'
+ || b[2] != b[0]
+ || c[0] != U'\x29c' || c[0] != U'\o{001234}'
+ || c[1] != U'\x10FFFD' || c[1] != U'\o{4177775}'
+ || c[2] != c[1])
+ __builtin_abort ();
+ if (d[0] != u'\u1234' || d[0] != u'\u{1234}'
+ || d[1] != u'\U0000BFFD' || d[1] != u'\u{00000bFfD}'
+ || d[2] != d[0]
+ || e[0] != u'\x1234' || e[0] != u'\x{001234}'
+ || e[1] != u'\xBFFD' || e[1] != u'\x{00bFfD}'
+ || e[2] != e[0]
+ || f[0] != u'\x29c' || f[0] != u'\o{001234}'
+ || f[1] != u'\xbFFD' || f[1] != u'\o{137775}'
+ || f[2] != f[1])
+ __builtin_abort ();
+ if (g[0] != L'\u1234' || g[0] != L'\u{1234}'
+ || g[1] != L'\U0000BFFD' || g[1] != L'\u{00000bFfD}'
+ || g[2] != g[0]
+ || h[0] != L'\x1234' || h[0] != L'\x{001234}'
+ || h[1] != L'\xBFFD' || h[1] != L'\x{00bFfD}'
+ || h[2] != h[0]
+ || i[0] != L'\x29c' || i[0] != L'\o{001234}'
+ || i[1] != L'\xbFFD' || i[1] != L'\o{137775}'
+ || i[2] != i[1])
+ __builtin_abort ();
+#ifdef __cplusplus
+ if (j[0] != '\u0034' || j[0] != '\u{034}'
+ || j[1] != '\U0000003D' || j[1] != '\u{000003d}')
+ __builtin_abort ();
+#endif
+ if (k[0] != '\x034' || k[0] != '\x{0034}'
+ || k[1] != '\x3D' || k[1] != '\x{3d}'
+ || l[0] != '\x1c' || l[0] != '\o{0034}'
+ || l[1] != '\x07e' || l[1] != '\o{176}' || l[1] != '\176')
+ __builtin_abort ();
+ return 0;
+}
@@ -0,0 +1,18 @@
+/* P2290R3 - Delimited escape sequences */
+/* { dg-do compile } */
+/* { dg-options "-std=gnu99 -Wno-c++-compat" { target c } } */
+/* { dg-options "-std=c++23" { target c++ } } */
+
+int jalape\u{f1}o = 42;
+
+int
+caf\u{000e9} (void)
+{
+ return jalape\u00F1o;
+}
+
+int
+test (void)
+{
+ return caf\u00e9 ();
+}
@@ -0,0 +1,33 @@
+/* P2290R3 - Delimited escape sequences */
+/* { dg-do compile } */
+/* { dg-require-effective-target wchar } */
+/* { dg-options "-std=gnu99 -Wno-c++-compat" { target c } } */
+/* { dg-options "-std=c++23" { target c++ } } */
+
+#ifndef __cplusplus
+typedef __CHAR32_TYPE__ char32_t;
+#endif
+
+const char32_t *a = U"\u{}"; /* { dg-error "empty delimited escape sequence" } */
+ /* { dg-error "is not a valid universal character" "" { target c } .-1 } */
+const char32_t *b = U"\u{12" "34}"; /* { dg-error "'\\\\u\\{' not terminated with '\\}' after" } */
+const char32_t *c = U"\u{0000ffffffff}"; /* { dg-error "is not a valid universal character" } */
+const char32_t *d = U"\u{010000edcb}"; /* { dg-error "is not a valid universal character" } */
+const char32_t *e = U"\u{02000000000000000000edcb}"; /* { dg-error "is not a valid universal character" } */
+const char32_t *f = U"\u{123ghij}"; /* { dg-error "'\\\\u\\{' not terminated with '\\}' after" } */
+const char32_t *g = U"\u{123.}"; /* { dg-error "'\\\\u\\{' not terminated with '\\}' after" } */
+const char32_t *h = U"\u{.}"; /* { dg-error "'\\\\u\\{' not terminated with '\\}' after" } */
+const char32_t *i = U"\x{}"; /* { dg-error "empty delimited escape sequence" } */
+const char32_t *j = U"\x{12" "34}"; /* { dg-error "'\\\\x\\{' not terminated with '\\}' after" } */
+const char32_t *k = U"\x{0000ffffffff}";
+const char32_t *l = U"\x{010000edcb}"; /* { dg-warning "hex escape sequence out of range" } */
+const char32_t *m = U"\x{02000000000000000000edcb}"; /* { dg-warning "hex escape sequence out of range" } */
+const char32_t *n = U"\x{123ghij}"; /* { dg-error "'\\\\x\\{' not terminated with '\\}' after" } */
+const char32_t *o = U"\x{123.}"; /* { dg-error "'\\\\x\\{' not terminated with '\\}' after" } */
+const char32_t *p = U"\o{}"; /* { dg-error "empty delimited escape sequence" } */
+const char32_t *q = U"\o{12" "34}"; /* { dg-error "'\\\\o\\{' not terminated with '\\}' after" } */
+const char32_t *r = U"\o{0000037777777777}";
+const char32_t *s = U"\o{040000166713}"; /* { dg-warning "octal escape sequence out of range" } */
+const char32_t *t = U"\o{02000000000000000000000166713}";/* { dg-warning "octal escape sequence out of range" } */
+const char32_t *u = U"\o{1238}"; /* { dg-error "'\\\\o\\{' not terminated with '\\}' after" } */
+const char32_t *v = U"\o{.}"; /* { dg-error "'\\\\o\\{' not terminated with '\\}' after" } */
@@ -0,0 +1,28 @@
+/* PR preprocessor/103026 */
+/* { dg-do compile } */
+/* { dg-options "-Wbidi-chars=ucn,unpaired" } */
+/* Test nesting of bidi chars in various contexts. */
+
+void
+g1 ()
+{
+ const char *s1 = "a b c LRE\u{202a} 1 2 3 PDI\u{00000000000000000000000002069} x y z";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char *s2 = "a b c RLE\u{00202b} 1 2 3 PDI\u{2069} x y z";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char *s3 = "a b c LRO\u{000000202d} 1 2 3 PDI\u{02069} x y z";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char *s4 = "a b c RLO\u{202e} 1 2 3 PDI\u{00000002069} x y z";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char *s5 = "a b c LRI\u{002066} 1 2 3 PDF\u{202C} x y z";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char *s6 = "a b c RLI\u{02067} 1 2 3 PDF\u{202c} x y z";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char *s7 = "a b c FSI\u{0002068} 1 2 3 PDF\u{0202c} x y z";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+}
+
+int A\u{202a}B\u{2069}C;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int a\u{00000202b}B\u{000000002069}c;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
@@ -0,0 +1,10 @@
+/* P2290R3 - Delimited escape sequences */
+/* { dg-do compile } */
+/* { dg-require-effective-target wchar } */
+/* { dg-options "-std=gnu99 -Wno-c++-compat -pedantic" } */
+
+typedef __CHAR32_TYPE__ char32_t;
+
+const char32_t *a = U"\u{1234}"; /* { dg-warning "delimited escape sequences are only valid in" } */
+const char32_t *b = U"\x{1234}"; /* { dg-warning "delimited escape sequences are only valid in" } */
+const char32_t *c = U"\o{1234}"; /* { dg-warning "delimited escape sequences are only valid in" } */
@@ -0,0 +1,10 @@
+/* P2290R3 - Delimited escape sequences */
+/* { dg-do compile } */
+/* { dg-require-effective-target wchar } */
+/* { dg-options "-std=gnu99 -Wno-c++-compat -pedantic-errors" } */
+
+typedef __CHAR32_TYPE__ char32_t;
+
+const char32_t *a = U"\u{1234}"; /* { dg-error "delimited escape sequences are only valid in" } */
+const char32_t *b = U"\x{1234}"; /* { dg-error "delimited escape sequences are only valid in" } */
+const char32_t *c = U"\o{1234}"; /* { dg-error "delimited escape sequences are only valid in" } */
@@ -0,0 +1,8 @@
+// P2290R3 - Delimited escape sequences
+// { dg-do compile { target c++11 } }
+// { dg-require-effective-target wchar }
+// { dg-options "-pedantic" }
+
+const char32_t *a = U"\u{1234}"; // { dg-warning "delimited escape sequences are only valid in" "" { target c++20_down } }
+const char32_t *b = U"\x{1234}"; // { dg-warning "delimited escape sequences are only valid in" "" { target c++20_down } }
+const char32_t *c = U"\o{1234}"; // { dg-warning "delimited escape sequences are only valid in" "" { target c++20_down } }
@@ -0,0 +1,8 @@
+// P2290R3 - Delimited escape sequences
+// { dg-do compile { target c++11 } }
+// { dg-require-effective-target wchar }
+// { dg-options "-pedantic-errors" }
+
+const char32_t *a = U"\u{1234}"; // { dg-error "delimited escape sequences are only valid in" "" { target c++20_down } }
+const char32_t *b = U"\x{1234}"; // { dg-error "delimited escape sequences are only valid in" "" { target c++20_down } }
+const char32_t *c = U"\o{1234}"; // { dg-error "delimited escape sequences are only valid in" "" { target c++20_down } }