@@ -30,7 +30,7 @@ localedata/unicode-gen/unicode_utils.py
localedata/unicode-gen/utf8_gen.py
And the most recent versions added to GCC are from glibc git commit:
-4c721f24fc190d1dc935eb0bab283de7cf13182e
+71de3aead9fffe89556e80ebc94aa918d8ee7bca
The script gen_wcwidth.py found here contains the GCC-specific code to
map glibc's output to the lookup tables we require. This script should not need
@@ -40,14 +40,14 @@ produce ucnid.h.
The procedure to update GCC's Unicode support is the following:
-1. Update the five Unicode data files from the above URLs.
+1. Update the six Unicode data files from the above URLs.
2. Update the two glibc files in from_glibc/ from glibc's git. Update
the commit number above in this README.
3. Run ./gen_wcwidth.py X.Y > ../../libcpp/generated_cpp_wcwidth.h
(where X.Y is the version of the Unicode standard corresponding to the
- Unicode data files being used, most recently, 15.0.0).
+ Unicode data files being used, most recently, 15.1.0).
4. Update Unicode Copyright years in libcpp/makeucnid.cc and in
libcpp/makeuname2c.cc up to the year in which the Unicode
@@ -350,7 +350,7 @@ if __name__ == "__main__":
# the EastAsianWidth.txt file.
if re.match(r'.*<reserved-.+>\.\.<reserved-.+>.*', LINE):
continue
- if re.match(r'^[^;]*;[WF]', LINE):
+ if re.match(r'^[^;]*;\s*[WF]\s*', LINE):
EAST_ASIAN_WIDTH_LINES.append(LINE.strip())
with open(ARGS.prop_list_file, mode='r') as PROP_LIST_FILE:
PROP_LIST_LINES = []
@@ -11231,6 +11231,10 @@
2FF9;IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM UPPER RIGHT;So;0;ON;;;;;N;;;;;
2FFA;IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM LOWER LEFT;So;0;ON;;;;;N;;;;;
2FFB;IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID;So;0;ON;;;;;N;;;;;
+2FFC;IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM RIGHT;So;0;ON;;;;;N;;;;;
+2FFD;IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM LOWER RIGHT;So;0;ON;;;;;N;;;;;
+2FFE;IDEOGRAPHIC DESCRIPTION CHARACTER HORIZONTAL REFLECTION;So;0;ON;;;;;N;;;;;
+2FFF;IDEOGRAPHIC DESCRIPTION CHARACTER ROTATION;So;0;ON;;;;;N;;;;;
3000;IDEOGRAPHIC SPACE;Zs;0;WS;<wide> 0020;;;;N;;;;;
3001;IDEOGRAPHIC COMMA;Po;0;ON;;;;;N;;;;;
3002;IDEOGRAPHIC FULL STOP;Po;0;ON;;;;;N;IDEOGRAPHIC PERIOD;;;;
@@ -11705,6 +11709,7 @@
31E1;CJK STROKE HZZZG;So;0;ON;;;;;N;;;;;
31E2;CJK STROKE PG;So;0;ON;;;;;N;;;;;
31E3;CJK STROKE Q;So;0;ON;;;;;N;;;;;
+31EF;IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION;So;0;ON;;;;;N;;;;;
31F0;KATAKANA LETTER SMALL KU;Lo;0;L;;;;;N;;;;;
31F1;KATAKANA LETTER SMALL SI;Lo;0;L;;;;;N;;;;;
31F2;KATAKANA LETTER SMALL SU;Lo;0;L;;;;;N;;;;;
@@ -34035,6 +34040,8 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N
2CEA1;<CJK Ideograph Extension E, Last>;Lo;0;L;;;;;N;;;;;
2CEB0;<CJK Ideograph Extension F, First>;Lo;0;L;;;;;N;;;;;
2EBE0;<CJK Ideograph Extension F, Last>;Lo;0;L;;;;;N;;;;;
+2EBF0;<CJK Ideograph Extension I, First>;Lo;0;L;;;;;N;;;;;
+2EE5D;<CJK Ideograph Extension I, Last>;Lo;0;L;;;;;N;;;;;
2F800;CJK COMPATIBILITY IDEOGRAPH-2F800;Lo;0;L;4E3D;;;;N;;;;;
2F801;CJK COMPATIBILITY IDEOGRAPH-2F801;Lo;0;L;4E38;;;;N;;;;;
2F802;CJK COMPATIBILITY IDEOGRAPH-2F802;Lo;0;L;4E41;;;;N;;;;;
@@ -1,11 +1,11 @@
-# EastAsianWidth-15.0.0.txt
-# Date: 2022-05-24, 17:40:20 GMT [KW, LI]
-# © 2022 Unicode®, Inc.
+# EastAsianWidth-15.1.0.txt
+# Date: 2023-07-28, 23:34:08 GMT
+# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
-# For documentation, see https://www.unicode.org/reports/tr44/
+# For documentation, see https://www.unicode.org/reports/tr44/
#
# East_Asian_Width Property
#
...
@@ -1,6 +1,6 @@
-# DerivedNormalizationProps-15.0.0.txt
-# Date: 2022-04-02, 01:29:03 GMT
-# © 2022 Unicode®, Inc.
+# DerivedNormalizationProps-15.1.0.txt
+# Date: 2023-05-02, 13:20:58 GMT
+# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
#
...
@@ -1,6 +1,6 @@
-# NameAliases-15.0.0.txt
-# Date: 2022-07-26, 20:13:00 GMT [KW]
-# © 2022 Unicode®, Inc.
+# NameAliases-15.1.0.txt
+# Date: 2023-01-05
+# © 2023 Unicode®, Inc.
# For terms of use, see https://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
@@ -1,6 +1,6 @@
-# DerivedCoreProperties-15.0.0.txt
-# Date: 2022-08-05, 22:17:05 GMT
-# © 2022 Unicode®, Inc.
+# DerivedCoreProperties-15.1.0.txt
+# Date: 2023-08-07, 15:21:24 GMT
+# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
#
@@ -1397,11 +1397,12 @@ FFDA..FFDC ; Alphabetic # Lo [3] HA
2B740..2B81D ; Alphabetic # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
2B820..2CEA1 ; Alphabetic # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
2CEB0..2EBE0 ; Alphabetic # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
+2EBF0..2EE5D ; Alphabetic # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D
2F800..2FA1D ; Alphabetic # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
-# Total code points: 137765
+# Total code points: 138387
# ================================================
@@ -6853,11 +6854,12 @@ FFDA..FFDC ; ID_Start # Lo [3] HALF
2B740..2B81D ; ID_Start # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
2B820..2CEA1 ; ID_Start # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
2CEB0..2EBE0 ; ID_Start # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
+2EBF0..2EE5D ; ID_Start # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D
2F800..2FA1D ; ID_Start # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
-# Total code points: 136345
+# Total code points: 136967
# ================================================
@@ -7438,6 +7440,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALF
1FE0..1FEC ; ID_Continue # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA
1FF2..1FF4 ; ID_Continue # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
1FF6..1FFC ; ID_Continue # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
+200C..200D ; ID_Continue # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
203F..2040 ; ID_Continue # Pc [2] UNDERTIE..CHARACTER TIE
2054 ; ID_Continue # Pc INVERTED UNDERTIE
2071 ; ID_Continue # Lm SUPERSCRIPT LATIN SMALL LETTER I
@@ -7504,6 +7507,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALF
309D..309E ; ID_Continue # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK
309F ; ID_Continue # Lo HIRAGANA DIGRAPH YORI
30A1..30FA ; ID_Continue # Lo [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO
+30FB ; ID_Continue # Po KATAKANA MIDDLE DOT
30FC..30FE ; ID_Continue # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK
30FF ; ID_Continue # Lo KATAKANA DIGRAPH KOTO
3105..312F ; ID_Continue # Lo [43] BOPOMOFO LETTER B..BOPOMOFO LETTER NN
@@ -7683,6 +7687,7 @@ FF10..FF19 ; ID_Continue # Nd [10] F
FF21..FF3A ; ID_Continue # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z
FF3F ; ID_Continue # Pc FULLWIDTH LOW LINE
FF41..FF5A ; ID_Continue # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z
+FF65 ; ID_Continue # Po HALFWIDTH KATAKANA MIDDLE DOT
FF66..FF6F ; ID_Continue # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU
FF70 ; ID_Continue # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
FF71..FF9D ; ID_Continue # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N
@@ -8207,12 +8212,13 @@ FFDA..FFDC ; ID_Continue # Lo [3] H
2B740..2B81D ; ID_Continue # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
2B820..2CEA1 ; ID_Continue # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
2CEB0..2EBE0 ; ID_Continue # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
+2EBF0..2EE5D ; ID_Continue # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D
2F800..2FA1D ; ID_Continue # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
30000..3134A ; ID_Continue # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
-# Total code points: 139482
+# Total code points: 140108
# ================================================
@@ -8962,11 +8968,12 @@ FFDA..FFDC ; XID_Start # Lo [3] HAL
2B740..2B81D ; XID_Start # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
2B820..2CEA1 ; XID_Start # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
2CEB0..2EBE0 ; XID_Start # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
+2EBF0..2EE5D ; XID_Start # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D
2F800..2FA1D ; XID_Start # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
-# Total code points: 136322
+# Total code points: 136944
# ================================================
@@ -9543,6 +9550,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HAL
1FE0..1FEC ; XID_Continue # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA
1FF2..1FF4 ; XID_Continue # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
1FF6..1FFC ; XID_Continue # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
+200C..200D ; XID_Continue # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
203F..2040 ; XID_Continue # Pc [2] UNDERTIE..CHARACTER TIE
2054 ; XID_Continue # Pc INVERTED UNDERTIE
2071 ; XID_Continue # Lm SUPERSCRIPT LATIN SMALL LETTER I
@@ -9608,6 +9616,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HAL
309D..309E ; XID_Continue # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK
309F ; XID_Continue # Lo HIRAGANA DIGRAPH YORI
30A1..30FA ; XID_Continue # Lo [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO
+30FB ; XID_Continue # Po KATAKANA MIDDLE DOT
30FC..30FE ; XID_Continue # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK
30FF ; XID_Continue # Lo KATAKANA DIGRAPH KOTO
3105..312F ; XID_Continue # Lo [43] BOPOMOFO LETTER B..BOPOMOFO LETTER NN
@@ -9793,6 +9802,7 @@ FF10..FF19 ; XID_Continue # Nd [10]
FF21..FF3A ; XID_Continue # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z
FF3F ; XID_Continue # Pc FULLWIDTH LOW LINE
FF41..FF5A ; XID_Continue # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z
+FF65 ; XID_Continue # Po HALFWIDTH KATAKANA MIDDLE DOT
FF66..FF6F ; XID_Continue # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU
FF70 ; XID_Continue # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
FF71..FF9D ; XID_Continue # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N
@@ -10317,12 +10327,13 @@ FFDA..FFDC ; XID_Continue # Lo [3]
2B740..2B81D ; XID_Continue # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
2B820..2CEA1 ; XID_Continue # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
2CEB0..2EBE0 ; XID_Continue # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
+2EBF0..2EE5D ; XID_Continue # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D
2F800..2FA1D ; XID_Continue # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
30000..3134A ; XID_Continue # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
-# Total code points: 139463
+# Total code points: 140089
# ================================================
@@ -10335,6 +10346,15 @@ E0100..E01EF ; XID_Continue # Mn [240]
# - FFF9..FFFB (Interlinear annotation format characters)
# - 13430..13440 (Egyptian hieroglyph format characters)
# - Prepended_Concatenation_Mark (Exceptional format characters that should be visible)
+#
+# There are currently no stability guarantees for DICP. However, the
+# values of DICP interact with the derivation of XID_Continue
+# and NFKC_CF, for which there are stability guarantees.
+# Maintainers of this property should note that in the
+# unlikely case that the DICP value changes for an existing character
+# which is also XID_Continue=Yes, then exceptions must be put
+# in place to ensure that the NFKC_CF mapping value for that
+# existing character does not change.
00AD ; Default_Ignorable_Code_Point # Cf SOFT HYPHEN
034F ; Default_Ignorable_Code_Point # Mn COMBINING GRAPHEME JOINER
@@ -11602,7 +11622,7 @@ E0100..E01EF ; Grapheme_Extend # Mn [24
2E80..2E99 ; Grapheme_Base # So [26] CJK RADICAL REPEAT..CJK RADICAL RAP
2E9B..2EF3 ; Grapheme_Base # So [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE
2F00..2FD5 ; Grapheme_Base # So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE
-2FF0..2FFB ; Grapheme_Base # So [12] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID
+2FF0..2FFF ; Grapheme_Base # So [16] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER ROTATION
3000 ; Grapheme_Base # Zs IDEOGRAPHIC SPACE
3001..3003 ; Grapheme_Base # Po [3] IDEOGRAPHIC COMMA..DITTO MARK
3004 ; Grapheme_Base # So JAPANESE INDUSTRIAL STANDARD SYMBOL
@@ -11657,6 +11677,7 @@ E0100..E01EF ; Grapheme_Extend # Mn [24
3196..319F ; Grapheme_Base # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK
31A0..31BF ; Grapheme_Base # Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH
31C0..31E3 ; Grapheme_Base # So [36] CJK STROKE T..CJK STROKE Q
+31EF ; Grapheme_Base # So IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION
31F0..31FF ; Grapheme_Base # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO
3200..321E ; Grapheme_Base # So [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU
3220..3229 ; Grapheme_Base # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN
@@ -12497,11 +12518,12 @@ FFFC..FFFD ; Grapheme_Base # So [2]
2B740..2B81D ; Grapheme_Base # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
2B820..2CEA1 ; Grapheme_Base # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
2CEB0..2EBE0 ; Grapheme_Base # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
+2EBF0..2EE5D ; Grapheme_Base # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D
2F800..2FA1D ; Grapheme_Base # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
-# Total code points: 146986
+# Total code points: 147613
# ================================================
...
@@ -1,6 +1,6 @@
-# PropList-15.0.0.txt
-# Date: 2022-08-05, 22:17:16 GMT
-# © 2022 Unicode®, Inc.
+# PropList-15.1.0.txt
+# Date: 2023-08-01, 21:56:53 GMT
+# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
#
...
@@ -69,7 +69,7 @@ struct entry { const char *name; unsigne
static struct entry *entries;
static unsigned long num_allocated, num_entries;
-/* Unicode 15 Table 4-8. */
+/* Unicode 15.1 Table 4-8. */
struct generated {
const char *prefix;
/* max_high is a workaround for UnicodeData.txt inconsistencies
@@ -87,6 +87,7 @@ static struct generated generated_ranges
{ "CJK UNIFIED IDEOGRAPH-", 0x2b740, 0x2b81d, 0, 1, 0 },
{ "CJK UNIFIED IDEOGRAPH-", 0x2b820, 0x2cea1, 0, 1, 0 },
{ "CJK UNIFIED IDEOGRAPH-", 0x2ceb0, 0x2ebe0, 0, 1, 0 },
+ { "CJK UNIFIED IDEOGRAPH-", 0x2ebf0, 0x2ee5d, 0, 1, 0 },
{ "CJK UNIFIED IDEOGRAPH-", 0x30000, 0x3134a, 0, 1, 0 },
{ "CJK UNIFIED IDEOGRAPH-", 0x31350, 0x323af, 0, 1, 0 },
{ "TANGUT IDEOGRAPH-", 0x17000, 0x187f7, 0, 2, 0 },
@@ -669,7 +670,7 @@ write_copyright (void)
<http://www.gnu.org/licenses/>.\n\
\n\
\n\
- Copyright (C) 1991-2022 Unicode, Inc. All rights reserved.\n\
+ Copyright (C) 1991-2023 Unicode, Inc. All rights reserved.\n\
Distributed under the Terms of Use in\n\
http://www.unicode.org/copyright.html.\n\
\n\
@@ -467,7 +467,7 @@ write_copyright (void)
<http://www.gnu.org/licenses/>.\n\
\n\
\n\
- Copyright (C) 1991-2022 Unicode, Inc. All rights reserved.\n\
+ Copyright (C) 1991-2023 Unicode, Inc. All rights reserved.\n\
Distributed under the Terms of Use in\n\
http://www.unicode.org/copyright.html.\n\
\n\
@@ -16,7 +16,7 @@
<http://www.gnu.org/licenses/>.
- Copyright (C) 1991-2022 Unicode, Inc. All rights reserved.
+ Copyright (C) 1991-2023 Unicode, Inc. All rights reserved.
Distributed under the Terms of Use in
http://www.unicode.org/copyright.html.
@@ -1379,7 +1379,8 @@ static const struct ucnrange ucnranges[]
{ 0| 0| 0|C11| 0| 0| 0|CID|NFC| 0| 0, 0, 0x1ffe },
{ 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x1fff },
{ 0| 0| 0| 0| 0| 0| 0|CID| 0| 0| 0, 0, 0x200a },
-{ 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x200d },
+{ 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x200b },
+{ 0| 0| 0|C11| 0|CXX23|NXX23|CID|NFC|NKC| 0, 0, 0x200d },
{ 0| 0| 0| 0| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x2029 },
{ 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x202e },
{ 0| 0| 0| 0| 0| 0| 0|CID|NFC| 0| 0, 0, 0x203e },
@@ -1625,7 +1626,7 @@ static const struct ucnrange ucnranges[]
{ C99| 0|CXX|C11| 0|CXX23| 0| 0|NFC|NKC| 0, 0, 0x30f4 },
{ C99| 0|CXX|C11| 0|CXX23| 0|CID|NFC|NKC| 0, 0, 0x30f6 },
{ 0| 0|CXX|C11| 0|CXX23| 0| 0|NFC|NKC| 0, 0, 0x30fa },
-{ C99| 0|CXX|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x30fb },
+{ C99| 0|CXX|C11| 0|CXX23|NXX23|CID|NFC|NKC| 0, 0, 0x30fb },
{ C99| 0|CXX|C11| 0|CXX23| 0|CID|NFC|NKC| 0, 0, 0x30fc },
{ 0| 0|CXX|C11| 0|CXX23| 0|CID|NFC|NKC| 0, 0, 0x30fd },
{ 0| 0|CXX|C11| 0|CXX23| 0| 0|NFC|NKC| 0, 0, 0x30fe },
@@ -1906,7 +1907,8 @@ static const struct ucnrange ucnranges[]
{ 0| 0| 0|C11| 0|CXX23|NXX23|CID|NFC| 0| 0, 0, 0xff3f },
{ 0| 0| 0|C11| 0| 0| 0|CID|NFC| 0| 0, 0, 0xff40 },
{ 0| 0|CXX|C11| 0|CXX23| 0|CID|NFC| 0| 0, 0, 0xff5a },
-{ 0| 0| 0|C11| 0| 0| 0|CID|NFC| 0| 0, 0, 0xff65 },
+{ 0| 0| 0|C11| 0| 0| 0|CID|NFC| 0| 0, 0, 0xff64 },
+{ 0| 0| 0|C11| 0|CXX23|NXX23|CID|NFC| 0| 0, 0, 0xff65 },
{ 0| 0|CXX|C11| 0|CXX23| 0|CID|NFC| 0| 0, 0, 0xff9d },
{ 0| 0|CXX|C11| 0|CXX23|NXX23|CID|NFC| 0| 0, 0, 0xff9f },
{ 0| 0|CXX|C11| 0|CXX23| 0|CID|NFC| 0| 0, 0, 0xffbe },
@@ -2786,6 +2788,8 @@ static const struct ucnrange ucnranges[]
{ 0| 0| 0|C11| 0|CXX23| 0|CID|NFC|NKC| 0, 0, 0x2cea1 },
{ 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x2ceaf },
{ 0| 0| 0|C11| 0|CXX23| 0|CID|NFC|NKC| 0, 0, 0x2ebe0 },
+{ 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x2ebef },
+{ 0| 0| 0|C11| 0|CXX23| 0|CID|NFC|NKC| 0, 0, 0x2ee5d },
{ 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x2f7ff },
{ 0| 0| 0|C11| 0|CXX23| 0| 0| 0| 0| 0, 0, 0x2fa1d },
{ 0| 0| 0|C11| 0| 0| 0|CID|NFC|NKC| 0, 0, 0x2fffd },
@@ -16,7 +16,7 @@
<http://www.gnu.org/licenses/>.
- Copyright (C) 1991-2022 Unicode, Inc. All rights reserved.
+ Copyright (C) 1991-2023 Unicode, Inc. All rights reserved.
Distributed under the Terms of Use in
http://www.unicode.org/copyright.html.
@@ -52,7 +52,7 @@
use or other dealings in these Data Files or Software without prior
written authorization of the copyright holder. */
-static const char uname2c_dict[59891] =
+static const char uname2c_dict[59919] =
"DIVIDED BY HORIZONTAL BAR AND TOP HALF DIVIDED BY VERTICAL BARUIGHUR KIRGHIZ "
"YEH WITH HAMZA ABOVE WITH ALEF MAKSURA LANTED EQUAL ABOVE GREATER-THAN ABOVE "
"SLANTED EQUAL WITH EXCLAMATION MARK WITH LEFT RIGHT ARROW ABOVELANTED EQUAL A"
...
@@ -1,5 +1,5 @@
/* Generated by contrib/unicode/gen_wcwidth.py, with the help of glibc's
- utf8_gen.py, using version 15.0.0 of the Unicode standard. */
+ utf8_gen.py, using version 15.1.0 of the Unicode standard. */
static const cppchar_t wcwidth_range_ends[] = {
0x2ff, 0x36f, 0x482, 0x489, 0x590, 0x5bd, 0x5be, 0x5bf,