From b467630d73974200456f472a58bf9dab5eea1be0 Mon Sep 17 00:00:00 2001 From: data-man Date: Tue, 12 May 2020 23:29:06 +0500 Subject: Update to Unicode 13.0 --- data/CaseFolding.txt | 97 ++++++++++++++++- src/case_fold_switch.inc | 267 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 360 insertions(+), 4 deletions(-) diff --git a/data/CaseFolding.txt b/data/CaseFolding.txt index 372ee68..033788b 100644 --- a/data/CaseFolding.txt +++ b/data/CaseFolding.txt @@ -1,6 +1,6 @@ -# CaseFolding-9.0.0.txt -# Date: 2016-03-02, 18:54:54 GMT -# © 2016 Unicode®, Inc. +# CaseFolding-13.0.0.txt +# Date: 2019-09-08, 23:30:59 GMT +# © 2019 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see http://www.unicode.org/terms_of_use.html # @@ -24,7 +24,7 @@ # # NOTE: case folding does not preserve normalization formats! # -# For information on case folding, including how to have case folding +# For information on case folding, including how to have case folding # preserve normalization formats, see Section 3.13 Default Case Algorithms in # The Unicode Standard. # @@ -603,6 +603,52 @@ 1C86; C; 044A; # CYRILLIC SMALL LETTER TALL HARD SIGN 1C87; C; 0463; # CYRILLIC SMALL LETTER TALL YAT 1C88; C; A64B; # CYRILLIC SMALL LETTER UNBLENDED UK +1C90; C; 10D0; # GEORGIAN MTAVRULI CAPITAL LETTER AN +1C91; C; 10D1; # GEORGIAN MTAVRULI CAPITAL LETTER BAN +1C92; C; 10D2; # GEORGIAN MTAVRULI CAPITAL LETTER GAN +1C93; C; 10D3; # GEORGIAN MTAVRULI CAPITAL LETTER DON +1C94; C; 10D4; # GEORGIAN MTAVRULI CAPITAL LETTER EN +1C95; C; 10D5; # GEORGIAN MTAVRULI CAPITAL LETTER VIN +1C96; C; 10D6; # GEORGIAN MTAVRULI CAPITAL LETTER ZEN +1C97; C; 10D7; # GEORGIAN MTAVRULI CAPITAL LETTER TAN +1C98; C; 10D8; # GEORGIAN MTAVRULI CAPITAL LETTER IN +1C99; C; 10D9; # GEORGIAN MTAVRULI CAPITAL LETTER KAN +1C9A; C; 10DA; # GEORGIAN MTAVRULI CAPITAL LETTER LAS +1C9B; C; 10DB; # GEORGIAN MTAVRULI CAPITAL LETTER MAN +1C9C; C; 10DC; # GEORGIAN MTAVRULI CAPITAL LETTER NAR +1C9D; C; 10DD; # GEORGIAN MTAVRULI CAPITAL LETTER ON +1C9E; C; 10DE; # GEORGIAN MTAVRULI CAPITAL LETTER PAR +1C9F; C; 10DF; # GEORGIAN MTAVRULI CAPITAL LETTER ZHAR +1CA0; C; 10E0; # GEORGIAN MTAVRULI CAPITAL LETTER RAE +1CA1; C; 10E1; # GEORGIAN MTAVRULI CAPITAL LETTER SAN +1CA2; C; 10E2; # GEORGIAN MTAVRULI CAPITAL LETTER TAR +1CA3; C; 10E3; # GEORGIAN MTAVRULI CAPITAL LETTER UN +1CA4; C; 10E4; # GEORGIAN MTAVRULI CAPITAL LETTER PHAR +1CA5; C; 10E5; # GEORGIAN MTAVRULI CAPITAL LETTER KHAR +1CA6; C; 10E6; # GEORGIAN MTAVRULI CAPITAL LETTER GHAN +1CA7; C; 10E7; # GEORGIAN MTAVRULI CAPITAL LETTER QAR +1CA8; C; 10E8; # GEORGIAN MTAVRULI CAPITAL LETTER SHIN +1CA9; C; 10E9; # GEORGIAN MTAVRULI CAPITAL LETTER CHIN +1CAA; C; 10EA; # GEORGIAN MTAVRULI CAPITAL LETTER CAN +1CAB; C; 10EB; # GEORGIAN MTAVRULI CAPITAL LETTER JIL +1CAC; C; 10EC; # GEORGIAN MTAVRULI CAPITAL LETTER CIL +1CAD; C; 10ED; # GEORGIAN MTAVRULI CAPITAL LETTER CHAR +1CAE; C; 10EE; # GEORGIAN MTAVRULI CAPITAL LETTER XAN +1CAF; C; 10EF; # GEORGIAN MTAVRULI CAPITAL LETTER JHAN +1CB0; C; 10F0; # GEORGIAN MTAVRULI CAPITAL LETTER HAE +1CB1; C; 10F1; # GEORGIAN MTAVRULI CAPITAL LETTER HE +1CB2; C; 10F2; # GEORGIAN MTAVRULI CAPITAL LETTER HIE +1CB3; C; 10F3; # GEORGIAN MTAVRULI CAPITAL LETTER WE +1CB4; C; 10F4; # GEORGIAN MTAVRULI CAPITAL LETTER HAR +1CB5; C; 10F5; # GEORGIAN MTAVRULI CAPITAL LETTER HOE +1CB6; C; 10F6; # GEORGIAN MTAVRULI CAPITAL LETTER FI +1CB7; C; 10F7; # GEORGIAN MTAVRULI CAPITAL LETTER YN +1CB8; C; 10F8; # GEORGIAN MTAVRULI CAPITAL LETTER ELIFI +1CB9; C; 10F9; # GEORGIAN MTAVRULI CAPITAL LETTER TURNED GAN +1CBA; C; 10FA; # GEORGIAN MTAVRULI CAPITAL LETTER AIN +1CBD; C; 10FD; # GEORGIAN MTAVRULI CAPITAL LETTER AEN +1CBE; C; 10FE; # GEORGIAN MTAVRULI CAPITAL LETTER HARD SIGN +1CBF; C; 10FF; # GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN 1E00; C; 1E01; # LATIN CAPITAL LETTER A WITH RING BELOW 1E02; C; 1E03; # LATIN CAPITAL LETTER B WITH DOT ABOVE 1E04; C; 1E05; # LATIN CAPITAL LETTER B WITH DOT BELOW @@ -1180,6 +1226,17 @@ A7B2; C; 029D; # LATIN CAPITAL LETTER J WITH CROSSED-TAIL A7B3; C; AB53; # LATIN CAPITAL LETTER CHI A7B4; C; A7B5; # LATIN CAPITAL LETTER BETA A7B6; C; A7B7; # LATIN CAPITAL LETTER OMEGA +A7B8; C; A7B9; # LATIN CAPITAL LETTER U WITH STROKE +A7BA; C; A7BB; # LATIN CAPITAL LETTER GLOTTAL A +A7BC; C; A7BD; # LATIN CAPITAL LETTER GLOTTAL I +A7BE; C; A7BF; # LATIN CAPITAL LETTER GLOTTAL U +A7C2; C; A7C3; # LATIN CAPITAL LETTER ANGLICANA W +A7C4; C; A794; # LATIN CAPITAL LETTER C WITH PALATAL HOOK +A7C5; C; 0282; # LATIN CAPITAL LETTER S WITH HOOK +A7C6; C; 1D8E; # LATIN CAPITAL LETTER Z WITH PALATAL HOOK +A7C7; C; A7C8; # LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY +A7C9; C; A7CA; # LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY +A7F5; C; A7F6; # LATIN CAPITAL LETTER REVERSED HALF H AB70; C; 13A0; # CHEROKEE SMALL LETTER A AB71; C; 13A1; # CHEROKEE SMALL LETTER E AB72; C; 13A2; # CHEROKEE SMALL LETTER I @@ -1457,6 +1514,38 @@ FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z 118BD; C; 118DD; # WARANG CITI CAPITAL LETTER SSUU 118BE; C; 118DE; # WARANG CITI CAPITAL LETTER SII 118BF; C; 118DF; # WARANG CITI CAPITAL LETTER VIYO +16E40; C; 16E60; # MEDEFAIDRIN CAPITAL LETTER M +16E41; C; 16E61; # MEDEFAIDRIN CAPITAL LETTER S +16E42; C; 16E62; # MEDEFAIDRIN CAPITAL LETTER V +16E43; C; 16E63; # MEDEFAIDRIN CAPITAL LETTER W +16E44; C; 16E64; # MEDEFAIDRIN CAPITAL LETTER ATIU +16E45; C; 16E65; # MEDEFAIDRIN CAPITAL LETTER Z +16E46; C; 16E66; # MEDEFAIDRIN CAPITAL LETTER KP +16E47; C; 16E67; # MEDEFAIDRIN CAPITAL LETTER P +16E48; C; 16E68; # MEDEFAIDRIN CAPITAL LETTER T +16E49; C; 16E69; # MEDEFAIDRIN CAPITAL LETTER G +16E4A; C; 16E6A; # MEDEFAIDRIN CAPITAL LETTER F +16E4B; C; 16E6B; # MEDEFAIDRIN CAPITAL LETTER I +16E4C; C; 16E6C; # MEDEFAIDRIN CAPITAL LETTER K +16E4D; C; 16E6D; # MEDEFAIDRIN CAPITAL LETTER A +16E4E; C; 16E6E; # MEDEFAIDRIN CAPITAL LETTER J +16E4F; C; 16E6F; # MEDEFAIDRIN CAPITAL LETTER E +16E50; C; 16E70; # MEDEFAIDRIN CAPITAL LETTER B +16E51; C; 16E71; # MEDEFAIDRIN CAPITAL LETTER C +16E52; C; 16E72; # MEDEFAIDRIN CAPITAL LETTER U +16E53; C; 16E73; # MEDEFAIDRIN CAPITAL LETTER YU +16E54; C; 16E74; # MEDEFAIDRIN CAPITAL LETTER L +16E55; C; 16E75; # MEDEFAIDRIN CAPITAL LETTER Q +16E56; C; 16E76; # MEDEFAIDRIN CAPITAL LETTER HP +16E57; C; 16E77; # MEDEFAIDRIN CAPITAL LETTER NY +16E58; C; 16E78; # MEDEFAIDRIN CAPITAL LETTER X +16E59; C; 16E79; # MEDEFAIDRIN CAPITAL LETTER D +16E5A; C; 16E7A; # MEDEFAIDRIN CAPITAL LETTER OE +16E5B; C; 16E7B; # MEDEFAIDRIN CAPITAL LETTER N +16E5C; C; 16E7C; # MEDEFAIDRIN CAPITAL LETTER R +16E5D; C; 16E7D; # MEDEFAIDRIN CAPITAL LETTER O +16E5E; C; 16E7E; # MEDEFAIDRIN CAPITAL LETTER AI +16E5F; C; 16E7F; # MEDEFAIDRIN CAPITAL LETTER Y 1E900; C; 1E922; # ADLAM CAPITAL LETTER ALIF 1E901; C; 1E923; # ADLAM CAPITAL LETTER DAALI 1E902; C; 1E924; # ADLAM CAPITAL LETTER LAAM diff --git a/src/case_fold_switch.inc b/src/case_fold_switch.inc index 28e223e..fc1b6d8 100644 --- a/src/case_fold_switch.inc +++ b/src/case_fold_switch.inc @@ -1631,6 +1631,144 @@ case 0x1C88: bufpush(0xA64B); break; + case 0x1C90: + bufpush(0x10D0); + break; + case 0x1C91: + bufpush(0x10D1); + break; + case 0x1C92: + bufpush(0x10D2); + break; + case 0x1C93: + bufpush(0x10D3); + break; + case 0x1C94: + bufpush(0x10D4); + break; + case 0x1C95: + bufpush(0x10D5); + break; + case 0x1C96: + bufpush(0x10D6); + break; + case 0x1C97: + bufpush(0x10D7); + break; + case 0x1C98: + bufpush(0x10D8); + break; + case 0x1C99: + bufpush(0x10D9); + break; + case 0x1C9A: + bufpush(0x10DA); + break; + case 0x1C9B: + bufpush(0x10DB); + break; + case 0x1C9C: + bufpush(0x10DC); + break; + case 0x1C9D: + bufpush(0x10DD); + break; + case 0x1C9E: + bufpush(0x10DE); + break; + case 0x1C9F: + bufpush(0x10DF); + break; + case 0x1CA0: + bufpush(0x10E0); + break; + case 0x1CA1: + bufpush(0x10E1); + break; + case 0x1CA2: + bufpush(0x10E2); + break; + case 0x1CA3: + bufpush(0x10E3); + break; + case 0x1CA4: + bufpush(0x10E4); + break; + case 0x1CA5: + bufpush(0x10E5); + break; + case 0x1CA6: + bufpush(0x10E6); + break; + case 0x1CA7: + bufpush(0x10E7); + break; + case 0x1CA8: + bufpush(0x10E8); + break; + case 0x1CA9: + bufpush(0x10E9); + break; + case 0x1CAA: + bufpush(0x10EA); + break; + case 0x1CAB: + bufpush(0x10EB); + break; + case 0x1CAC: + bufpush(0x10EC); + break; + case 0x1CAD: + bufpush(0x10ED); + break; + case 0x1CAE: + bufpush(0x10EE); + break; + case 0x1CAF: + bufpush(0x10EF); + break; + case 0x1CB0: + bufpush(0x10F0); + break; + case 0x1CB1: + bufpush(0x10F1); + break; + case 0x1CB2: + bufpush(0x10F2); + break; + case 0x1CB3: + bufpush(0x10F3); + break; + case 0x1CB4: + bufpush(0x10F4); + break; + case 0x1CB5: + bufpush(0x10F5); + break; + case 0x1CB6: + bufpush(0x10F6); + break; + case 0x1CB7: + bufpush(0x10F7); + break; + case 0x1CB8: + bufpush(0x10F8); + break; + case 0x1CB9: + bufpush(0x10F9); + break; + case 0x1CBA: + bufpush(0x10FA); + break; + case 0x1CBD: + bufpush(0x10FD); + break; + case 0x1CBE: + bufpush(0x10FE); + break; + case 0x1CBF: + bufpush(0x10FF); + break; case 0x1E00: bufpush(0x1E01); break; @@ -3375,6 +3513,39 @@ case 0xA7B6: bufpush(0xA7B7); break; + case 0xA7B8: + bufpush(0xA7B9); + break; + case 0xA7BA: + bufpush(0xA7BB); + break; + case 0xA7BC: + bufpush(0xA7BD); + break; + case 0xA7BE: + bufpush(0xA7BF); + break; + case 0xA7C2: + bufpush(0xA7C3); + break; + case 0xA7C4: + bufpush(0xA794); + break; + case 0xA7C5: + bufpush(0x0282); + break; + case 0xA7C6: + bufpush(0x1D8E); + break; + case 0xA7C7: + bufpush(0xA7C8); + break; + case 0xA7C9: + bufpush(0xA7CA); + break; + case 0xA7F5: + bufpush(0xA7F6); + break; case 0xAB70: bufpush(0x13A0); break; @@ -4220,6 +4391,102 @@ case 0x118BF: bufpush(0x118DF); break; + case 0x16E40: + bufpush(0x16E60); + break; + case 0x16E41: + bufpush(0x16E61); + break; + case 0x16E42: + bufpush(0x16E62); + break; + case 0x16E43: + bufpush(0x16E63); + break; + case 0x16E44: + bufpush(0x16E64); + break; + case 0x16E45: + bufpush(0x16E65); + break; + case 0x16E46: + bufpush(0x16E66); + break; + case 0x16E47: + bufpush(0x16E67); + break; + case 0x16E48: + bufpush(0x16E68); + break; + case 0x16E49: + bufpush(0x16E69); + break; + case 0x16E4A: + bufpush(0x16E6A); + break; + case 0x16E4B: + bufpush(0x16E6B); + break; + case 0x16E4C: + bufpush(0x16E6C); + break; + case 0x16E4D: + bufpush(0x16E6D); + break; + case 0x16E4E: + bufpush(0x16E6E); + break; + case 0x16E4F: + bufpush(0x16E6F); + break; + case 0x16E50: + bufpush(0x16E70); + break; + case 0x16E51: + bufpush(0x16E71); + break; + case 0x16E52: + bufpush(0x16E72); + break; + case 0x16E53: + bufpush(0x16E73); + break; + case 0x16E54: + bufpush(0x16E74); + break; + case 0x16E55: + bufpush(0x16E75); + break; + case 0x16E56: + bufpush(0x16E76); + break; + case 0x16E57: + bufpush(0x16E77); + break; + case 0x16E58: + bufpush(0x16E78); + break; + case 0x16E59: + bufpush(0x16E79); + break; + case 0x16E5A: + bufpush(0x16E7A); + break; + case 0x16E5B: + bufpush(0x16E7B); + break; + case 0x16E5C: + bufpush(0x16E7C); + break; + case 0x16E5D: + bufpush(0x16E7D); + break; + case 0x16E5E: + bufpush(0x16E7E); + break; + case 0x16E5F: + bufpush(0x16E7F); + break; case 0x1E900: bufpush(0x1E922); break; -- cgit v1.2.3