summary refs log tree commit diff
path: root/ext/pcre/pcrelib/pcre_tables.c
diff options
context:
space:
mode:
author Sean Finney <seanius@debian.org> 2009-04-10 14:09:48 +0200
committer Sean Finney <seanius@debian.org> 2009-04-10 14:09:48 +0200
commit cd0b49c72aee33b3e44a9c589fcd93b9e1c7a64f (patch)
tree 1315c623bb7d9dfa8d366fa9cd2c6834ceeb5da5 /ext/pcre/pcrelib/pcre_tables.c
parent 9ea47aab740772adf0c69d8c94b208a464e599ea (diff)
download php-cd0b49c72aee33b3e44a9c589fcd93b9e1c7a64f.tar.gz
Imported Upstream version 5.2.9.dfsg.1 upstream/5.2.9.dfsg.1
Diffstat (limited to 'ext/pcre/pcrelib/pcre_tables.c')
-rw-r--r-- ext/pcre/pcrelib/pcre_tables.c 252
1 files changed, 145 insertions, 107 deletions
diff --git a/ext/pcre/pcrelib/pcre_tables.c b/ext/pcre/pcrelib/pcre_tables.c
index 646ddb568..08e5a4a84 100644
--- a/ext/pcre/pcrelib/pcre_tables.c
+++ b/ext/pcre/pcrelib/pcre_tables.c
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2008 University of Cambridge
+ Copyright (c) 1997-2009 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -85,6 +85,19 @@ const uschar _pcre_utf8_table4[] = {
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
+/* Table to translate from particular type value to the general value. */
+
+const int _pcre_ucp_gentype[] = {
+ ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */
+ ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lu, Lm, Lo, Lt */
+ ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */
+ ucp_N, ucp_N, ucp_N, /* Nd, Nl, No */
+ ucp_P, ucp_P, ucp_P, ucp_P, ucp_P, /* Pc, Pd, Pe, Pf, Pi */
+ ucp_P, ucp_P, /* Ps, Po */
+ ucp_S, ucp_S, ucp_S, ucp_S, /* Sc, Sk, Sm, So */
+ ucp_Z, ucp_Z, ucp_Z /* Zl, Zp, Zs */
+};
+
/* The pcre_utt[] table below translates Unicode property names into type and
code values. It is searched by binary chop, so must be in collating sequence of
name. Originally, the table contained pointers to the name strings in the first
@@ -92,7 +105,10 @@ field of each entry. However, that leads to a large number of relocations when
a shared library is dynamically loaded. A significant reduction is made by
putting all the names into a single, large string and then using offsets in the
table itself. Maintenance is more error-prone, but frequent changes to this
-data is unlikely. */
+data are unlikely.
+
+July 2008: There is now a script called maint/GenerateUtt.py which can be used
+to generate this data instead of maintaining it entirely by hand. */
const char _pcre_utt_names[] =
"Any\0"
@@ -106,8 +122,10 @@ const char _pcre_utt_names[] =
"Buhid\0"
"C\0"
"Canadian_Aboriginal\0"
+ "Carian\0"
"Cc\0"
"Cf\0"
+ "Cham\0"
"Cherokee\0"
"Cn\0"
"Co\0"
@@ -134,12 +152,14 @@ const char _pcre_utt_names[] =
"Inherited\0"
"Kannada\0"
"Katakana\0"
+ "Kayah_Li\0"
"Kharoshthi\0"
"Khmer\0"
"L\0"
"L&\0"
"Lao\0"
"Latin\0"
+ "Lepcha\0"
"Limbu\0"
"Linear_B\0"
"Ll\0"
@@ -147,6 +167,8 @@ const char _pcre_utt_names[] =
"Lo\0"
"Lt\0"
"Lu\0"
+ "Lycian\0"
+ "Lydian\0"
"M\0"
"Malayalam\0"
"Mc\0"
@@ -161,6 +183,7 @@ const char _pcre_utt_names[] =
"Nl\0"
"No\0"
"Ogham\0"
+ "Ol_Chiki\0"
"Old_Italic\0"
"Old_Persian\0"
"Oriya\0"
@@ -175,14 +198,17 @@ const char _pcre_utt_names[] =
"Pi\0"
"Po\0"
"Ps\0"
+ "Rejang\0"
"Runic\0"
"S\0"
+ "Saurashtra\0"
"Sc\0"
"Shavian\0"
"Sinhala\0"
"Sk\0"
"Sm\0"
"So\0"
+ "Sundanese\0"
"Syloti_Nagri\0"
"Syriac\0"
"Tagalog\0"
@@ -195,6 +221,7 @@ const char _pcre_utt_names[] =
"Tibetan\0"
"Tifinagh\0"
"Ugaritic\0"
+ "Vai\0"
"Yi\0"
"Z\0"
"Zl\0"
@@ -202,111 +229,122 @@ const char _pcre_utt_names[] =
"Zs\0";
const ucp_type_table _pcre_utt[] = {
- { 0, PT_ANY, 0 },
- { 4, PT_SC, ucp_Arabic },
- { 11, PT_SC, ucp_Armenian },
- { 20, PT_SC, ucp_Balinese },
- { 29, PT_SC, ucp_Bengali },
- { 37, PT_SC, ucp_Bopomofo },
- { 46, PT_SC, ucp_Braille },
- { 54, PT_SC, ucp_Buginese },
- { 63, PT_SC, ucp_Buhid },
- { 69, PT_GC, ucp_C },
- { 71, PT_SC, ucp_Canadian_Aboriginal },
- { 91, PT_PC, ucp_Cc },
- { 94, PT_PC, ucp_Cf },
- { 97, PT_SC, ucp_Cherokee },
- { 106, PT_PC, ucp_Cn },
- { 109, PT_PC, ucp_Co },
- { 112, PT_SC, ucp_Common },
- { 119, PT_SC, ucp_Coptic },
- { 126, PT_PC, ucp_Cs },
- { 129, PT_SC, ucp_Cuneiform },
- { 139, PT_SC, ucp_Cypriot },
- { 147, PT_SC, ucp_Cyrillic },
- { 156, PT_SC, ucp_Deseret },
- { 164, PT_SC, ucp_Devanagari },
- { 175, PT_SC, ucp_Ethiopic },
- { 184, PT_SC, ucp_Georgian },
- { 193, PT_SC, ucp_Glagolitic },
- { 204, PT_SC, ucp_Gothic },
- { 211, PT_SC, ucp_Greek },
- { 217, PT_SC, ucp_Gujarati },
- { 226, PT_SC, ucp_Gurmukhi },
- { 235, PT_SC, ucp_Han },
- { 239, PT_SC, ucp_Hangul },
- { 246, PT_SC, ucp_Hanunoo },
- { 254, PT_SC, ucp_Hebrew },
- { 261, PT_SC, ucp_Hiragana },
- { 270, PT_SC, ucp_Inherited },
- { 280, PT_SC, ucp_Kannada },
- { 288, PT_SC, ucp_Katakana },
- { 297, PT_SC, ucp_Kharoshthi },
- { 308, PT_SC, ucp_Khmer },
- { 314, PT_GC, ucp_L },
- { 316, PT_LAMP, 0 },
- { 319, PT_SC, ucp_Lao },
- { 323, PT_SC, ucp_Latin },
- { 329, PT_SC, ucp_Limbu },
- { 335, PT_SC, ucp_Linear_B },
- { 344, PT_PC, ucp_Ll },
- { 347, PT_PC, ucp_Lm },
- { 350, PT_PC, ucp_Lo },
- { 353, PT_PC, ucp_Lt },
- { 356, PT_PC, ucp_Lu },
- { 359, PT_GC, ucp_M },
- { 361, PT_SC, ucp_Malayalam },
- { 371, PT_PC, ucp_Mc },
- { 374, PT_PC, ucp_Me },
- { 377, PT_PC, ucp_Mn },
- { 380, PT_SC, ucp_Mongolian },
- { 390, PT_SC, ucp_Myanmar },
- { 398, PT_GC, ucp_N },
- { 400, PT_PC, ucp_Nd },
- { 403, PT_SC, ucp_New_Tai_Lue },
- { 415, PT_SC, ucp_Nko },
- { 419, PT_PC, ucp_Nl },
- { 422, PT_PC, ucp_No },
- { 425, PT_SC, ucp_Ogham },
- { 431, PT_SC, ucp_Old_Italic },
- { 442, PT_SC, ucp_Old_Persian },
- { 454, PT_SC, ucp_Oriya },
- { 460, PT_SC, ucp_Osmanya },
- { 468, PT_GC, ucp_P },
- { 470, PT_PC, ucp_Pc },
- { 473, PT_PC, ucp_Pd },
- { 476, PT_PC, ucp_Pe },
- { 479, PT_PC, ucp_Pf },
- { 482, PT_SC, ucp_Phags_Pa },
- { 491, PT_SC, ucp_Phoenician },
- { 502, PT_PC, ucp_Pi },
- { 505, PT_PC, ucp_Po },
- { 508, PT_PC, ucp_Ps },
- { 511, PT_SC, ucp_Runic },
- { 517, PT_GC, ucp_S },
- { 519, PT_PC, ucp_Sc },
- { 522, PT_SC, ucp_Shavian },
- { 530, PT_SC, ucp_Sinhala },
- { 538, PT_PC, ucp_Sk },
- { 541, PT_PC, ucp_Sm },
- { 544, PT_PC, ucp_So },
- { 547, PT_SC, ucp_Syloti_Nagri },
- { 560, PT_SC, ucp_Syriac },
- { 567, PT_SC, ucp_Tagalog },
- { 575, PT_SC, ucp_Tagbanwa },
- { 584, PT_SC, ucp_Tai_Le },
- { 591, PT_SC, ucp_Tamil },
- { 597, PT_SC, ucp_Telugu },
- { 604, PT_SC, ucp_Thaana },
- { 611, PT_SC, ucp_Thai },
- { 616, PT_SC, ucp_Tibetan },
- { 624, PT_SC, ucp_Tifinagh },
- { 633, PT_SC, ucp_Ugaritic },
- { 642, PT_SC, ucp_Yi },
- { 645, PT_GC, ucp_Z },
- { 647, PT_PC, ucp_Zl },
- { 650, PT_PC, ucp_Zp },
- { 653, PT_PC, ucp_Zs }
+ { 0, PT_ANY, 0 },
+ { 4, PT_SC, ucp_Arabic },
+ { 11, PT_SC, ucp_Armenian },
+ { 20, PT_SC, ucp_Balinese },
+ { 29, PT_SC, ucp_Bengali },
+ { 37, PT_SC, ucp_Bopomofo },
+ { 46, PT_SC, ucp_Braille },
+ { 54, PT_SC, ucp_Buginese },
+ { 63, PT_SC, ucp_Buhid },
+ { 69, PT_GC, ucp_C },
+ { 71, PT_SC, ucp_Canadian_Aboriginal },
+ { 91, PT_SC, ucp_Carian },
+ { 98, PT_PC, ucp_Cc },
+ { 101, PT_PC, ucp_Cf },
+ { 104, PT_SC, ucp_Cham },
+ { 109, PT_SC, ucp_Cherokee },
+ { 118, PT_PC, ucp_Cn },
+ { 121, PT_PC, ucp_Co },
+ { 124, PT_SC, ucp_Common },
+ { 131, PT_SC, ucp_Coptic },
+ { 138, PT_PC, ucp_Cs },
+ { 141, PT_SC, ucp_Cuneiform },
+ { 151, PT_SC, ucp_Cypriot },
+ { 159, PT_SC, ucp_Cyrillic },
+ { 168, PT_SC, ucp_Deseret },
+ { 176, PT_SC, ucp_Devanagari },
+ { 187, PT_SC, ucp_Ethiopic },
+ { 196, PT_SC, ucp_Georgian },
+ { 205, PT_SC, ucp_Glagolitic },
+ { 216, PT_SC, ucp_Gothic },
+ { 223, PT_SC, ucp_Greek },
+ { 229, PT_SC, ucp_Gujarati },
+ { 238, PT_SC, ucp_Gurmukhi },
+ { 247, PT_SC, ucp_Han },
+ { 251, PT_SC, ucp_Hangul },
+ { 258, PT_SC, ucp_Hanunoo },
+ { 266, PT_SC, ucp_Hebrew },
+ { 273, PT_SC, ucp_Hiragana },
+ { 282, PT_SC, ucp_Inherited },
+ { 292, PT_SC, ucp_Kannada },
+ { 300, PT_SC, ucp_Katakana },
+ { 309, PT_SC, ucp_Kayah_Li },
+ { 318, PT_SC, ucp_Kharoshthi },
+ { 329, PT_SC, ucp_Khmer },
+ { 335, PT_GC, ucp_L },
+ { 337, PT_LAMP, 0 },
+ { 340, PT_SC, ucp_Lao },
+ { 344, PT_SC, ucp_Latin },
+ { 350, PT_SC, ucp_Lepcha },
+ { 357, PT_SC, ucp_Limbu },
+ { 363, PT_SC, ucp_Linear_B },
+ { 372, PT_PC, ucp_Ll },
+ { 375, PT_PC, ucp_Lm },
+ { 378, PT_PC, ucp_Lo },
+ { 381, PT_PC, ucp_Lt },
+ { 384, PT_PC, ucp_Lu },
+ { 387, PT_SC, ucp_Lycian },
+ { 394, PT_SC, ucp_Lydian },
+ { 401, PT_GC, ucp_M },
+ { 403, PT_SC, ucp_Malayalam },
+ { 413, PT_PC, ucp_Mc },
+ { 416, PT_PC, ucp_Me },
+ { 419, PT_PC, ucp_Mn },
+ { 422, PT_SC, ucp_Mongolian },
+ { 432, PT_SC, ucp_Myanmar },
+ { 440, PT_GC, ucp_N },
+ { 442, PT_PC, ucp_Nd },
+ { 445, PT_SC, ucp_New_Tai_Lue },
+ { 457, PT_SC, ucp_Nko },
+ { 461, PT_PC, ucp_Nl },
+ { 464, PT_PC, ucp_No },
+ { 467, PT_SC, ucp_Ogham },
+ { 473, PT_SC, ucp_Ol_Chiki },
+ { 482, PT_SC, ucp_Old_Italic },
+ { 493, PT_SC, ucp_Old_Persian },
+ { 505, PT_SC, ucp_Oriya },
+ { 511, PT_SC, ucp_Osmanya },
+ { 519, PT_GC, ucp_P },
+ { 521, PT_PC, ucp_Pc },
+ { 524, PT_PC, ucp_Pd },
+ { 527, PT_PC, ucp_Pe },
+ { 530, PT_PC, ucp_Pf },
+ { 533, PT_SC, ucp_Phags_Pa },
+ { 542, PT_SC, ucp_Phoenician },
+ { 553, PT_PC, ucp_Pi },
+ { 556, PT_PC, ucp_Po },
+ { 559, PT_PC, ucp_Ps },
+ { 562, PT_SC, ucp_Rejang },
+ { 569, PT_SC, ucp_Runic },
+ { 575, PT_GC, ucp_S },
+ { 577, PT_SC, ucp_Saurashtra },
+ { 588, PT_PC, ucp_Sc },
+ { 591, PT_SC, ucp_Shavian },
+ { 599, PT_SC, ucp_Sinhala },
+ { 607, PT_PC, ucp_Sk },
+ { 610, PT_PC, ucp_Sm },
+ { 613, PT_PC, ucp_So },
+ { 616, PT_SC, ucp_Sundanese },
+ { 626, PT_SC, ucp_Syloti_Nagri },
+ { 639, PT_SC, ucp_Syriac },
+ { 646, PT_SC, ucp_Tagalog },
+ { 654, PT_SC, ucp_Tagbanwa },
+ { 663, PT_SC, ucp_Tai_Le },
+ { 670, PT_SC, ucp_Tamil },
+ { 676, PT_SC, ucp_Telugu },
+ { 683, PT_SC, ucp_Thaana },
+ { 690, PT_SC, ucp_Thai },
+ { 695, PT_SC, ucp_Tibetan },
+ { 703, PT_SC, ucp_Tifinagh },
+ { 712, PT_SC, ucp_Ugaritic },
+ { 721, PT_SC, ucp_Vai },
+ { 725, PT_SC, ucp_Yi },
+ { 728, PT_GC, ucp_Z },
+ { 730, PT_PC, ucp_Zl },
+ { 733, PT_PC, ucp_Zp },
+ { 736, PT_PC, ucp_Zs }
};
const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table);