1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
|
/*
* Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @file picobase.c
*
* Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
* All rights reserved.
*
* History:
* - 2009-04-20 -- initial version
*
*/
#include "picoos.h"
#include "picodbg.h"
#include "picodefs.h"
#include "picobase.h"
#ifdef __cplusplus
extern "C" {
#endif
#if 0
}
#endif
/**
* @addtogroup picobase
*
* @b Unicode_UTF8_functions
*
* UTF8
* scalar value 1st Byte 2nd Byte 3rd Byte 4th Byte
* 00000000 0xxxxxxx 0xxxxxxx
* 00000yyy yyxxxxxx 110yyyyy 10xxxxxx
* zzzzyyyy yyxxxxxx 1110zzzz 10yyyyyy 10xxxxxx
* 000uuuuu zzzzyyyy yyxxxxxx 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
*
*/
/**
 * Counts the UTF8 characters contained in 'utf8str'.
 *
 * Scanning stops at the first zero byte or after 'maxlen' bytes, whichever
 * comes first. A character whose lead byte was seen but whose continuation
 * bytes are cut off by the end of the scan is still counted.
 *
 * @param utf8str : UTF8 byte string to examine
 * @param maxlen  : maximum number of bytes to examine
 * @return number of characters found, or -1 as soon as a byte is met that
 *         cannot appear at its position (bad lead byte or bad continuation)
 */
picoos_int32 picobase_utf8_length(const picoos_uint8 *utf8str,
                                  const picoos_uint16 maxlen) {
    picoos_uint16 idx;
    picoos_uint16 count = 0;
    picoos_uint8 pending = 0;   /* continuation bytes still expected */

    for (idx = 0; (idx < maxlen) && (utf8str[idx] != 0); idx++) {
        const picoos_uint8 cur = utf8str[idx];

        if (pending > 0) {
            /* inside a multi-byte character only 10xxxxxx is acceptable */
            if ((cur < 0x80) || (cur >= 0xC0)) {
                return -1;
            }
            pending--;
            continue;
        }

        /* start of a new character: classify the lead byte */
        if (cur >= 0x80) {
            if ((cur < 0xC0) || (cur >= 0xF8)) {
                /* stray continuation byte or 11111xxx */
                return -1;
            }
            if (cur >= 0xF0) {
                pending = 3;
            } else if (cur >= 0xE0) {
                pending = 2;
            } else {
                pending = 1;
            }
        }
        count++;
    }
    return count;
}
/**
 * Converts utf32 input to lowercase
 *
 * The mapping is a hard-coded table: two contiguous ranges with a fixed
 * offset, then one switch for code points 7680..9423 and one switch for
 * everything else. Code points without an entry are returned unchanged.
 *
 * @param utf32 : a single character encoded in UTF32
 * @return a single lowercase character encoded in UTF32
 */
static picoos_uint32 base_utf32_lowercase (picoos_uint32 utf32)
{
picoos_uint32 lc;
/* default: no mapping, return the input itself */
lc = utf32;
/* 65313..65338 (0xFF21..0xFF3A): lowercase is 32 positions higher */
if (((utf32 >= 65313) && (utf32 <= 65338))) {
lc = (utf32 + 32);
} else if (((utf32 >= 66560) && (utf32 <= 66599))) {
/* 66560..66599 (0x10400..0x10427): lowercase is 40 positions higher */
lc = (utf32 + 40);
} else if (((utf32 >= 7680) && (utf32 <= 9423))) {
/* 7680..9423 (0x1E00..0x24CF): handled case by case */
switch (utf32) {
/* upper/lower pairs stored as (even, odd): the labels list both members,
   but only the even (uppercase) one is shifted by +1 */
case 7680: case 7681: case 7682: case 7683: case 7684: case 7685: case 7686: case 7687: case 7688: case 7689:
case 7690: case 7691: case 7692: case 7693: case 7694: case 7695: case 7696: case 7697: case 7698: case 7699: case 7700: case 7701:
case 7702: case 7703: case 7704: case 7705: case 7706: case 7707: case 7708: case 7709: case 7710: case 7711: case 7712: case 7713:
case 7714: case 7715: case 7716: case 7717: case 7718: case 7719: case 7720: case 7721: case 7722: case 7723: case 7724: case 7725:
case 7726: case 7727: case 7728: case 7729: case 7730: case 7731: case 7732: case 7733: case 7734: case 7735: case 7736: case 7737:
case 7738: case 7739: case 7740: case 7741: case 7742: case 7743: case 7744: case 7745: case 7746: case 7747: case 7748: case 7749:
case 7750: case 7751: case 7752: case 7753: case 7754: case 7755: case 7756: case 7757: case 7758: case 7759: case 7760: case 7761:
case 7762: case 7763: case 7764: case 7765: case 7766: case 7767: case 7768: case 7769: case 7770: case 7771: case 7772: case 7773:
case 7774: case 7775: case 7776: case 7777: case 7778: case 7779: case 7780: case 7781: case 7782: case 7783: case 7784: case 7785:
case 7786: case 7787: case 7788: case 7789: case 7790: case 7791: case 7792: case 7793: case 7794: case 7795: case 7796: case 7797:
case 7798: case 7799: case 7800: case 7801: case 7802: case 7803: case 7804: case 7805: case 7806: case 7807: case 7808: case 7809:
case 7810: case 7811: case 7812: case 7813: case 7814: case 7815: case 7816: case 7817: case 7818: case 7819: case 7820: case 7821:
case 7822: case 7823: case 7824: case 7825: case 7826: case 7827: case 7828: case 7840: case 7841: case 7842: case 7843:
case 7844: case 7845: case 7846: case 7847: case 7848: case 7849: case 7850: case 7851: case 7852: case 7853: case 7854: case 7855:
case 7856: case 7857: case 7858: case 7859: case 7860: case 7861: case 7862: case 7863: case 7864: case 7865: case 7866: case 7867:
case 7868: case 7869: case 7870: case 7871: case 7872: case 7873: case 7874: case 7875: case 7876: case 7877: case 7878: case 7879:
case 7880: case 7881: case 7882: case 7883: case 7884: case 7885: case 7886: case 7887: case 7888: case 7889: case 7890: case 7891:
case 7892: case 7893: case 7894: case 7895: case 7896: case 7897: case 7898: case 7899: case 7900: case 7901: case 7902: case 7903:
case 7904: case 7905: case 7906: case 7907: case 7908: case 7909: case 7910: case 7911: case 7912: case 7913: case 7914: case 7915:
case 7916: case 7917: case 7918: case 7919: case 7920: case 7921: case 7922: case 7923: case 7924: case 7925: case 7926: case 7927:
case 7928:
if ( !(((utf32) % 2 == 1))) {
lc = (utf32 + 1);
}
break;
/* blocks whose lowercase form sits 8 positions lower */
case 7944: case 7945: case 7946: case 7947: case 7948: case 7949: case 7950: case 7951: case 7960:
case 7961: case 7962: case 7963: case 7964: case 7965: case 7976: case 7977: case 7978: case 7979: case 7980: case 7981:
case 7982: case 7983: case 7992: case 7993: case 7994: case 7995: case 7996: case 7997: case 7998: case 7999:
case 8008: case 8009: case 8010: case 8011: case 8012: case 8013: case 8040: case 8041: case 8042: case 8043: case 8044:
case 8045: case 8046: case 8047: case 8072: case 8073: case 8074: case 8075: case 8076: case 8077: case 8078: case 8079:
case 8088: case 8089: case 8090: case 8091: case 8092: case 8093: case 8094: case 8095: case 8104: case 8105:
case 8106: case 8107: case 8108: case 8109: case 8110: case 8111:
lc = (utf32 - 8);
break;
/* 8025..8031: only the odd code points are shifted by -8, the even
   ones in the label list stay unchanged */
case 8025: case 8026: case 8027: case 8028: case 8029: case 8030: case 8031:
if (((utf32) % 2 == 1)) {
lc = (utf32 - 8);
}
break;
/* 8544..8559 (0x2160..0x216F): lowercase is 16 positions higher */
case 8544: case 8545: case 8546: case 8547: case 8548: case 8549: case 8550: case 8551: case 8552: case 8553:
case 8554: case 8555: case 8556: case 8557: case 8558: case 8559:
lc = (utf32 + 16);
break;
/* 9398..9423 (0x24B6..0x24CF): lowercase is 26 positions higher */
case 9398: case 9399: case 9400: case 9401: case 9402: case 9403: case 9404: case 9405: case 9406: case 9407:
case 9408: case 9409: case 9410: case 9411: case 9412: case 9413: case 9414: case 9415: case 9416: case 9417: case 9418: case 9419:
case 9420: case 9421: case 9422: case 9423:
lc = (utf32 + 26);
break;
/* irregular single mappings inside 7680..9423 */
case 8120:
lc = 8112;
break;
case 8121:
lc = 8113;
break;
case 8122:
lc = 8048;
break;
case 8123:
lc = 8049;
break;
case 8124:
lc = 8115;
break;
case 8136:
lc = 8050;
break;
case 8137:
lc = 8051;
break;
case 8138:
lc = 8052;
break;
case 8139:
lc = 8053;
break;
case 8140:
lc = 8131;
break;
case 8152:
lc = 8144;
break;
case 8153:
lc = 8145;
break;
case 8154:
lc = 8054;
break;
case 8155:
lc = 8055;
break;
case 8168:
lc = 8160;
break;
case 8169:
lc = 8161;
break;
case 8170:
lc = 8058;
break;
case 8171:
lc = 8059;
break;
case 8172:
lc = 8165;
break;
case 8184:
lc = 8056;
break;
case 8185:
lc = 8057;
break;
case 8186:
lc = 8060;
break;
case 8187:
lc = 8061;
break;
case 8188:
lc = 8179;
break;
/* the next three map to code points outside the 7680..9423 window */
case 8486:
lc = 969;
break;
case 8490:
lc = 107;
break;
case 8491:
lc = 229;
break;
default:
break;
}
} else {
/* every code point outside the ranges tested above */
switch (utf32) {
/* runs whose lowercase form is 32 positions higher
   (65..90, 192..214, 216..222, 913..929, 931..939, 1040..1071) */
case 65: case 66: case 67: case 68: case 69: case 70: case 71: case 72: case 73: case 74:
case 75: case 76: case 77: case 78: case 79: case 80: case 81: case 82: case 83: case 84: case 85: case 86:
case 87: case 88: case 89: case 90: case 192: case 193: case 194: case 195: case 196: case 197: case 198:
case 199: case 200: case 201: case 202: case 203: case 204: case 205: case 206: case 207: case 208: case 209: case 210:
case 211: case 212: case 213: case 214: case 216: case 217: case 218: case 219: case 220: case 221: case 222:
case 913: case 914: case 915: case 916: case 917: case 918: case 919: case 920: case 921: case 922: case 923:
case 924: case 925: case 926: case 927: case 928: case 929: case 931: case 932: case 933: case 934: case 935:
case 936: case 937: case 938: case 939: case 1040: case 1041: case 1042: case 1043: case 1044: case 1045: case 1046:
case 1047: case 1048: case 1049: case 1050: case 1051: case 1052: case 1053: case 1054: case 1055: case 1056: case 1057: case 1058:
case 1059: case 1060: case 1061: case 1062: case 1063: case 1064: case 1065: case 1066: case 1067: case 1068: case 1069: case 1070:
case 1071:
lc = (utf32 + 32);
break;
/* upper/lower pairs stored as (even, odd): only the even member is
   shifted by +1, odd labels in the list stay unchanged */
case 256: case 257: case 258: case 259: case 260: case 261: case 262: case 263: case 264: case 265:
case 266: case 267: case 268: case 269: case 270: case 271: case 272: case 273: case 274: case 275: case 276: case 277:
case 278: case 279: case 280: case 281: case 282: case 283: case 284: case 285: case 286: case 287: case 288: case 289:
case 290: case 291: case 292: case 293: case 294: case 295: case 296: case 297: case 298: case 299: case 300: case 301:
case 302: case 303: case 305: case 306: case 307: case 308: case 309: case 310: case 330: case 331:
case 332: case 333: case 334: case 335: case 336: case 337: case 338: case 339: case 340: case 341: case 342: case 343:
case 344: case 345: case 346: case 347: case 348: case 349: case 350: case 351: case 352: case 353: case 354: case 355:
case 356: case 357: case 358: case 359: case 360: case 361: case 362: case 363: case 364: case 365: case 366: case 367:
case 368: case 369: case 370: case 371: case 372: case 373: case 374: case 416: case 417: case 418: case 419:
case 420: case 478: case 479: case 480: case 481: case 482: case 483: case 484: case 485: case 486: case 487:
case 488: case 489: case 490: case 491: case 492: case 493: case 494: case 504: case 505: case 506: case 507:
case 508: case 509: case 510: case 511: case 512: case 513: case 514: case 515: case 516: case 517: case 518: case 519:
case 520: case 521: case 522: case 523: case 524: case 525: case 526: case 527: case 528: case 529: case 530: case 531:
case 532: case 533: case 534: case 535: case 536: case 537: case 538: case 539: case 540: case 541: case 542:
case 546: case 547: case 548: case 549: case 550: case 551: case 552: case 553: case 554: case 555: case 556: case 557:
case 558: case 559: case 560: case 561: case 562: case 984: case 985: case 986: case 987: case 988: case 989:
case 990: case 991: case 992: case 993: case 994: case 995: case 996: case 997: case 998: case 999: case 1000: case 1001:
case 1002: case 1003: case 1004: case 1005: case 1006: case 1120: case 1121: case 1122: case 1123: case 1124: case 1125:
case 1126: case 1127: case 1128: case 1129: case 1130: case 1131: case 1132: case 1133: case 1134: case 1135: case 1136: case 1137:
case 1138: case 1139: case 1140: case 1141: case 1142: case 1143: case 1144: case 1145: case 1146: case 1147: case 1148: case 1149:
case 1150: case 1151: case 1152: case 1162: case 1163: case 1164: case 1165: case 1166: case 1167: case 1168: case 1169:
case 1170: case 1171: case 1172: case 1173: case 1174: case 1175: case 1176: case 1177: case 1178: case 1179: case 1180: case 1181:
case 1182: case 1183: case 1184: case 1185: case 1186: case 1187: case 1188: case 1189: case 1190: case 1191: case 1192: case 1193:
case 1194: case 1195: case 1196: case 1197: case 1198: case 1199: case 1200: case 1201: case 1202: case 1203: case 1204: case 1205:
case 1206: case 1207: case 1208: case 1209: case 1210: case 1211: case 1212: case 1213: case 1214: case 1232: case 1233:
case 1234: case 1235: case 1236: case 1237: case 1238: case 1239: case 1240: case 1241: case 1242: case 1243: case 1244: case 1245:
case 1246: case 1247: case 1248: case 1249: case 1250: case 1251: case 1252: case 1253: case 1254: case 1255: case 1256: case 1257:
case 1258: case 1259: case 1260: case 1261: case 1262: case 1263: case 1264: case 1265: case 1266: case 1267: case 1268:
case 1280: case 1281: case 1282: case 1283: case 1284: case 1285: case 1286: case 1287: case 1288: case 1289: case 1290: case 1291:
case 1292: case 1293: case 1294:
if ( !(((utf32) % 2 == 1))) {
lc = (utf32 + 1);
}
break;
/* upper/lower pairs stored as (odd, even): here only the odd member is
   shifted by +1, even labels in the list stay unchanged */
case 313: case 314: case 315: case 316: case 317: case 318: case 319: case 320: case 321: case 322:
case 323: case 324: case 325: case 326: case 327: case 377: case 378: case 379: case 380: case 381:
case 459: case 460: case 461: case 462: case 463: case 464: case 465: case 466: case 467: case 468: case 469: case 470:
case 471: case 472: case 473: case 474: case 475: case 1217: case 1218: case 1219: case 1220: case 1221: case 1222:
case 1223: case 1224: case 1225: case 1226: case 1227: case 1228: case 1229:
if (((utf32) % 2 == 1)) {
lc = (utf32 + 1);
}
break;
/* 1024..1039: lowercase is 80 positions higher */
case 1024: case 1025: case 1026: case 1027: case 1028: case 1029: case 1030: case 1031: case 1032: case 1033:
case 1034: case 1035: case 1036: case 1037: case 1038: case 1039:
lc = (utf32 + 80);
break;
/* 1329..1366: lowercase is 48 positions higher */
case 1329: case 1330: case 1331: case 1332: case 1333: case 1334: case 1335: case 1336: case 1337: case 1338:
case 1339: case 1340: case 1341: case 1342: case 1343: case 1344: case 1345: case 1346: case 1347: case 1348: case 1349: case 1350:
case 1351: case 1352: case 1353: case 1354: case 1355: case 1356: case 1357: case 1358: case 1359: case 1360: case 1361: case 1362:
case 1363: case 1364: case 1365: case 1366:
lc = (utf32 + 48);
break;
/* irregular single mappings */
case 304:
lc = 105;
break;
case 376:
lc = 255;
break;
case 385:
lc = 595;
break;
case 386:
lc = 387;
break;
case 388:
lc = 389;
break;
case 390:
lc = 596;
break;
case 391:
lc = 392;
break;
case 393:
lc = 598;
break;
case 394:
lc = 599;
break;
case 395:
lc = 396;
break;
case 398:
lc = 477;
break;
case 399:
lc = 601;
break;
case 400:
lc = 603;
break;
case 401:
lc = 402;
break;
case 403:
lc = 608;
break;
case 404:
lc = 611;
break;
case 406:
lc = 617;
break;
case 407:
lc = 616;
break;
case 408:
lc = 409;
break;
case 412:
lc = 623;
break;
case 413:
lc = 626;
break;
case 415:
lc = 629;
break;
case 422:
lc = 640;
break;
case 423:
lc = 424;
break;
case 425:
lc = 643;
break;
case 428:
lc = 429;
break;
case 430:
lc = 648;
break;
case 431:
lc = 432;
break;
case 433:
lc = 650;
break;
case 434:
lc = 651;
break;
case 435:
lc = 436;
break;
case 437:
lc = 438;
break;
case 439:
lc = 658;
break;
case 440:
lc = 441;
break;
case 444:
lc = 445;
break;
/* in each of the next pairs two different inputs share one lowercase form */
case 452:
lc = 454;
break;
case 453:
lc = 454;
break;
case 455:
lc = 457;
break;
case 456:
lc = 457;
break;
case 458:
lc = 460;
break;
case 497:
lc = 499;
break;
case 498:
lc = 499;
break;
case 500:
lc = 501;
break;
case 502:
lc = 405;
break;
case 503:
lc = 447;
break;
case 544:
lc = 414;
break;
case 902:
lc = 940;
break;
case 904:
lc = 941;
break;
case 905:
lc = 942;
break;
case 906:
lc = 943;
break;
case 908:
lc = 972;
break;
case 910:
lc = 973;
break;
case 911:
lc = 974;
break;
case 1012:
lc = 952;
break;
case 1015:
lc = 1016;
break;
case 1017:
lc = 1010;
break;
case 1018:
lc = 1019;
break;
case 1272:
lc = 1273;
break;
default:
break;
}
}
return lc;
}
/**
 * Converts utf32 input to uppercase
 *
 * The mapping is a hard-coded table: two contiguous ranges with a fixed
 * offset, then one switch for code points 7681..9449 and one switch for
 * everything else. Code points without an entry are returned unchanged.
 *
 * @param utf32 : a single character encoded in UTF32
 * @return a single uppercase character encoded in UTF32
 */
static picoos_uint32 base_utf32_uppercase (picoos_uint32 utf32)
{
/* note: despite its name, 'lc' holds the uppercase result in this function */
picoos_uint32 lc;
/* default: no mapping, return the input itself */
lc = utf32;
/* 65345..65370 (0xFF41..0xFF5A): uppercase is 32 positions lower */
if (((utf32 >= 65345) && (utf32 <= 65370))) {
lc = (utf32 - 32);
} else if (((utf32 >= 66600) && (utf32 <= 66639))) {
/* 66600..66639 (0x10428..0x1044F): uppercase is 40 positions lower */
lc = (utf32 - 40);
} else if (((utf32 >= 7681) && (utf32 <= 9449))) {
/* 7681..9449 (0x1E01..0x24E9): handled case by case */
switch (utf32) {
/* upper/lower pairs stored as (even, odd): the labels list both members,
   but only the odd (lowercase) one is shifted by -1 */
case 7681: case 7682: case 7683: case 7684: case 7685: case 7686: case 7687: case 7688: case 7689: case 7690:
case 7691: case 7692: case 7693: case 7694: case 7695: case 7696: case 7697: case 7698: case 7699: case 7700: case 7701: case 7702:
case 7703: case 7704: case 7705: case 7706: case 7707: case 7708: case 7709: case 7710: case 7711: case 7712: case 7713: case 7714:
case 7715: case 7716: case 7717: case 7718: case 7719: case 7720: case 7721: case 7722: case 7723: case 7724: case 7725: case 7726:
case 7727: case 7728: case 7729: case 7730: case 7731: case 7732: case 7733: case 7734: case 7735: case 7736: case 7737: case 7738:
case 7739: case 7740: case 7741: case 7742: case 7743: case 7744: case 7745: case 7746: case 7747: case 7748: case 7749: case 7750:
case 7751: case 7752: case 7753: case 7754: case 7755: case 7756: case 7757: case 7758: case 7759: case 7760: case 7761: case 7762:
case 7763: case 7764: case 7765: case 7766: case 7767: case 7768: case 7769: case 7770: case 7771: case 7772: case 7773: case 7774:
case 7775: case 7776: case 7777: case 7778: case 7779: case 7780: case 7781: case 7782: case 7783: case 7784: case 7785: case 7786:
case 7787: case 7788: case 7789: case 7790: case 7791: case 7792: case 7793: case 7794: case 7795: case 7796: case 7797: case 7798:
case 7799: case 7800: case 7801: case 7802: case 7803: case 7804: case 7805: case 7806: case 7807: case 7808: case 7809: case 7810:
case 7811: case 7812: case 7813: case 7814: case 7815: case 7816: case 7817: case 7818: case 7819: case 7820: case 7821: case 7822:
case 7823: case 7824: case 7825: case 7826: case 7827: case 7828: case 7829: case 7841: case 7842: case 7843: case 7844:
case 7845: case 7846: case 7847: case 7848: case 7849: case 7850: case 7851: case 7852: case 7853: case 7854: case 7855: case 7856:
case 7857: case 7858: case 7859: case 7860: case 7861: case 7862: case 7863: case 7864: case 7865: case 7866: case 7867: case 7868:
case 7869: case 7870: case 7871: case 7872: case 7873: case 7874: case 7875: case 7876: case 7877: case 7878: case 7879: case 7880:
case 7881: case 7882: case 7883: case 7884: case 7885: case 7886: case 7887: case 7888: case 7889: case 7890: case 7891: case 7892:
case 7893: case 7894: case 7895: case 7896: case 7897: case 7898: case 7899: case 7900: case 7901: case 7902: case 7903: case 7904:
case 7905: case 7906: case 7907: case 7908: case 7909: case 7910: case 7911: case 7912: case 7913: case 7914: case 7915: case 7916:
case 7917: case 7918: case 7919: case 7920: case 7921: case 7922: case 7923: case 7924: case 7925: case 7926: case 7927: case 7928:
case 7929:
if (((utf32) % 2 == 1)) {
lc = (utf32 - 1);
}
break;
/* blocks whose uppercase form sits 8 positions higher */
case 7936: case 7937: case 7938: case 7939: case 7940: case 7941: case 7942: case 7943: case 7952:
case 7953: case 7954: case 7955: case 7956: case 7957: case 7968: case 7969: case 7970: case 7971: case 7972: case 7973:
case 7974: case 7975: case 7984: case 7985: case 7986: case 7987: case 7988: case 7989: case 7990: case 7991:
case 8000: case 8001: case 8002: case 8003: case 8004: case 8005: case 8032: case 8033: case 8034: case 8035: case 8036:
case 8037: case 8038: case 8039: case 8064: case 8065: case 8066: case 8067: case 8068: case 8069: case 8070: case 8071:
case 8080: case 8081: case 8082: case 8083: case 8084: case 8085: case 8086: case 8087: case 8096: case 8097:
case 8098: case 8099: case 8100: case 8101: case 8102: case 8103:
lc = (utf32 + 8);
break;
/* 8017..8023: only the odd code points are shifted by +8, the even
   ones in the label list stay unchanged */
case 8017: case 8018: case 8019: case 8020: case 8021: case 8022: case 8023:
if (((utf32) % 2 == 1)) {
lc = (utf32 + 8);
}
break;
/* 8560..8575 (0x2170..0x217F): uppercase is 16 positions lower */
case 8560: case 8561: case 8562: case 8563: case 8564: case 8565: case 8566: case 8567: case 8568: case 8569:
case 8570: case 8571: case 8572: case 8573: case 8574: case 8575:
lc = (utf32 - 16);
break;
/* 9424..9449 (0x24D0..0x24E9): uppercase is 26 positions lower */
case 9424: case 9425: case 9426: case 9427: case 9428: case 9429: case 9430: case 9431: case 9432: case 9433:
case 9434: case 9435: case 9436: case 9437: case 9438: case 9439: case 9440: case 9441: case 9442: case 9443: case 9444: case 9445:
case 9446: case 9447: case 9448: case 9449:
lc = (utf32 - 26);
break;
/* irregular single mappings inside 7681..9449 */
case 7835:
lc = 7776;
break;
case 8048:
lc = 8122;
break;
case 8049:
lc = 8123;
break;
case 8050:
lc = 8136;
break;
case 8051:
lc = 8137;
break;
case 8052:
lc = 8138;
break;
case 8053:
lc = 8139;
break;
case 8054:
lc = 8154;
break;
case 8055:
lc = 8155;
break;
case 8056:
lc = 8184;
break;
case 8057:
lc = 8185;
break;
case 8058:
lc = 8170;
break;
case 8059:
lc = 8171;
break;
case 8060:
lc = 8186;
break;
case 8061:
lc = 8187;
break;
case 8112:
lc = 8120;
break;
case 8113:
lc = 8121;
break;
case 8115:
lc = 8124;
break;
/* maps to a code point outside the 7681..9449 window */
case 8126:
lc = 921;
break;
case 8131:
lc = 8140;
break;
case 8144:
lc = 8152;
break;
case 8145:
lc = 8153;
break;
case 8160:
lc = 8168;
break;
case 8161:
lc = 8169;
break;
case 8165:
lc = 8172;
break;
case 8179:
lc = 8188;
break;
default:
break;
}
} else {
/* every code point outside the ranges tested above */
switch (utf32) {
/* runs whose uppercase form is 32 positions lower
   (97..122, 224..254, 945..961, 963..971, 1072..1103) */
case 97: case 98: case 99: case 100: case 101: case 102: case 103: case 104: case 105: case 106:
case 107: case 108: case 109: case 110: case 111: case 112: case 113: case 114: case 115: case 116: case 117: case 118:
case 119: case 120: case 121: case 122: case 224: case 225: case 226: case 227: case 228: case 229: case 230:
case 231: case 232: case 233: case 234: case 235: case 236: case 237: case 238: case 239: case 240: case 241: case 242:
case 243: case 244: case 245: case 246: case 247: case 248: case 249: case 250: case 251: case 252: case 253: case 254:
case 945: case 946: case 947: case 948: case 949: case 950: case 951: case 952: case 953: case 954: case 955:
case 956: case 957: case 958: case 959: case 960: case 961: case 963: case 964: case 965: case 966: case 967:
case 968: case 969: case 970: case 971: case 1072: case 1073: case 1074: case 1075: case 1076: case 1077: case 1078:
case 1079: case 1080: case 1081: case 1082: case 1083: case 1084: case 1085: case 1086: case 1087: case 1088: case 1089: case 1090:
case 1091: case 1092: case 1093: case 1094: case 1095: case 1096: case 1097: case 1098: case 1099: case 1100: case 1101: case 1102:
case 1103:
/* 247 (0xF7) sits inside the 224..254 label run but is left unchanged */
if ((utf32 != 247)) {
lc = (utf32 - 32);
}
break;
/* upper/lower pairs stored as (even, odd): only the odd member is
   shifted by -1, even labels in the list stay unchanged */
case 257: case 258: case 259: case 260: case 261: case 262: case 263: case 264: case 265: case 266:
case 267: case 268: case 269: case 270: case 271: case 272: case 273: case 274: case 275: case 276: case 277: case 278:
case 279: case 280: case 281: case 282: case 283: case 284: case 285: case 286: case 287: case 288: case 289: case 290:
case 291: case 292: case 293: case 294: case 295: case 296: case 297: case 298: case 299: case 300: case 301: case 302:
case 303: case 304: case 306: case 307: case 308: case 309: case 310: case 311: case 331: case 332:
case 333: case 334: case 335: case 336: case 337: case 338: case 339: case 340: case 341: case 342: case 343: case 344:
case 345: case 346: case 347: case 348: case 349: case 350: case 351: case 352: case 353: case 354: case 355: case 356:
case 357: case 358: case 359: case 360: case 361: case 362: case 363: case 364: case 365: case 366: case 367: case 368:
case 369: case 370: case 371: case 372: case 373: case 374: case 375: case 417: case 418: case 419: case 420:
case 421: case 481: case 482: case 483: case 484: case 485: case 486: case 487: case 488: case 489: case 490:
case 491: case 492: case 493: case 494: case 495: case 507: case 508: case 509: case 510: case 511:
case 513: case 514: case 515: case 516: case 517: case 518: case 519: case 520: case 521: case 522: case 523: case 524:
case 525: case 526: case 527: case 528: case 529: case 530: case 531: case 532: case 533: case 534: case 535: case 536:
case 537: case 538: case 539: case 540: case 541: case 542: case 543: case 544: case 546: case 547: case 548:
case 549: case 550: case 551: case 552: case 553: case 554: case 555: case 556: case 557: case 558: case 559: case 560:
case 561: case 562: case 563: case 985: case 986: case 987: case 988: case 989: case 990: case 991: case 992:
case 993: case 994: case 995: case 996: case 997: case 998: case 999: case 1000: case 1001: case 1002: case 1003: case 1004:
case 1005: case 1006: case 1007: case 1121: case 1122: case 1123: case 1124: case 1125: case 1126: case 1127: case 1128:
case 1129: case 1130: case 1131: case 1132: case 1133: case 1134: case 1135: case 1136: case 1137: case 1138: case 1139: case 1140:
case 1141: case 1142: case 1143: case 1144: case 1145: case 1146: case 1147: case 1148: case 1149: case 1150: case 1151: case 1152:
case 1153: case 1163: case 1164: case 1165: case 1166: case 1167: case 1168: case 1169: case 1170: case 1171: case 1172:
case 1173: case 1174: case 1175: case 1176: case 1177: case 1178: case 1179: case 1180: case 1181: case 1182: case 1183: case 1184:
case 1185: case 1186: case 1187: case 1188: case 1189: case 1190: case 1191: case 1192: case 1193: case 1194: case 1195: case 1196:
case 1197: case 1198: case 1199: case 1200: case 1201: case 1202: case 1203: case 1204: case 1205: case 1206: case 1207: case 1208:
case 1209: case 1210: case 1211: case 1212: case 1213: case 1214: case 1215: case 1233: case 1234: case 1235: case 1236:
case 1237: case 1238: case 1239: case 1240: case 1241: case 1242: case 1243: case 1244: case 1245: case 1246: case 1247: case 1248:
case 1249: case 1250: case 1251: case 1252: case 1253: case 1254: case 1255: case 1256: case 1257: case 1258: case 1259: case 1260:
case 1261: case 1262: case 1263: case 1264: case 1265: case 1266: case 1267: case 1268: case 1269: case 1281: case 1282:
case 1283: case 1284: case 1285: case 1286: case 1287: case 1288: case 1289: case 1290: case 1291: case 1292: case 1293: case 1294:
case 1295:
if (((utf32) % 2 == 1)) {
lc = (utf32 - 1);
}
break;
/* upper/lower pairs stored as (odd, even): here only the even member is
   shifted by -1, odd labels in the list stay unchanged */
case 314: case 315: case 316: case 317: case 318: case 319: case 320: case 321: case 322: case 323:
case 324: case 325: case 326: case 327: case 328: case 378: case 379: case 380: case 381: case 382:
case 464: case 465: case 466: case 467: case 468: case 469: case 470: case 471: case 472: case 473: case 474: case 475:
case 476: case 1218: case 1219: case 1220: case 1221: case 1222: case 1223: case 1224: case 1225: case 1226: case 1227:
case 1228: case 1229: case 1230:
if ( !(((utf32) % 2 == 1))) {
lc = (utf32 - 1);
}
break;
/* 1104..1119: uppercase is 80 positions lower */
case 1104: case 1105: case 1106: case 1107: case 1108: case 1109: case 1110: case 1111: case 1112: case 1113:
case 1114: case 1115: case 1116: case 1117: case 1118: case 1119:
lc = (utf32 - 80);
break;
/* 1377..1414: uppercase is 48 positions lower */
case 1377: case 1378: case 1379: case 1380: case 1381: case 1382: case 1383: case 1384: case 1385: case 1386:
case 1387: case 1388: case 1389: case 1390: case 1391: case 1392: case 1393: case 1394: case 1395: case 1396: case 1397: case 1398:
case 1399: case 1400: case 1401: case 1402: case 1403: case 1404: case 1405: case 1406: case 1407: case 1408: case 1409: case 1410:
case 1411: case 1412: case 1413: case 1414:
lc = (utf32 - 48);
break;
/* irregular single mappings */
case 181:
lc = 924;
break;
case 255:
lc = 376;
break;
case 305:
lc = 73;
break;
case 383:
lc = 83;
break;
case 387:
lc = 386;
break;
case 389:
lc = 388;
break;
case 392:
lc = 391;
break;
case 396:
lc = 395;
break;
case 402:
lc = 401;
break;
case 405:
lc = 502;
break;
case 409:
lc = 408;
break;
case 414:
lc = 544;
break;
case 424:
lc = 423;
break;
case 429:
lc = 428;
break;
case 432:
lc = 431;
break;
case 436:
lc = 435;
break;
case 438:
lc = 437;
break;
case 441:
lc = 440;
break;
case 445:
lc = 444;
break;
case 447:
lc = 503;
break;
/* in each of the next pairs two different inputs share one uppercase form */
case 453:
lc = 452;
break;
case 454:
lc = 452;
break;
case 456:
lc = 455;
break;
case 457:
lc = 455;
break;
case 459:
lc = 458;
break;
case 460:
lc = 458;
break;
case 462:
lc = 461;
break;
case 477:
lc = 398;
break;
case 479:
lc = 478;
break;
case 498:
lc = 497;
break;
case 499:
lc = 497;
break;
case 501:
lc = 500;
break;
case 505:
lc = 504;
break;
case 595:
lc = 385;
break;
case 596:
lc = 390;
break;
case 598:
lc = 393;
break;
case 599:
lc = 394;
break;
case 601:
lc = 399;
break;
case 603:
lc = 400;
break;
case 608:
lc = 403;
break;
case 611:
lc = 404;
break;
case 616:
lc = 407;
break;
case 617:
lc = 406;
break;
case 623:
lc = 412;
break;
case 626:
lc = 413;
break;
case 629:
lc = 415;
break;
case 640:
lc = 422;
break;
case 643:
lc = 425;
break;
case 648:
lc = 430;
break;
case 650:
lc = 433;
break;
case 651:
lc = 434;
break;
case 658:
lc = 439;
break;
case 837:
lc = 921;
break;
case 940:
lc = 902;
break;
case 941:
lc = 904;
break;
case 942:
lc = 905;
break;
case 943:
lc = 906;
break;
case 962:
lc = 931;
break;
case 972:
lc = 908;
break;
case 973:
lc = 910;
break;
case 974:
lc = 911;
break;
case 976:
lc = 914;
break;
case 977:
lc = 920;
break;
case 981:
lc = 934;
break;
case 982:
lc = 928;
break;
case 1008:
lc = 922;
break;
case 1009:
lc = 929;
break;
case 1010:
lc = 1017;
break;
case 1013:
lc = 917;
break;
case 1016:
lc = 1015;
break;
case 1019:
lc = 1018;
break;
case 1273:
lc = 1272;
break;
default:
break;
}
}
return lc;
}
/**
 * Extracts one UTF8 character from the UTF8 string 'utf8', starting at
 * index '*pos', and stores it zero terminated in 'utf8char'.
 *
 * The number of bytes to take is obtained from picobase_det_utf8_length()
 * applied to the byte at '*pos'; copying ends earlier if a zero byte is
 * met or PICOBASE_UTF8_MAXLEN bytes have been copied.
 *
 * @param utf8     : utf8 string to read from
 * @param pos      : in: index of the first byte of the character;
 *                   out: advanced by the number of bytes copied
 *                   (unchanged if nothing was copied)
 * @param utf8char : zero terminated utf8 character (output)
 */
static void picobase_get_utf8char (picoos_uint8 utf8[], picoos_int32 * pos, picobase_utf8char utf8char)
{
    int expected;
    int copied;

    utf8char[0] = 0;
    expected = picobase_det_utf8_length(utf8[*pos]);
    for (copied = 0; copied < expected; copied++) {
        if (copied >= PICOBASE_UTF8_MAXLEN) {
            break;
        }
        if (utf8[*pos] == 0) {
            /* string ended inside the character */
            break;
        }
        utf8char[copied] = utf8[*pos];
        (*pos)++;
    }
    utf8char[copied] = 0;
}
/**
 * Gets the UTF8 character starting at position '*pos' of 'utf8s' and moves
 * '*pos' to the position directly following it.
 * @param utf8s : UTF8 string
 * @param utf8slenmax : exclusive upper bound for positions that may be used in utf8s
 * @param pos : position of the character to read; only updated on success
 * @param utf8char : zero terminated copy of the character (output); on
 *    failure it holds the bytes copied before the failure was detected
 * @return TRUE if a complete character was copied and '*pos' was advanced
 * @return FALSE if '*pos' is not below utf8slenmax, if the byte at '*pos'
 *    yields no usable length (0 or more than PICOBASE_UTF8_MAXLEN), if the
 *    character would extend beyond utf8slenmax, or if a zero byte is hit
 *    before the character is complete
 */
picoos_uint8 picobase_get_next_utf8char(const picoos_uint8 *utf8s,
                                        const picoos_uint32 utf8slenmax,
                                        picoos_uint32 *pos,
                                        picobase_utf8char utf8char) {
    picoos_uint8 i;
    picoos_uint8 len;
    picoos_uint32 poscnt;

    utf8char[0] = 0;

    /* do not even look at utf8s[*pos] when it lies outside the usable range */
    if ((*pos) >= utf8slenmax) {
        return FALSE;
    }
    len = picobase_det_utf8_length(utf8s[*pos]);
    /* len == 0: nothing to read, reporting success would leave '*pos' unchanged
       and an empty character behind.
       The remaining-space test is written as a subtraction because
       '*pos + len' could wrap around for positions close to the type limit. */
    if ((len == 0) || (len > PICOBASE_UTF8_MAXLEN) ||
        (len > (utf8slenmax - (*pos)))) {
        return FALSE;
    }

    poscnt = *pos;
    for (i = 0; (i < len) && (utf8s[poscnt] != 0); i++) {
        utf8char[i] = utf8s[poscnt];
        poscnt++;
    }
    utf8char[i] = 0;

    if (i < len) {
        /* the string ended in the middle of the character */
        return FALSE;
    }
    *pos = poscnt;
    return TRUE;
}
/**
 * Moves '*pos' from the start of one UTF8 character in 'utf8s' to the
 * position directly following that character, without copying it.
 * @param utf8s : UTF8 string
 * @param utf8slenmax : exclusive upper bound for positions that may be used in utf8s
 * @param pos : position of the character to skip; only updated on success
 * @return TRUE if a complete character was skipped and '*pos' was advanced
 * @return FALSE if '*pos' is not below utf8slenmax, if the byte at '*pos'
 *    yields no usable length (0 or more than PICOBASE_UTF8_MAXLEN), if the
 *    character would extend beyond utf8slenmax, or if a zero byte is hit
 *    before the character is complete
 */
picoos_uint8 picobase_get_next_utf8charpos(const picoos_uint8 *utf8s,
                                           const picoos_uint32 utf8slenmax,
                                           picoos_uint32 *pos) {
    picoos_uint8 i;
    picoos_uint8 len;
    picoos_uint32 poscnt;

    /* do not even look at utf8s[*pos] when it lies outside the usable range */
    if ((*pos) >= utf8slenmax) {
        return FALSE;
    }
    len = picobase_det_utf8_length(utf8s[*pos]);
    /* len == 0: nothing to skip, reporting success would leave '*pos' unchanged.
       The remaining-space test is written as a subtraction because
       '*pos + len' could wrap around for positions close to the type limit. */
    if ((len == 0) || (len > PICOBASE_UTF8_MAXLEN) ||
        (len > (utf8slenmax - (*pos)))) {
        return FALSE;
    }

    poscnt = *pos;
    for (i = 0; i < len; i++) {
        if (utf8s[poscnt] == 0) {
            /* the string ended in the middle of the character */
            return FALSE;
        }
        poscnt++;
    }
    *pos = poscnt;
    return TRUE;
}
/**
 * Gets the UTF8 character that ends directly before position '*pos' of
 * 'utf8s' and moves '*pos' to the start of that character.
 * @param utf8s : UTF8 string
 * @param utf8slenmin : lowest position that may be used in utf8s
 * @param pos : position directly following the character to read; only
 *    updated on success
 * @param utf8char : zero terminated copy of the character (output); empty
 *    on failure
 * @return TRUE if a character was found and '*pos' was moved to its start
 * @return FALSE if '*pos' is 0 or if, within PICOBASE_UTF8_MAXLEN bytes
 *    before '*pos', no byte is found whose length (as given by
 *    picobase_det_utf8_length) equals its distance to '*pos' before a zero
 *    byte or the lower bound is reached
 */
picoos_uint8 picobase_get_prev_utf8char(const picoos_uint8 *utf8s,
                                        const picoos_uint32 utf8slenmin,
                                        picoos_uint32 *pos,
                                        picobase_utf8char utf8char) {
    picoos_uint8 back;   /* distance of the inspected byte from '*pos' */
    picoos_uint8 j;
    picoos_uint8 len;
    picoos_uint32 poscnt;

    utf8char[0] = 0;
    if ((*pos) == 0) {
        return FALSE;
    }

    poscnt = (*pos) - 1;
    for (back = 1; back <= PICOBASE_UTF8_MAXLEN; back++) {
        if ((poscnt < utf8slenmin) || (utf8s[poscnt] == 0)) {
            break;
        }
        len = picobase_det_utf8_length(utf8s[poscnt]);
        if (len == back) {
            /* this byte announces exactly the bytes up to '*pos' */
            for (j = 0; j < len; j++) {
                utf8char[j] = utf8s[poscnt + j];
            }
            utf8char[j] = 0;
            *pos = poscnt;
            return TRUE;
        }
        if (poscnt == utf8slenmin) {
            /* Lower bound reached. Stop before decrementing: poscnt is
               unsigned, so with a bound of 0 the decrement would wrap to the
               largest value, pass the bound test again and index far outside
               the string. */
            break;
        }
        poscnt--;
    }
    return FALSE;
}
/**
 * Moves '*pos' to the start of the UTF8 character that ends directly before
 * '*pos' in 'utf8s', without copying the character.
 * @param utf8s : UTF8 string
 * @param utf8slenmin : lowest position that may be used in utf8s
 * @param pos : position directly following the character to step over; only
 *    updated on success
 * @return TRUE if a character was found and '*pos' was moved to its start
 * @return FALSE if '*pos' is 0 or if, within PICOBASE_UTF8_MAXLEN bytes
 *    before '*pos', no byte is found whose length (as given by
 *    picobase_det_utf8_length) equals its distance to '*pos' before a zero
 *    byte or the lower bound is reached
 */
picoos_uint8 picobase_get_prev_utf8charpos(const picoos_uint8 *utf8s,
                                           const picoos_uint32 utf8slenmin,
                                           picoos_uint32 *pos) {
    picoos_uint8 back;   /* distance of the inspected byte from '*pos' */
    picoos_uint8 len;
    picoos_uint32 poscnt;

    if ((*pos) == 0) {
        return FALSE;
    }

    poscnt = (*pos) - 1;
    for (back = 1; back <= PICOBASE_UTF8_MAXLEN; back++) {
        if ((poscnt < utf8slenmin) || (utf8s[poscnt] == 0)) {
            break;
        }
        len = picobase_det_utf8_length(utf8s[poscnt]);
        if (len == back) {
            /* this byte announces exactly the bytes up to '*pos' */
            *pos = poscnt;
            return TRUE;
        }
        if (poscnt == utf8slenmin) {
            /* Lower bound reached. Stop before decrementing: poscnt is
               unsigned, so with a bound of 0 the decrement would wrap to the
               largest value, pass the bound test again and index far outside
               the string. */
            break;
        }
        poscnt--;
    }
    return FALSE;
}
/**
 * Converts utf8 input to utf32
 * @param utf8[] : character encoded in utf8; depending on the first byte,
 *    1 to 4 bytes are read. Bytes after the first one are not validated,
 *    only their lower 6 bits are used.
 * @param done : boolean indicating the completion of the operation
 *    (FALSE: conversion not done, i.e. the first byte is in 0x80..0xBF or
 *    is 0xF8 or above)
 * @return a single character encoded in UTF32, 0 if the conversion was not done
 */
static picobase_utf32 picobase_utf8_to_utf32 (picoos_uint8 utf8[], picoos_uint8 * done)
{
    const picoos_uint8 lead = utf8[0];

    (*done) = TRUE;
    if (lead < 0x80) {
        /* single byte, value is the code point */
        return lead;
    }
    if (lead >= 0xF8) {
        /* No code point is produced for these lead bytes, so the caller must
           not be told that the conversion succeeded. */
        (*done) = FALSE;
        return 0;
    }
    if (lead >= 0xF0) {
        /* 4 bytes: 3 + 6 + 6 + 6 payload bits */
        return ((((262144 * (lead % 8)) + (4096 * (utf8[1] % 64))) + (64 * (utf8[2] % 64))) + (utf8[3] % 64));
    }
    if (lead >= 0xE0) {
        /* 3 bytes: 4 + 6 + 6 payload bits */
        return (((4096 * (lead % 16)) + (64 * (utf8[1] % 64))) + (utf8[2] % 64));
    }
    if (lead >= 0xC0) {
        /* 2 bytes: 5 + 6 payload bits */
        return ((64 * (lead % 32)) + (utf8[1] % 64));
    }
    /* 0x80..0xBF cannot be the first byte of a character */
    (*done) = FALSE;
    return 0;
}
/**
 * Converts a single utf32 character to utf8
 * @param utf32 : character encoded in UTF32
 * @param utf8[] : receives the 1 to 4 bytes of the utf8 encoding (output);
 *    a terminating zero is appended only if it still fits into utf8MaxLen
 * @param utf8MaxLen : number of bytes available in utf8[]; at least 4 are
 *    required, whatever the actual length of the encoding
 * @param done : boolean indicating the completion of the operation
 *    (FALSE: utf8MaxLen is below 4 or utf32 does not fit into 4 utf8 bytes)
 * @return number of bytes written (without terminating zero), 0 if the
 *    conversion was not done
 */
static picoos_int32 picobase_utf32_to_utf8 (picobase_utf32 utf32, picobase_utf8 utf8[], picoos_int32 utf8MaxLen, picoos_uint8 * done)
{
    picoos_int32 len;

    (*done) = FALSE;
    if (utf8MaxLen < 4) {
        return 0;
    }

    if (utf32 < 128) {
        len = 1;
        utf8[0] = (picobase_utf8) utf32;
    } else if (utf32 < 2048) {
        len = 2;
        utf8[1] = (128 + (utf32 % 64));
        utf32 = (utf32 / 64);
        utf8[0] = (192 + (utf32 % 32));
    } else if (utf32 < 65536) {
        len = 3;
        utf8[2] = (128 + (utf32 % 64));
        utf32 = (utf32 / 64);
        utf8[1] = (128 + (utf32 % 64));
        utf32 = (utf32 / 64);
        utf8[0] = (224 + utf32);
    } else if (utf32 < 2097152) {
        /* Four bytes carry 3 + 6 + 6 + 6 = 21 payload bits, so every value
           below 2^21 fits. The former limit of 2^20 refused U+100000 to
           U+10FFFF, although picobase_utf8_to_utf32 decodes 4 byte
           characters using 3 bits of the lead byte; such characters could
           be decoded but not encoded again. */
        len = 4;
        utf8[3] = (128 + (utf32 % 64));
        utf32 = (utf32 / 64);
        utf8[2] = (128 + (utf32 % 64));
        utf32 = (utf32 / 64);
        utf8[1] = (128 + (utf32 % 64));
        utf32 = (utf32 / 64);
        utf8[0] = (240 + utf32);
    } else {
        return 0;
    }

    /* terminate only when there is room left behind the character */
    if (len < utf8MaxLen) {
        utf8[len] = 0;
    }
    (*done) = TRUE;
    return len;
}
/**
 * Converts the zero terminated UTF8 string 'utf8str' to lower case,
 * character by character, using base_utf32_lowercase.
 * @param utf8str : zero terminated UTF8 input string
 * @param lowercase : receives the zero terminated lower case string (output)
 * @param lowercaseMaxLen : size of 'lowercase' in bytes, including the
 *    terminating zero
 * @param done : set to TRUE if the whole input was converted; set to FALSE
 *    if the output had to be truncated, if a part of the input could not be
 *    converted and was left out, or if lowercaseMaxLen is below 1
 * @return number of bytes written to 'lowercase' (without terminating zero)
 *
 * Characters are only written as a whole; on truncation the output ends
 * after the last character that fits completely.
 */
extern picoos_int32 picobase_lowercase_utf8_str (picoos_uchar utf8str[], picoos_char lowercase[], int lowercaseMaxLen, picoos_uint8 * done)
{
    picobase_utf8char utf8char;
    picoos_int32 inPos = 0;
    picoos_int32 outLen = 0;
    picoos_int32 charStart;
    picoos_int32 charLen = 0;
    picoos_int32 j;
    picobase_utf32 utf32;
    picoos_uint8 convOk;

    (*done) = TRUE;
    if (lowercaseMaxLen < 1) {
        /* no room for the terminating zero, nothing may be written */
        (*done) = FALSE;
        return 0;
    }

    while (utf8str[inPos] != 0) {
        charStart = inPos;
        picobase_get_utf8char(utf8str, &inPos, utf8char);
        if (utf8char[0] == 0) {
            /* No character could be read at this position. Leave the byte
               out, and make sure the input position moves on, otherwise this
               loop never ends. */
            if (inPos == charStart) {
                inPos++;
            }
            (*done) = FALSE;
            continue;
        }

        utf32 = picobase_utf8_to_utf32(utf8char, &convOk);
        if (convOk) {
            utf32 = base_utf32_lowercase(utf32);
            charLen = picobase_utf32_to_utf8(utf32, utf8char, PICOBASE_UTF8_MAXLEN, &convOk);
        }
        if (!convOk) {
            /* the character is left out; report that instead of hiding it */
            (*done) = FALSE;
            continue;
        }

        if (charLen > ((lowercaseMaxLen - 1) - outLen)) {
            /* does not fit as a whole: stop here rather than emit the first
               bytes of a multi byte character */
            (*done) = FALSE;
            break;
        }
        for (j = 0; j < charLen; j++) {
            lowercase[outLen] = (picoos_char) utf8char[j];
            outLen++;
        }
    }
    lowercase[outLen] = 0;
    return outLen;
}
/**
 * Converts the zero terminated UTF8 string 'utf8str' to upper case,
 * character by character, using base_utf32_uppercase.
 * @param utf8str : zero terminated UTF8 input string
 * @param uppercase : receives the zero terminated upper case string (output)
 * @param uppercaseMaxLen : size of 'uppercase' in bytes, including the
 *    terminating zero
 * @param done : set to TRUE if the whole input was converted; set to FALSE
 *    if the output had to be truncated, if a part of the input could not be
 *    converted and was left out, or if uppercaseMaxLen is below 1
 * @return number of bytes written to 'uppercase' (without terminating zero)
 *
 * Characters are only written as a whole; on truncation the output ends
 * after the last character that fits completely.
 */
extern picoos_int32 picobase_uppercase_utf8_str (picoos_uchar utf8str[], picoos_char uppercase[], int uppercaseMaxLen, picoos_uint8 * done)
{
    picobase_utf8char utf8char;
    picoos_int32 inPos = 0;
    picoos_int32 outLen = 0;
    picoos_int32 charStart;
    picoos_int32 charLen = 0;
    picoos_int32 j;
    picobase_utf32 utf32;
    picoos_uint8 convOk;

    (*done) = TRUE;
    if (uppercaseMaxLen < 1) {
        /* no room for the terminating zero, nothing may be written */
        (*done) = FALSE;
        return 0;
    }

    while (utf8str[inPos] != 0) {
        charStart = inPos;
        picobase_get_utf8char(utf8str, &inPos, utf8char);
        if (utf8char[0] == 0) {
            /* No character could be read at this position. Leave the byte
               out, and make sure the input position moves on, otherwise this
               loop never ends. */
            if (inPos == charStart) {
                inPos++;
            }
            (*done) = FALSE;
            continue;
        }

        utf32 = picobase_utf8_to_utf32(utf8char, &convOk);
        if (convOk) {
            utf32 = base_utf32_uppercase(utf32);
            charLen = picobase_utf32_to_utf8(utf32, utf8char, PICOBASE_UTF8_MAXLEN, &convOk);
        }
        if (!convOk) {
            /* the character is left out; report that instead of hiding it */
            (*done) = FALSE;
            continue;
        }

        if (charLen > ((uppercaseMaxLen - 1) - outLen)) {
            /* does not fit as a whole: stop here rather than emit the first
               bytes of a multi byte character */
            (*done) = FALSE;
            break;
        }
        for (j = 0; j < charLen; j++) {
            uppercase[outLen] = (picoos_char) utf8char[j];
            outLen++;
        }
    }
    uppercase[outLen] = 0;
    return outLen;
}
/**
 * Checks whether 'utf8str' is unchanged by upper case conversion, i.e.
 * whether base_utf32_uppercase maps every character of it to itself.
 * @param utf8str : zero terminated UTF8 string
 * @param utf8strmaxlen : characters starting at or after this position are
 *    not checked
 * @return TRUE if no checked character is changed by base_utf32_uppercase
 *    (also for an empty string), FALSE as soon as one character is changed
 *
 * Positions at which no character can be read are stepped over without
 * influencing the result.
 */
extern picoos_bool picobase_is_utf8_uppercase (picoos_uchar utf8str[], picoos_int32 utf8strmaxlen)
{
    picobase_utf8char utf8char;
    picoos_int32 i = 0;
    picoos_int32 charStart;
    picoos_uint32 utf32;
    picoos_bool done;

    while ((i < utf8strmaxlen) && (utf8str[i] != 0)) {
        charStart = i;
        picobase_get_utf8char(utf8str, &i, utf8char);
        if (utf8char[0] == 0) {
            /* nothing was read; make sure the position moves on, otherwise
               this loop never ends */
            if (i == charStart) {
                i++;
            }
            continue;
        }
        utf32 = picobase_utf8_to_utf32(utf8char, &done);
        if (utf32 != base_utf32_uppercase(utf32)) {
            return FALSE;
        }
    }
    return TRUE;
}
/**
 * Checks whether 'utf8str' is unchanged by lower case conversion, i.e.
 * whether base_utf32_lowercase maps every character of it to itself.
 * @param utf8str : zero terminated UTF8 string
 * @param utf8strmaxlen : characters starting at or after this position are
 *    not checked
 * @return TRUE if no checked character is changed by base_utf32_lowercase
 *    (also for an empty string), FALSE as soon as one character is changed
 *
 * Positions at which no character can be read are stepped over without
 * influencing the result.
 */
extern picoos_bool picobase_is_utf8_lowercase (picoos_uchar utf8str[], picoos_int32 utf8strmaxlen)
{
    picobase_utf8char utf8char;
    picoos_int32 i = 0;
    picoos_int32 charStart;
    picoos_uint32 utf32;
    picoos_bool done;

    while ((i < utf8strmaxlen) && (utf8str[i] != 0)) {
        charStart = i;
        picobase_get_utf8char(utf8str, &i, utf8char);
        if (utf8char[0] == 0) {
            /* nothing was read; make sure the position moves on, otherwise
               this loop never ends */
            if (i == charStart) {
                i++;
            }
            continue;
        }
        utf32 = picobase_utf8_to_utf32(utf8char, &done);
        if (utf32 != base_utf32_lowercase(utf32)) {
            return FALSE;
        }
    }
    return TRUE;
}
#ifdef __cplusplus
}
#endif
/* end */
|