Index: src/af/util/xp/ut_string.cpp
===================================================================
RCS file: /cvsroot/abi/src/af/util/xp/ut_string.cpp,v
retrieving revision 1.93
diff -u -b -r1.93 ut_string.cpp
--- ut_string.cpp	5 Feb 2003 01:44:18 -0000	1.93
+++ ut_string.cpp	22 Jul 2003 19:35:04 -0000
@@ -568,6 +568,169 @@
 	return i;
 }
 
+typedef struct // inclusive code point range [rangeStart, rangeEnd]
+{
+	UT_UCSChar rangeStart;
+	UT_UCSChar rangeEnd; // NOTE(review): table holds values > 0xFFFF; confirm UT_UCSChar is wider than 16 bits
+} tableEntry;
+
+static const tableEntry alphabeticTable[] = { // Unicode 4.0.0; ascending, non-overlapping ranges
+	// uniset +cat=Lm +cat=Lo +cat=Lu +cat=Ll +cat=Lt +cat=Mn +cat=Me +cat=Mc c
+	{ 0x0041, 0x005A }, { 0x0061, 0x007A }, { 0x00AA, 0x00AA },
+	{ 0x00B5, 0x00B5 }, { 0x00BA, 0x00BA }, { 0x00C0, 0x00D6 },
+	{ 0x00D8, 0x00F6 }, { 0x00F8, 0x0236 }, { 0x0250, 0x02C1 },
+	{ 0x02C6, 0x02D1 }, { 0x02E0, 0x02E4 }, { 0x02EE, 0x02EE },
+	{ 0x0300, 0x0357 }, { 0x035D, 0x036F }, { 0x037A, 0x037A },
+	{ 0x0386, 0x0386 }, { 0x0388, 0x038A }, { 0x038C, 0x038C },
+	{ 0x038E, 0x03A1 }, { 0x03A3, 0x03CE }, { 0x03D0, 0x03F5 },
+	{ 0x03F7, 0x03FB }, { 0x0400, 0x0481 }, { 0x0483, 0x0486 },
+	{ 0x0488, 0x04CE }, { 0x04D0, 0x04F5 }, { 0x04F8, 0x04F9 },
+	{ 0x0500, 0x050F }, { 0x0531, 0x0556 }, { 0x0559, 0x0559 },
+	{ 0x0561, 0x0587 }, { 0x0591, 0x05A1 }, { 0x05A3, 0x05B9 },
+	{ 0x05BB, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 },
+	{ 0x05C4, 0x05C4 }, { 0x05D0, 0x05EA }, { 0x05F0, 0x05F2 },
+	{ 0x0610, 0x0615 }, { 0x0621, 0x063A }, { 0x0640, 0x0658 },
+	{ 0x066E, 0x06D3 }, { 0x06D5, 0x06DC }, { 0x06DE, 0x06E8 },
+	{ 0x06EA, 0x06EF }, { 0x06FA, 0x06FC }, { 0x06FF, 0x06FF },
+	{ 0x0710, 0x074A }, { 0x074D, 0x074F }, { 0x0780, 0x07B1 },
+	{ 0x0901, 0x0939 }, { 0x093C, 0x094D }, { 0x0950, 0x0954 },
+	{ 0x0958, 0x0963 }, { 0x0981, 0x0983 }, { 0x0985, 0x098C },
+	{ 0x098F, 0x0990 }, { 0x0993, 0x09A8 }, { 0x09AA, 0x09B0 },
+	{ 0x09B2, 0x09B2 }, { 0x09B6, 0x09B9 }, { 0x09BC, 0x09C4 },
+	{ 0x09C7, 0x09C8 }, { 0x09CB, 0x09CD }, { 0x09D7, 0x09D7 },
+	{ 0x09DC, 0x09DD }, { 0x09DF, 0x09E3 }, { 0x09F0, 0x09F1 },
+	{ 0x0A01, 0x0A03 }, { 0x0A05, 0x0A0A }, { 0x0A0F, 0x0A10 },
+	{ 0x0A13, 0x0A28 }, { 0x0A2A, 0x0A30 }, { 0x0A32, 0x0A33 },
+	{ 0x0A35, 0x0A36 }, { 0x0A38, 0x0A39 }, { 0x0A3C, 0x0A3C },
+	{ 0x0A3E, 0x0A42 }, { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D },
+	{ 0x0A59, 0x0A5C }, { 0x0A5E, 0x0A5E }, { 0x0A70, 0x0A74 },
+	{ 0x0A81, 0x0A83 }, { 0x0A85, 0x0A8D }, { 0x0A8F, 0x0A91 },
+	{ 0x0A93, 0x0AA8 }, { 0x0AAA, 0x0AB0 }, { 0x0AB2, 0x0AB3 },
+	{ 0x0AB5, 0x0AB9 }, { 0x0ABC, 0x0AC5 }, { 0x0AC7, 0x0AC9 },
+	{ 0x0ACB, 0x0ACD }, { 0x0AD0, 0x0AD0 }, { 0x0AE0, 0x0AE3 },
+	{ 0x0B01, 0x0B03 }, { 0x0B05, 0x0B0C }, { 0x0B0F, 0x0B10 },
+	{ 0x0B13, 0x0B28 }, { 0x0B2A, 0x0B30 }, { 0x0B32, 0x0B33 },
+	{ 0x0B35, 0x0B39 }, { 0x0B3C, 0x0B43 }, { 0x0B47, 0x0B48 },
+	{ 0x0B4B, 0x0B4D }, { 0x0B56, 0x0B57 }, { 0x0B5C, 0x0B5D },
+	{ 0x0B5F, 0x0B61 }, { 0x0B71, 0x0B71 }, { 0x0B82, 0x0B83 },
+	{ 0x0B85, 0x0B8A }, { 0x0B8E, 0x0B90 }, { 0x0B92, 0x0B95 },
+	{ 0x0B99, 0x0B9A }, { 0x0B9C, 0x0B9C }, { 0x0B9E, 0x0B9F },
+	{ 0x0BA3, 0x0BA4 }, { 0x0BA8, 0x0BAA }, { 0x0BAE, 0x0BB5 },
+	{ 0x0BB7, 0x0BB9 }, { 0x0BBE, 0x0BC2 }, { 0x0BC6, 0x0BC8 },
+	{ 0x0BCA, 0x0BCD }, { 0x0BD7, 0x0BD7 }, { 0x0C01, 0x0C03 },
+	{ 0x0C05, 0x0C0C }, { 0x0C0E, 0x0C10 }, { 0x0C12, 0x0C28 },
+	{ 0x0C2A, 0x0C33 }, { 0x0C35, 0x0C39 }, { 0x0C3E, 0x0C44 },
+	{ 0x0C46, 0x0C48 }, { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 },
+	{ 0x0C60, 0x0C61 }, { 0x0C82, 0x0C83 }, { 0x0C85, 0x0C8C },
+	{ 0x0C8E, 0x0C90 }, { 0x0C92, 0x0CA8 }, { 0x0CAA, 0x0CB3 },
+	{ 0x0CB5, 0x0CB9 }, { 0x0CBC, 0x0CC4 }, { 0x0CC6, 0x0CC8 },
+	{ 0x0CCA, 0x0CCD }, { 0x0CD5, 0x0CD6 }, { 0x0CDE, 0x0CDE },
+	{ 0x0CE0, 0x0CE1 }, { 0x0D02, 0x0D03 }, { 0x0D05, 0x0D0C },
+	{ 0x0D0E, 0x0D10 }, { 0x0D12, 0x0D28 }, { 0x0D2A, 0x0D39 },
+	{ 0x0D3E, 0x0D43 }, { 0x0D46, 0x0D48 }, { 0x0D4A, 0x0D4D },
+	{ 0x0D57, 0x0D57 }, { 0x0D60, 0x0D61 }, { 0x0D82, 0x0D83 },
+	{ 0x0D85, 0x0D96 }, { 0x0D9A, 0x0DB1 }, { 0x0DB3, 0x0DBB },
+	{ 0x0DBD, 0x0DBD }, { 0x0DC0, 0x0DC6 }, { 0x0DCA, 0x0DCA },
+	{ 0x0DCF, 0x0DD4 }, { 0x0DD6, 0x0DD6 }, { 0x0DD8, 0x0DDF },
+	{ 0x0DF2, 0x0DF3 }, { 0x0E01, 0x0E3A }, { 0x0E40, 0x0E4E },
+	{ 0x0E81, 0x0E82 }, { 0x0E84, 0x0E84 }, { 0x0E87, 0x0E88 },
+	{ 0x0E8A, 0x0E8A }, { 0x0E8D, 0x0E8D }, { 0x0E94, 0x0E97 },
+	{ 0x0E99, 0x0E9F }, { 0x0EA1, 0x0EA3 }, { 0x0EA5, 0x0EA5 },
+	{ 0x0EA7, 0x0EA7 }, { 0x0EAA, 0x0EAB }, { 0x0EAD, 0x0EB9 },
+	{ 0x0EBB, 0x0EBD }, { 0x0EC0, 0x0EC4 }, { 0x0EC6, 0x0EC6 },
+	{ 0x0EC8, 0x0ECD }, { 0x0EDC, 0x0EDD }, { 0x0F00, 0x0F00 },
+	{ 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 }, { 0x0F37, 0x0F37 },
+	{ 0x0F39, 0x0F39 }, { 0x0F3E, 0x0F47 }, { 0x0F49, 0x0F6A },
+	{ 0x0F71, 0x0F84 }, { 0x0F86, 0x0F8B }, { 0x0F90, 0x0F97 },
+	{ 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 }, { 0x1000, 0x1021 },
+	{ 0x1023, 0x1027 }, { 0x1029, 0x102A }, { 0x102C, 0x1032 },
+	{ 0x1036, 0x1039 }, { 0x1050, 0x1059 }, { 0x10A0, 0x10C5 },
+	{ 0x10D0, 0x10F8 }, { 0x1100, 0x1159 }, { 0x115F, 0x11A2 },
+	{ 0x11A8, 0x11F9 }, { 0x1200, 0x1206 }, { 0x1208, 0x1246 },
+	{ 0x1248, 0x1248 }, { 0x124A, 0x124D }, { 0x1250, 0x1256 },
+	{ 0x1258, 0x1258 }, { 0x125A, 0x125D }, { 0x1260, 0x1286 },
+	{ 0x1288, 0x1288 }, { 0x128A, 0x128D }, { 0x1290, 0x12AE },
+	{ 0x12B0, 0x12B0 }, { 0x12B2, 0x12B5 }, { 0x12B8, 0x12BE },
+	{ 0x12C0, 0x12C0 }, { 0x12C2, 0x12C5 }, { 0x12C8, 0x12CE },
+	{ 0x12D0, 0x12D6 }, { 0x12D8, 0x12EE }, { 0x12F0, 0x130E },
+	{ 0x1310, 0x1310 }, { 0x1312, 0x1315 }, { 0x1318, 0x131E },
+	{ 0x1320, 0x1346 }, { 0x1348, 0x135A }, { 0x13A0, 0x13F4 },
+	{ 0x1401, 0x166C }, { 0x166F, 0x1676 }, { 0x1681, 0x169A },
+	{ 0x16A0, 0x16EA }, { 0x1700, 0x170C }, { 0x170E, 0x1714 },
+	{ 0x1720, 0x1734 }, { 0x1740, 0x1753 }, { 0x1760, 0x176C },
+	{ 0x176E, 0x1770 }, { 0x1772, 0x1773 }, { 0x1780, 0x17B3 },
+	{ 0x17B6, 0x17D3 }, { 0x17D7, 0x17D7 }, { 0x17DC, 0x17DD },
+	{ 0x180B, 0x180D }, { 0x1820, 0x1877 }, { 0x1880, 0x18A9 },
+	{ 0x1900, 0x191C }, { 0x1920, 0x192B }, { 0x1930, 0x193B },
+	{ 0x1950, 0x196D }, { 0x1970, 0x1974 }, { 0x1D00, 0x1D6B },
+	{ 0x1E00, 0x1E9B }, { 0x1EA0, 0x1EF9 }, { 0x1F00, 0x1F15 },
+	{ 0x1F18, 0x1F1D }, { 0x1F20, 0x1F45 }, { 0x1F48, 0x1F4D },
+	{ 0x1F50, 0x1F57 }, { 0x1F59, 0x1F59 }, { 0x1F5B, 0x1F5B },
+	{ 0x1F5D, 0x1F5D }, { 0x1F5F, 0x1F7D }, { 0x1F80, 0x1FB4 },
+	{ 0x1FB6, 0x1FBC }, { 0x1FBE, 0x1FBE }, { 0x1FC2, 0x1FC4 },
+	{ 0x1FC6, 0x1FCC }, { 0x1FD0, 0x1FD3 }, { 0x1FD6, 0x1FDB },
+	{ 0x1FE0, 0x1FEC }, { 0x1FF2, 0x1FF4 }, { 0x1FF6, 0x1FFC },
+	{ 0x2071, 0x2071 }, { 0x207F, 0x207F }, { 0x20D0, 0x20EA },
+	{ 0x2102, 0x2102 }, { 0x2107, 0x2107 }, { 0x210A, 0x2113 },
+	{ 0x2115, 0x2115 }, { 0x2119, 0x211D }, { 0x2124, 0x2124 },
+	{ 0x2126, 0x2126 }, { 0x2128, 0x2128 }, { 0x212A, 0x212D },
+	{ 0x212F, 0x2131 }, { 0x2133, 0x2139 }, { 0x213D, 0x213F },
+	{ 0x2145, 0x2149 }, { 0x3005, 0x3006 }, { 0x302A, 0x302F },
+	{ 0x3031, 0x3035 }, { 0x303B, 0x303C }, { 0x3041, 0x3096 },
+	{ 0x3099, 0x309A }, { 0x309D, 0x309F }, { 0x30A1, 0x30FA },
+	{ 0x30FC, 0x30FF }, { 0x3105, 0x312C }, { 0x3131, 0x318E },
+	{ 0x31A0, 0x31B7 }, { 0x31F0, 0x31FF }, { 0xA000, 0xA48C },
+	{ 0xF900, 0xFA2D }, { 0xFA30, 0xFA6A }, { 0xFB00, 0xFB06 },
+	{ 0xFB13, 0xFB17 }, { 0xFB1D, 0xFB28 }, { 0xFB2A, 0xFB36 },
+	{ 0xFB38, 0xFB3C }, { 0xFB3E, 0xFB3E }, { 0xFB40, 0xFB41 },
+	{ 0xFB43, 0xFB44 }, { 0xFB46, 0xFBB1 }, { 0xFBD3, 0xFD3D },
+	{ 0xFD50, 0xFD8F }, { 0xFD92, 0xFDC7 }, { 0xFDF0, 0xFDFB },
+	{ 0xFE00, 0xFE0F }, { 0xFE20, 0xFE23 }, { 0xFE70, 0xFE74 },
+	{ 0xFE76, 0xFEFC }, { 0xFF21, 0xFF3A }, { 0xFF41, 0xFF5A },
+	{ 0xFF66, 0xFFBE }, { 0xFFC2, 0xFFC7 }, { 0xFFCA, 0xFFCF },
+	{ 0xFFD2, 0xFFD7 }, { 0xFFDA, 0xFFDC }, { 0x10000, 0x1000B },
+	{ 0x1000D, 0x10026 }, { 0x10028, 0x1003A }, { 0x1003C, 0x1003D },
+	{ 0x1003F, 0x1004D }, { 0x10050, 0x1005D }, { 0x10080, 0x100FA },
+	{ 0x10300, 0x1031E }, { 0x10330, 0x10349 }, { 0x10380, 0x1039D },
+	{ 0x10400, 0x1049D }, { 0x10800, 0x10805 }, { 0x10808, 0x10808 },
+	{ 0x1080A, 0x10835 }, { 0x10837, 0x10838 }, { 0x1083C, 0x1083C },
+	{ 0x1083F, 0x1083F }, { 0x1D165, 0x1D169 }, { 0x1D16D, 0x1D172 },
+	{ 0x1D17B, 0x1D182 }, { 0x1D185, 0x1D18B }, { 0x1D1AA, 0x1D1AD },
+	{ 0x1D400, 0x1D454 }, { 0x1D456, 0x1D49C }, { 0x1D49E, 0x1D49F },
+	{ 0x1D4A2, 0x1D4A2 }, { 0x1D4A5, 0x1D4A6 }, { 0x1D4A9, 0x1D4AC },
+	{ 0x1D4AE, 0x1D4B9 }, { 0x1D4BB, 0x1D4BB }, { 0x1D4BD, 0x1D4C3 },
+	{ 0x1D4C5, 0x1D505 }, { 0x1D507, 0x1D50A }, { 0x1D50D, 0x1D514 },
+	{ 0x1D516, 0x1D51C }, { 0x1D51E, 0x1D539 }, { 0x1D53B, 0x1D53E },
+	{ 0x1D540, 0x1D544 }, { 0x1D546, 0x1D546 }, { 0x1D54A, 0x1D550 },
+	{ 0x1D552, 0x1D6A3 }, { 0x1D6A8, 0x1D6C0 }, { 0x1D6C2, 0x1D6DA },
+	{ 0x1D6DC, 0x1D6FA }, { 0x1D6FC, 0x1D714 }, { 0x1D716, 0x1D734 },
+	{ 0x1D736, 0x1D74E }, { 0x1D750, 0x1D76E }, { 0x1D770, 0x1D788 },
+	{ 0x1D78A, 0x1D7A8 }, { 0x1D7AA, 0x1D7C2 }, { 0x1D7C4, 0x1D7C9 },
+	{ 0x2F800, 0x2FA1D }, { 0xE0100, 0xE01EF }
+}; // alphabeticTable
+
+// the following code taken from Vim 6.1 source (mbyte.c); this code originally
+// written by Raphael Finkel
+// Return index in "table" (sorted ranges, "length" entries) of the range holding "c", else -1.
+static int inTable(const tableEntry *table, int length, UT_UCSChar c) {
+	int mid, bot, top;
+	/* quick rejects: empty table, or c below the first range (Latin1 etc.) */
+	if (length <= 0 || c < table[0].rangeStart) return -1;
+	/* binary search in table */
+	bot = 0;
+	top = length - 1;
+	while (top >= bot) {
+		mid = bot + (top - bot) / 2; /* cannot overflow, unlike (bot + top) / 2 */
+		if (table[mid].rangeEnd < c)
+			bot = mid + 1;
+		else if (table[mid].rangeStart > c)
+			top = mid - 1;
+		else
+			return mid;
+	}
+	return -1;
+} // inTable
+
 #ifdef ENABLE_UCS2_STRINGS
 /*
  * My personal strstr() implementation that beats most other algorithms.
@@ -929,12 +1092,19 @@
 	return false;
 };
 
+
 bool UT_UCS2_isalpha(UT_UCS2Char c)
 {
 	FriBidiCharType type = fribidi_get_type(c);
 	return (FRIBIDI_IS_LETTER(type) != 0);
 };
 
+bool UT_UCS2_isalphaormark(UT_UCS2Char c) // true if c lies in a range of alphabeticTable
+{
+	return (inTable(alphabeticTable, sizeof(alphabeticTable) /
+		sizeof(tableEntry), static_cast<UT_UCSChar>(c)) != -1); // widen to the table's element type
+};
+
 bool UT_UCS2_isSentenceSeparator(UT_UCS2Char c)
 {
 	switch(c)
@@ -1118,6 +1288,12 @@
 {
 	FriBidiCharType type = fribidi_get_type(c);
 	return (FRIBIDI_IS_LETTER(type) != 0);
+};
+
+bool UT_UCS4_isalphaormark(UT_UCS4Char c) // true if c lies in a range of alphabeticTable
+{
+	return (inTable(alphabeticTable, sizeof(alphabeticTable) /
+		sizeof(tableEntry), c) != -1);
 };
 
 bool UT_UCS4_isSentenceSeparator(UT_UCS4Char c)
Index: src/af/util/xp/ut_string.h
===================================================================
RCS file: /cvsroot/abi/src/af/util/xp/ut_string.h,v
retrieving revision 1.70
diff -u -b -r1.70 ut_string.h
--- ut_string.h	3 Jul 2003 03:43:48 -0000	1.70
+++ ut_string.h	22 Jul 2003 19:35:04 -0000
@@ -159,6 +159,7 @@
 ABI_EXPORT bool UT_UCS2_isupper(UT_UCS2Char c);
 ABI_EXPORT bool UT_UCS2_islower(UT_UCS2Char c);
 ABI_EXPORT bool UT_UCS2_isalpha(UT_UCS2Char c);
+ABI_EXPORT bool UT_UCS2_isalphaormark(UT_UCS2Char c);
 ABI_EXPORT bool UT_UCS2_isSentenceSeparator(UT_UCS2Char c);
 #define UT_UCS2_isalnum(x) (UT_UCS2_isalpha(x) || UT_UCS2_isdigit(x)) // HACK: not UNICODE-safe
 ABI_EXPORT bool UT_UCS2_isspace(UT_UCS2Char c);
@@ -202,6 +203,7 @@
 ABI_EXPORT bool UT_UCS4_isupper(UT_UCS4Char c);
 ABI_EXPORT bool UT_UCS4_islower(UT_UCS4Char c);
 ABI_EXPORT bool UT_UCS4_isalpha(UT_UCS4Char c);
+ABI_EXPORT bool UT_UCS4_isalphaormark(UT_UCS4Char c);
 ABI_EXPORT bool UT_UCS4_isSentenceSeparator(UT_UCS4Char c);
 #define UT_UCS4_isalnum(x) (UT_UCS4_isalpha(x) || UT_UCS4_isdigit(x)) // HACK: not UNICODE-safe
 ABI_EXPORT bool UT_UCS4_isspace(UT_UCS4Char c);
Index: src/text/fmt/xp/fl_BlockLayout.cpp
===================================================================
RCS file: 
/cvsroot/abi/src/text/fmt/xp/fl_BlockLayout.cpp,v
retrieving revision 1.539
diff -u -b -r1.539 fl_BlockLayout.cpp
--- text/fmt/xp/fl_BlockLayout.cpp	17 Jul 2003 02:56:54 -0000	1.539
+++ text/fmt/xp/fl_BlockLayout.cpp	22 Jul 2003 19:36:03 -0000
@@ -64,6 +64,7 @@
 #include "xap_Clipboard.h"
 #include "ut_png.h"
 #include "ut_sleep.h"
+#include "ut_string.h"
 #include "fg_Graphic.h"
 #include "ap_Prefs.h"
 #include "ap_Prefs_SchemeIds.h"
@@ -7933,8 +7934,9 @@
 	// m_iWordOffset.
 
 	// Ignore some initial characters
-	if (_ignoreFirstWordCharacter(m_pText[m_iWordOffset]))
+	// NOTE(review): no upper bound on m_iWordOffset is visible here; confirm this scan cannot run past the end of m_pText
+	while (_ignoreFirstWordCharacter(m_pText[m_iWordOffset]))
 	{
 		m_iWordOffset++;
 	}
 
@@ -8020,8 +8022,9 @@
 	UT_uint32 iWordLength = iWordEnd - m_iWordOffset;
 
 	// ignore some terminal characters
-	if (_ignoreLastWordCharacter(m_pText[m_iWordOffset + iWordLength - 1]))
+	while (iWordLength > 0 && // iWordLength is unsigned: stop at 0 instead of wrapping around
+		   _ignoreLastWordCharacter(m_pText[m_iWordOffset + iWordLength - 1]))
 	{
 		iWordLength--;
 	}
 
@@ -8269,6 +8272,8 @@
 
 bool fl_BlockSpellIterator::_ignoreFirstWordCharacter(const UT_UCSChar c) const
 {
+	return (!UT_UCS4_isalphaormark(c)); // ignore anything that is not a letter or combining mark
+#if 0 // raphael
 	switch (c) {
 	case '\'':
 	case '"':
@@ -8278,11 +8283,14 @@
 	default:
 		return false;
 	}
+#endif
 }
 
 
 bool fl_BlockSpellIterator::_ignoreLastWordCharacter(const UT_UCSChar c) const
 {
+	return (!UT_UCS4_isalphaormark(c)); // ignore anything that is not a letter or combining mark
+#if 0 // raphael
 	switch (c) {
 	case '\'':
 	case '"':
@@ -8292,4 +8300,5 @@
 	default:
 		return false;
 	}
+#endif
 }