diff -aur abiword-0.99.1~/abi/src/af/util/xp/ut_Encoding.cpp abiword-0.99.1/abi/src/af/util/xp/ut_Encoding.cpp --- abiword-0.99.1~/abi/src/af/util/xp/ut_Encoding.cpp Sat Aug 11 02:32:38 2001 +++ abiword-0.99.1/abi/src/af/util/xp/ut_Encoding.cpp Thu Jan 17 12:55:13 2002 @@ -46,7 +46,7 @@ // Another approach is to do these tests in an external program which // outputs the C++ code for the following table. // -// TODO Note that certain operations in Abiword currently try to open or +// TODO Note that certain operations in AbiWord currently try to open or // TODO compare certain encodings via hard-coded names. This should be // TODO discouraged and replaced with names derived as in these tables. // @@ -54,6 +54,7 @@ static XML_Char * enc_armscii[] = {"ARMSCII-8",0}; static XML_Char * enc_big5[] = {"BIG5","BIG-5","BIG-FIVE","BIGFIVE","CN-BIG5",0}; +static XML_Char * enc_big5hkscs[] = {"BIG5-HKSCS","BIG5HKSCS",0}; static XML_Char * enc_cp874[] = {"CP874",0}; static XML_Char * enc_cp932[] = {"CP932",0}; static XML_Char * enc_cp936[] = {"CP936","GBK",0}; @@ -133,6 +134,7 @@ //the property value, the localised translation, the numerical id {enc_armscii, NULL, XAP_STRING_ID_ENC_ARME_ARMSCII}, {enc_big5, NULL, XAP_STRING_ID_ENC_CHTR_BIG5}, + {enc_big5hkscs, NULL, XAP_STRING_ID_ENC_CHTR_BIG5HKSCS}, {enc_cp874, NULL, XAP_STRING_ID_ENC_THAI_WIN}, {enc_cp932, NULL, XAP_STRING_ID_ENC_JAPN_WIN}, {enc_cp936, NULL, XAP_STRING_ID_ENC_CHSI_WIN}, diff -aur abiword-0.99.1~/abi/src/af/xap/xp/xap_EncodingManager.cpp abiword-0.99.1/abi/src/af/xap/xp/xap_EncodingManager.cpp --- abiword-0.99.1~/abi/src/af/xap/xp/xap_EncodingManager.cpp Wed Jan 16 10:00:07 2002 +++ abiword-0.99.1/abi/src/af/xap/xp/xap_EncodingManager.cpp Thu Jan 17 15:03:38 2002 @@ -476,11 +476,11 @@ {NULL,NULL}, {"ru","english,russian"}, - /* I'm not sure that this is correct, but my TeTex 0.9.17 works only + /* I'm not sure that this is correct, but my teTeX 0.9.17 works only this way (i.e. only with "russian" in the middle) - hvv */ {"uk","english,russian,ukrainian"}, - /* I'm not sure again - my TeTex 0.9.17 doesn't know 'byelorussian' + /* I'm not sure again - my teTeX 0.9.17 doesn't know 'byelorussian' language - hvv */ {"be","english,russian"}, {NULL,NULL} @@ -493,7 +493,7 @@ RUSSIAN_CHARSET). */ static const char* wincharsetcode_ru[]= /* russian charset */ -{ "ru","be", "uk" , NULL }; +{ "ru", "be", "uk" , NULL }; static const char* wincharsetcode_el[]= /* greek charset*/ { "el", NULL }; @@ -510,10 +510,10 @@ Tested with GB2312 only. */ static const char* wincharsetcode_zh_GB2312[]= /* chinese*/ -{ "zh_CN.GB2312", "zh_TW.GB2312", NULL }; +{ "zh_CN.GB2312", "zh_CN.GBK", "zh_CN.GB18030", NULL }; static const char* wincharsetcode_zh_BIG5[]= /* chinese*/ -{ "zh_CN.BIG5", "zh_TW.BIG5", NULL }; +{ "zh_TW.BIG5", "zh_HK.BIG5-HKSCS", NULL }; static const _rmap langcode_to_wincharsetcode[]= { @@ -528,7 +528,7 @@ {NULL} }; -static const UT_Bijection::pair_data zh_CN_big5[]= +static const UT_Bijection::pair_data zh_TW_big5[]= { /* This data was constructed from the HJ's patch for support of Big5 to @@ -541,20 +541,20 @@ {NULL,NULL} }; -static const char* zh_CN_big5_keys[]= -{ "zh_CN.BIG5", NULL }; +static const char* zh_TW_big5_keys[]= +{ "zh_TW.BIG5", NULL }; static const _rmap cjk_word_fontname_mapping_data[]= { {NULL}, - {(char*)zh_CN_big5,zh_CN_big5_keys}, + {(char*)zh_TW_big5,zh_TW_big5_keys}, {NULL} }; /*all CJK language codes should be listed here to be marked as CJK*/ static const char* cjk_languages[]= -{ "zh","ja","ko",NULL}; +{ "zh", "ja", "ko", NULL }; static const _rmap langcode_to_cjk[]= { @@ -585,16 +585,16 @@ {NULL,NULL}, // libiconv also lists "SHIFT_JIS", "SHIFT-JIS", "MS_KANJI", "csShiftJIS" {"CP932","SJIS"}, - {"CP936","GB2312"}, - {"CP950","BIG5"}, + {"CP936","GBK"}, + {"CP950","BIG5"}, {"CP1361","JOHAB"}, {NULL,NULL} }; /* - This table is only concern CJK RTF part.It is a reverse table of - MSCodepagename_to_charset_name_map.Iconv doesn't know some cpNNNN, - but M$Word know. + This table is only concern CJK RTF part. It is a reverse table of + MSCodepagename_to_charset_name_map. Iconv doesn't know some cpNNNN, + but M$Word knows. */ static const _map charset_name_to_MSCodepagename_map[]= { @@ -603,7 +603,10 @@ // libiconv also lists "SHIFT_JIS", "SHIFT-JIS", "MS_KANJI", "csShiftJIS" {"SJIS","CP932"}, {"GB2312","CP936"}, + {"GBK","CP936"}, + {"GB18030","CP936"}, {"BIG5","CP950"}, + {"BIG5-HKSCS","CP950"}, {"JOHAB","CP1361"}, {NULL,NULL} }; @@ -613,10 +616,11 @@ { /*key, value*/ {NULL}, - {"zh_CN.BIG5", "0x404"}, {"zh_CN.GB2312", "0x804"}, + {"zh_CN.GBK", "0x804"}, + {"zh_CN.GB18030", "0x804"}, + {"zh_HK.BIG5-HKSCS", "0x404"}, {"zh_TW.BIG5", "0x404"}, - {"zh_TW.GB2312", "0x804"}, {NULL} }; diff -aur abiword-0.99.1~/abi/src/af/xap/xp/xap_String_Id.h abiword-0.99.1/abi/src/af/xap/xp/xap_String_Id.h --- abiword-0.99.1~/abi/src/af/xap/xp/xap_String_Id.h Wed Jan 16 10:00:07 2002 +++ abiword-0.99.1/abi/src/af/xap/xp/xap_String_Id.h Thu Jan 17 13:01:50 2002 @@ -339,12 +339,13 @@ dcl(ENC_GEOR_PS, "Georgian, PS") /* Multibyte CJK */ /* Chinese Simplified */ -dcl(ENC_CHSI_EUC, "Chinese Simplified, EUC-CN") +dcl(ENC_CHSI_EUC, "Chinese Simplified, EUC-CN (GB2312)") dcl(ENC_CHSI_GB, "Chinese Simplified, GB_2312-80") // Cf. EUC dcl(ENC_CHSI_HZ, "Chinese Simplified, HZ") dcl(ENC_CHSI_WIN, "Chinese Simplified, Windows Code Page 936") /* Chinese Traditional */ dcl(ENC_CHTR_BIG5, "Chinese Traditional, BIG5") +dcl(ENC_CHTR_BIG5HKSCS, "Chinese Traditional, BIG5-HKSCS") dcl(ENC_CHTR_EUC, "Chinese Traditional, EUC-TW") dcl(ENC_CHTR_WIN, "Chinese Traditional, Windows Code Page 950") /* Japanese */ diff -aur abiword-0.99.1~/abi/src/wp/impexp/xp/ie_imp_RTF.cpp abiword-0.99.1/abi/src/wp/impexp/xp/ie_imp_RTF.cpp --- abiword-0.99.1~/abi/src/wp/impexp/xp/ie_imp_RTF.cpp Tue Jan 8 20:08:08 2002 +++ abiword-0.99.1/abi/src/wp/impexp/xp/ie_imp_RTF.cpp Thu Jan 17 12:58:58 2002 @@ -920,7 +920,7 @@ break; // 936 Chinese: Simplified case 936: - CPNAME_OR_FALLBACK(m_szEncoding,"CP936","GB2312"); + CPNAME_OR_FALLBACK(m_szEncoding,"CP936","GBK"); break; // 950 Chinese: Traditional case 950: @@ -977,7 +977,7 @@ m_szEncoding = "CP1361"; break; case 134: // Chinese GB - undocumented? - CPNAME_OR_FALLBACK(m_szEncoding,"CP936","GB2312"); + CPNAME_OR_FALLBACK(m_szEncoding,"CP936","GBK"); break; case 136: // Chinese BIG5 - undocumented? CPNAME_OR_FALLBACK(m_szEncoding,"CP950","BIG5"); diff -aur abiword-0.99.1~/wv/text.c abiword-0.99.1/wv/text.c --- abiword-0.99.1~/wv/text.c Thu Dec 27 23:55:49 2001 +++ abiword-0.99.1/wv/text.c Thu Jan 17 13:36:27 2002 @@ -592,11 +592,12 @@ switch (lid) { #if 0 - case 0x0c04: /*Chinese (Hong Kong SAR, PRC) */ case 0x1404: /*Chinese (Macau SAR) */ #endif + case 0x0c04: /*Chinese (Hong Kong SAR, PRC) */ + CPNAME_OR_FALLBACK ("CP950", "BIG5-HKSCS"); case 0x0804: /*Chinese (PRC) */ - CPNAME_OR_FALLBACK ("CP936", "GB2312"); + CPNAME_OR_FALLBACK ("CP936", "GBK"); #if 0 case 0x1004: /*Chinese (Singapore) */ #endif