Re: Patch: Remember Text Encoding


Subject: Re: Patch: Remember Text Encoding
From: Andrew Dunbar (hippietrail@yahoo.com)
Date: Sun Jun 10 2001 - 08:24:41 CDT


Joaquin Cuenca Abela wrote:
>
> It looks good, just one small comment. Could you please
> replace the const char array with a UT_String?
>
> Good work!
> Cheers,

Thanks Joaquin. Here's the revised version. It now also sets
the encoding when importing from RTF. This means you ought to
be able to load a Cyrillic RTF and save as plain text from an
English locale without your Cyrillic characters being lost.

Andrew.

> --- Andrew Dunbar <hippietrail@yahoo.com> wrote:
> > This is my patch to store the encoding of a document
> > between opening and saving, so you don't have to use the
> > encoding dialog on every save.
> >
> > This fixes bug 1466.
> >
> > Andrew Dunbar.
> >
> > --
> > http://linguaphile.sourceforge.net
> > Index: src/af/xap/xp/xad_Document.cpp
> >
> ===================================================================
> > RCS file:
> > /cvsroot/abi/src/af/xap/xp/xad_Document.cpp,v
> > retrieving revision 1.17
> > diff -u -r1.17 xad_Document.cpp
> > --- src/af/xap/xp/xad_Document.cpp 2001/05/25
> > 18:02:26 1.17
> > +++ src/af/xap/xp/xad_Document.cpp 2001/06/08
> > 23:31:56
> > @@ -28,6 +28,7 @@
> > {
> > m_iRefCount = 1;
> > m_szFilename = NULL;
> > + *m_szEncodingName = '\0';
> >
> > // TODO do we need to auto-increase the bucket
> > count,
> > // TODO if the ignore list gets long?
> > @@ -188,4 +189,22 @@
> > UT_ASSERT(m_pIgnoreList);
> >
> > return true;
> > +}
> > +
> > +// Document-wide Encoding name used for some file
> > formats (Text, RTF, HTML)
> > +
> > +void AD_Document::setEncodingName(const char
> > *szEncodingName)
> > +{
> > + if (szEncodingName == NULL)
> > + szEncodingName = "";
> > +
> > + UT_ASSERT(strlen(szEncodingName) < 16);
> > +
> > + strncpy(m_szEncodingName,szEncodingName,16);
> > + m_szEncodingName[15] = 0;
> > +}
> > +
> > +const char * AD_Document::getEncodingName(void)
> > const
> > +{
> > + return *m_szEncodingName ? m_szEncodingName : 0;
> > }
> > Index: src/af/xap/xp/xad_Document.h
> >
> ===================================================================
> > RCS file:
> > /cvsroot/abi/src/af/xap/xp/xad_Document.h,v
> > retrieving revision 1.17
> > diff -u -r1.17 xad_Document.h
> > --- src/af/xap/xp/xad_Document.h 2001/05/25 05:52:12
> > 1.17
> > +++ src/af/xap/xp/xad_Document.h 2001/06/08 23:31:56
> > @@ -60,11 +60,15 @@
> > bool enumIgnores(UT_uint32 k, const
> > UT_UCSChar ** pszWord) const;
> > bool clearIgnores(void);
> >
> > + void setEncodingName(const char *
> > szEncodingName);
> > + const char * getEncodingName(void) const;
> > +
> > protected:
> > virtual ~AD_Document(); // Use unref() instead.
> >
> > int m_iRefCount;
> > const char * m_szFilename;
> > + char m_szEncodingName[16];
> >
> > UT_HashTable * m_pIgnoreList;
> > };
> > Index: src/wp/impexp/xp/ie_exp_Text.cpp
> >
> ===================================================================
> > RCS file:
> > /cvsroot/abi/src/wp/impexp/xp/ie_exp_Text.cpp,v
> > retrieving revision 1.25
> > diff -u -r1.25 ie_exp_Text.cpp
> > --- src/wp/impexp/xp/ie_exp_Text.cpp 2001/06/07
> > 15:52:42 1.25
> > +++ src/wp/impexp/xp/ie_exp_Text.cpp 2001/06/08
> > 23:33:15
> > @@ -104,11 +104,18 @@
> > IE_Exp_Text::IE_Exp_Text(PD_Document * pDocument,
> > bool bEncoded)
> > : IE_Exp(pDocument)
> > {
> > + UT_ASSERT(pDocument);
> > +
> > + const char *szEncodingName =
> > pDocument->getEncodingName();
> > + if (!szEncodingName || !*szEncodingName)
> > + szEncodingName =
> >
> XAP_EncodingManager::get_instance()->getNativeEncodingName();
> > +
> > m_error = 0;
> > m_pListener = NULL;
> > m_bIsEncoded = bEncoded;
> > +
> > // TODO Use persistent document encoding when it
> > exists
> > -
> >
> _setEncoding(XAP_EncodingManager::get_instance()->getNativeEncodingName());
> > + _setEncoding(szEncodingName);
> > }
> >
> > /*!
> > @@ -255,6 +262,7 @@
> >
> > strcpy(szEnc,s);
> > _setEncoding((const char *)szEnc);
> > + m_pDocument->setEncodingName(szEnc);
> > }
> >
> > pDialogFactory->releaseDialog(pDialog);
> > Index: src/wp/impexp/xp/ie_imp_Text.cpp
> >
> ===================================================================
> > RCS file:
> > /cvsroot/abi/src/wp/impexp/xp/ie_imp_Text.cpp,v
> > retrieving revision 1.26
> > diff -u -r1.26 ie_imp_Text.cpp
> > --- src/wp/impexp/xp/ie_imp_Text.cpp 2001/06/07
> > 15:52:42 1.26
> > +++ src/wp/impexp/xp/ie_imp_Text.cpp 2001/06/08
> > 23:33:38
> > @@ -292,8 +292,9 @@
> >
> > UT_Error error;
> >
> > - // First we need to determine the encoding.
> > - X_CleanupIfError(error,_recognizeEncoding(fp));
> > + // First we try to determine the encoding.
> > + if (_recognizeEncoding(fp) == UT_OK)
> > + m_pDocument->setEncodingName(m_szEncoding);
> > X_CleanupIfError(error,_writeHeader(fp));
> > X_CleanupIfError(error,_parseFile(fp));
> >
> > @@ -319,9 +320,16 @@
> > IE_Imp_Text::IE_Imp_Text(PD_Document * pDocument,
> > bool bEncoded)
> > : IE_Imp(pDocument)
> > {
> > + UT_ASSERT(pDocument);
> > +
> > + const char *szEncodingName =
> > pDocument->getEncodingName();
> > + if (!szEncodingName || !*szEncodingName)
> > + szEncodingName =
> >
> XAP_EncodingManager::get_instance()->getNativeEncodingName();
> > +
> > m_bIsEncoded = bEncoded;
> > +
> > // TODO Use persistent document encoding when it
> > exists
> > -
> >
> _setEncoding(XAP_EncodingManager::get_instance()->getNativeEncodingName());
> > + _setEncoding(szEncodingName);
> > }
> >
> >
> >
> /*****************************************************************/
> > @@ -491,6 +499,7 @@
> >
> > strcpy(szEnc,s);
> > _setEncoding((const char *)szEnc);
> > + m_pDocument->setEncodingName(szEnc);
> > }
> >
> > pDialogFactory->releaseDialog(pDialog);
> >
>
> __________________________________________________
> Do You Yahoo!?
> Get personalized email addresses from Yahoo! Mail - only $35
> a year! http://personal.mail.yahoo.com/

-- 
http://linguaphile.sourceforge.net

Index: src/af/xap/xp/xad_Document.cpp =================================================================== RCS file: /cvsroot/abi/src/af/xap/xp/xad_Document.cpp,v retrieving revision 1.17 diff -u -r1.17 xad_Document.cpp --- src/af/xap/xp/xad_Document.cpp 2001/05/25 18:02:26 1.17 +++ src/af/xap/xp/xad_Document.cpp 2001/06/10 13:03:50 @@ -189,3 +189,18 @@ return true; } + +// Document-wide Encoding name used for some file formats (Text, RTF, HTML) + +void AD_Document::setEncodingName(const char *szEncodingName) +{ + if (szEncodingName == NULL) + szEncodingName = ""; + + m_szEncodingName = szEncodingName; +} + +const char * AD_Document::getEncodingName(void) const +{ + return m_szEncodingName.size() ? m_szEncodingName.c_str() : 0; +} Index: src/af/xap/xp/xad_Document.h =================================================================== RCS file: /cvsroot/abi/src/af/xap/xp/xad_Document.h,v retrieving revision 1.17 diff -u -r1.17 xad_Document.h --- src/af/xap/xp/xad_Document.h 2001/05/25 05:52:12 1.17 +++ src/af/xap/xp/xad_Document.h 2001/06/10 13:03:51 @@ -24,6 +24,7 @@ // TODO should the filename be UT_UCSChar rather than char ? #include "ut_types.h" +#include "ut_string_class.h" // fwd. decl. class UT_HashTable; @@ -60,11 +61,15 @@ bool enumIgnores(UT_uint32 k, const UT_UCSChar ** pszWord) const; bool clearIgnores(void); + void setEncodingName(const char * szEncodingName); + const char * getEncodingName(void) const; + protected: virtual ~AD_Document(); // Use unref() instead. 
int m_iRefCount; const char * m_szFilename; + UT_String m_szEncodingName; UT_HashTable * m_pIgnoreList; }; Index: src/wp/impexp/xp/ie_exp_Text.cpp =================================================================== RCS file: /cvsroot/abi/src/wp/impexp/xp/ie_exp_Text.cpp,v retrieving revision 1.25 diff -u -r1.25 ie_exp_Text.cpp --- src/wp/impexp/xp/ie_exp_Text.cpp 2001/06/07 15:52:42 1.25 +++ src/wp/impexp/xp/ie_exp_Text.cpp 2001/06/10 13:04:47 @@ -104,11 +104,18 @@ IE_Exp_Text::IE_Exp_Text(PD_Document * pDocument, bool bEncoded) : IE_Exp(pDocument) { + UT_ASSERT(pDocument); + + const char *szEncodingName = pDocument->getEncodingName(); + if (!szEncodingName || !*szEncodingName) + szEncodingName = XAP_EncodingManager::get_instance()->getNativeEncodingName(); + m_error = 0; m_pListener = NULL; m_bIsEncoded = bEncoded; + // TODO Use persistent document encoding when it exists - _setEncoding(XAP_EncodingManager::get_instance()->getNativeEncodingName()); + _setEncoding(szEncodingName); } /*! 
@@ -255,6 +262,7 @@ strcpy(szEnc,s); _setEncoding((const char *)szEnc); + m_pDocument->setEncodingName(szEnc); } pDialogFactory->releaseDialog(pDialog); Index: src/wp/impexp/xp/ie_imp_RTF.cpp =================================================================== RCS file: /cvsroot/abi/src/wp/impexp/xp/ie_imp_RTF.cpp,v retrieving revision 1.69 diff -u -r1.69 ie_imp_RTF.cpp --- src/wp/impexp/xp/ie_imp_RTF.cpp 2001/06/05 15:25:55 1.69 +++ src/wp/impexp/xp/ie_imp_RTF.cpp 2001/06/10 13:05:03 @@ -532,6 +530,7 @@ { UT_VECTOR_PURGEALL(_rtfAbiListTable *,m_vecAbiListTable); } + m_mbtowc.setInCharset(XAP_EncodingManager::get_instance()->getNativeEncodingName()); } @@ -1837,13 +1838,17 @@ case 'a': if (strcmp((char*)pKeyword, "ansicpg") == 0) { - m_mbtowc.setInCharset(XAP_EncodingManager::get_instance()->charsetFromCodepage((UT_uint32)param)); + const char *szEncoding = XAP_EncodingManager::get_instance()->charsetFromCodepage((UT_uint32)param); + m_mbtowc.setInCharset(szEncoding); + m_pDocument->setEncodingName(szEncoding); return true; } else if (strcmp((char*)pKeyword, "ansi") == 0) { // this is charset Windows-1252 - m_mbtowc.setInCharset(XAP_EncodingManager::get_instance()->charsetFromCodepage(1252)); + const char *szEncoding = XAP_EncodingManager::get_instance()->charsetFromCodepage(1252); + m_mbtowc.setInCharset(szEncoding); + m_pDocument->setEncodingName(szEncoding); return true; } break; @@ -1989,7 +1994,10 @@ case 'm': if (strcmp((char *)pKeyword, "mac") == 0) { + // TODO some iconv's may have a different name - "MacRoman" + // TODO EncodingManager should handle encoding names m_mbtowc.setInCharset("MACINTOSH"); + m_pDocument->setEncodingName("MacRoman"); return true; } case 'o': Index: src/wp/impexp/xp/ie_imp_Text.cpp =================================================================== RCS file: /cvsroot/abi/src/wp/impexp/xp/ie_imp_Text.cpp,v retrieving revision 1.26 diff -u -r1.26 ie_imp_Text.cpp --- src/wp/impexp/xp/ie_imp_Text.cpp 2001/06/07 15:52:42 1.26 +++ 
src/wp/impexp/xp/ie_imp_Text.cpp 2001/06/10 13:05:05 @@ -292,8 +292,9 @@ UT_Error error; - // First we need to determine the encoding. - X_CleanupIfError(error,_recognizeEncoding(fp)); + // First we try to determine the encoding. + if (_recognizeEncoding(fp) == UT_OK) + m_pDocument->setEncodingName(m_szEncoding); X_CleanupIfError(error,_writeHeader(fp)); X_CleanupIfError(error,_parseFile(fp)); @@ -319,9 +320,16 @@ IE_Imp_Text::IE_Imp_Text(PD_Document * pDocument, bool bEncoded) : IE_Imp(pDocument) { + UT_ASSERT(pDocument); + + const char *szEncodingName = pDocument->getEncodingName(); + if (!szEncodingName || !*szEncodingName) + szEncodingName = XAP_EncodingManager::get_instance()->getNativeEncodingName(); + m_bIsEncoded = bEncoded; + // TODO Use persistent document encoding when it exists - _setEncoding(XAP_EncodingManager::get_instance()->getNativeEncodingName()); + _setEncoding(szEncodingName); } /*****************************************************************/ @@ -491,6 +499,7 @@ strcpy(szEnc,s); _setEncoding((const char *)szEnc); + m_pDocument->setEncodingName(szEnc); } pDialogFactory->releaseDialog(pDialog);

_________________________________________________________ Do You Yahoo!? Get your free @yahoo.com address at http://mail.yahoo.com



This archive was generated by hypermail 2b25 : Sun Jun 10 2001 - 08:22:28 CDT