• Main Page
  • Related Pages
  • Namespaces
  • Classes
  • Files
  • File List
  • File Members

ie_imp_XML.h

Go to the documentation of this file.
00001 /* -*- mode: C++; tab-width: 4; c-basic-offset: 4; -*- */
00002 
00003 /* AbiWord
00004  * Copyright (C) 2001 AbiSource, Inc.
00005  *
00006  * This program is free software; you can redistribute it and/or
00007  * modify it under the terms of the GNU General Public License
00008  * as published by the Free Software Foundation; either version 2
00009  * of the License, or (at your option) any later version.
00010  *
00011  * This program is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU General Public License
00017  * along with this program; if not, write to the Free Software
00018  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
00019  * 02110-1301 USA.
00020  */
00021 
00022 
00023 #ifndef IE_IMP_XML_H
00024 #define IE_IMP_XML_H
00025 
00026 #include <stdio.h>
00027 #include <string>
00028 #include <map>
00029 #include <memory>
00030 
00031 #include "ut_xml.h"
00032 
00033 #include "ut_vector.h"
00034 #include "ut_stack.h"
00035 #include "ie_imp.h"
00036 #include "ut_bytebuf.h"
00037 #include "ut_string_class.h"
00038 
00039 class PD_Document;
00040 
00041 class PD_DocumentRDFMutation;
00042 typedef std::shared_ptr<PD_DocumentRDFMutation> PD_DocumentRDFMutationHandle;
00043 
00044 
00045 struct ABI_EXPORT xmlToIdMapping { // one name/integer pair; IE_Imp_XML::_mapNameToToken takes a pointer to these as its idlist argument
00046   const char *m_name; // name string of this entry
00047   int m_type; // integer associated with m_name (presumably the token id - TODO confirm against _mapNameToToken)
00048 };
00049 
00050 // The importer/reader for reading generic
00051 // XML documents. Currently, the following classes derive from this:
00052 //
00053 // ABW, AWT, GZABW
00054 // DBK
00055 // WML
00056 // XHTML
00057 // XSL-FO
00058 // KWORD 1 && 2 (soon)
00059 
00060 class ABI_EXPORT IE_Imp_XML : public IE_Imp, public UT_XML::Listener // importer base for XML formats; also implements the UT_XML::Listener callbacks declared below
00061 {
00062 public:
00063     IE_Imp_XML(PD_Document * pDocument, bool whiteSignificant); // whiteSignificant presumably initialises m_bWhiteSignificant - TODO confirm in the .cpp
00064     virtual ~IE_Imp_XML();
00065     virtual UT_Error    importFile(const char * data, UT_uint32 length); // overload taking a char buffer plus a length
00066     virtual UT_Error    importFile(const UT_ByteBuf * data); // overload taking a UT_ByteBuf
00067 
00068     virtual bool        pasteFromBuffer(PD_DocumentRange * pDocRange,
00069                                         const unsigned char * pData,
00070                                         UT_uint32 lenData,
00071                                         const char * szEncoding = 0); // szEncoding is optional and defaults to a null pointer
00072 
00073     /* (Partial) Implementation of UT_XML::Listener
00074      *
00075      * You *must* override these next two methods:
00076      */
00077     virtual void startElement (const gchar * name, const gchar ** atts);
00078     virtual void endElement (const gchar * name);
00079     /*
00080      * but you get this one for free:
00081      */
00082     virtual void charData (const gchar * buffer, int length);
00083 
00084     /* If you don't wish the XML parser to use the standard/default file handler, you
00085      * can provide your own via an implementation of UT_XML::Reader here:
00086      */
00087 protected:
00088     void setReader (UT_XML::Reader * pReader) { m_pReader = pReader; } // only stores the pointer; ownership is not expressed in this header
00089 private:
00090     UT_XML::Reader * m_pReader; // written by setReader()
00091 
00092     /* If you wish to use a non-standard parser (e.g., for HTML), then maybe this
00093      * is useful...
00094      */
00095 protected:
00096     void setParser (UT_XML * pParser) { m_pParser = pParser; } // only stores the pointer; ownership is not expressed in this header
00097     void stopParser(void) {if(m_pParser) m_pParser->stop();} // no-op when m_pParser is null, otherwise forwards to UT_XML::stop()
00098 private:
00099     UT_XML * m_pParser; // written by setParser(), read by stopParser()
00100 
00101 public:
00102     void            incOperationCount(void) { m_iOperationCount++; } // adds one to m_iOperationCount
00103     UT_uint32       getOperationCount(void) const { return m_iOperationCount; } // read-only accessor for m_iOperationCount
00104 
00105 protected:
00106 
00107     virtual UT_Error    _loadFile(GsfInput * input);
00108     int             _mapNameToToken (const char * name, xmlToIdMapping * idlist, int len); // idlist/len: presumably an array of len entries searched for name - TODO confirm in the .cpp
00109 
00110     const gchar* _getXMLPropValue(const gchar *name, const gchar **atts); // presumably looks name up in atts - TODO confirm the layout of atts
00111 
00112     UT_uint32       _getInlineDepth(void) const;
00113     bool            _pushInlineFmt(const PP_PropertyVector & atts); // NOTE(review): likely paired with _popInlineFmt() and backed by m_vecInlineFmt / m_nstackFmtStartIndex - confirm
00114     void            _popInlineFmt(void);
00115 
00116     typedef enum _parseState { _PS_Init, // enumerators are unscoped and numbered implicitly from 0; ParseState is the type of m_parseState
00117                    _PS_Doc,
00118                    _PS_Sec,
00119                    _PS_Block,
00120                    _PS_DataSec,
00121                    _PS_DataItem,
00122                    _PS_StyleSec,
00123                    _PS_Style,
00124                    _PS_IgnoredWordsSec,
00125                    _PS_IgnoredWordsItem,
00126                    _PS_ListSec,
00127                    _PS_List,
00128                    _PS_Field,
00129                    _PS_PageSize,
00130                    _PS_MetaData,
00131                    _PS_Meta,
00132                    _PS_RevisionSec,
00133                    _PS_Revision,
00134                    _PS_AuthorSec,
00135                    _PS_Author,
00136                    _PS_HistorySec,
00137                    _PS_Table,
00138                    _PS_Cell,
00139                    _PS_Version,
00140                    _PS_RDFTriple,
00141                    _PS_RDFData,
00142     } ParseState;
00143 
00144  protected:
00145 
00146     // TODO: make us private, refactor code
00147     UT_Error        m_error;
00148     ParseState      m_parseState;
00149 
00150     gchar       m_charDataSeen[4]; // fixed 4-element buffer; NOTE(review): presumably used together with the two m_lenCharData* counters below - confirm
00151     UT_uint32       m_lenCharDataSeen;
00152     UT_uint32       m_lenCharDataExpected;
00153     UT_uint32       m_iOperationCount; // exposed through incOperationCount() / getOperationCount()
00154     bool            m_bSeenCR;
00155     bool            m_bWhiteSignificant;
00156     bool            m_bWasSpace;
00157 
00158     PP_PropertyVector m_vecInlineFmt;
00159     UT_NumberStack      m_nstackFmtStartIndex;
00160 
00161     UT_ByteBufPtr   m_currentDataItem;
00162     std::string     m_currentDataItemName;
00163     std::string     m_currentDataItemMimeType;
00164     bool            m_currentDataItemEncoded;
00165 
00166     const char *    m_szFileName; // raw pointer; lifetime of the pointed-to string is not visible in this header
00167 
00168     std::string     m_currentMetaDataName;
00169     UT_uint32       m_currentRevisionId;
00170     time_t          m_currentRevisionTime;
00171     UT_uint32       m_currentRevisionVersion;
00172 
00173     // For reading RDF triples
00174     std::string     m_rdfSubject;
00175     std::string     m_rdfPredicate;
00176     std::string     m_rdfXSDType;
00177     int             m_rdfObjectType;
00178     PD_DocumentRDFMutationHandle m_rdfMutation; // std::shared_ptr<PD_DocumentRDFMutation>, per the typedef near the top of this header
00179 
00180     typedef std::map<std::string, UT_sint32> token_map_t; // std::string -> UT_sint32 map type used for m_tokens
00181     token_map_t m_tokens;
00182 
00183 private:
00184     UT_uint32   m_iCharCount; // derived classes can only read this, through _data_CharCount()
00185     bool        m_bStripLeading;
00186 protected:
00187     UT_uint32   _data_CharCount () const { return m_iCharCount; } // read-only accessor for the private m_iCharCount
00188     void        _data_NewBlock ();
00189 };
00190 
00191 #endif /* IE_IMP_XML_H */

Generated on Sun Feb 14 2021 for AbiWord by doxygen 1.7.1