Classes | Defines | Typedefs | Enumerations | Functions | Variables

ie_imp_MsWord_97.cpp File Reference

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "ut_locale.h"
#include <zlib.h>
#include "wv.h"
#include "ut_string_class.h"
#include "ut_string.h"
#include "ut_std_string.h"
#include "ut_bytebuf.h"
#include "ut_units.h"
#include "ut_math.h"
#include "ut_assert.h"
#include "ut_debugmsg.h"
#include "ut_stack.h"
#include "xap_App.h"
#include "xap_Frame.h"
#include "xap_EncodingManager.h"
#include "xap_DialogFactory.h"
#include "xap_Dlg_Password.h"
#include "fg_Graphic.h"
#include "fg_GraphicRaster.h"
#include "fg_GraphicVector.h"
#include "pd_Document.h"
#include "ie_impexp_MsWord_97.h"
#include "ie_imp_MsWord_97.h"
#include "ie_impGraphic.h"
#include "ap_Strings.h"
#include "ap_Dialog_Id.h"
#include "pf_Frag_Strux.h"
#include "pt_PieceTable.h"
#include "pd_Style.h"
#include "fp_PageSize.h"
#include "ut_Language.h"
#include <gsf/gsf-infile.h>
#include <gsf/gsf-infile-msole.h>
#include <gsf/gsf-msole-utils.h>
#include <gsf/gsf-docprop-vector.h>
#include <gsf/gsf-meta-names.h>

Classes

struct  field
struct  Doc_Field_Mapping_t
struct  ListIdLevelPair
struct  DocAndLid

Defines

#define X_CheckError(v)   do { if (!(v)) return 1; } while (0)
#define SUPPORTS_OLD_IMAGES   1
#define FieldMappingSize   (sizeof(s_Tokens)/sizeof(s_Tokens[0]))
#define DOC_TEXTRUN_SIZE   2048
#define DOC_PROPBUFFER_SIZE   1024
#define ErrCleanupAndExit(code)   do {wvOLEFree (&ps); return(code);} while(0)
#define GetPassword()   _getPassword ( XAP_App::getApp()->getLastFocussedFrame() )
#define ErrorMessage(x)   do { XAP_Frame *_pFrame = XAP_App::getApp()->getLastFocussedFrame(); if ( _pFrame ) _errorMessage (_pFrame, (x)); } while (0)
#define PT_MAX_ATTRIBUTES   8

Typedefs

typedef UT_uint32 Doc_Color_t

Enumerations

enum  Doc_Field_t {
  F_TIME, F_DATE, F_EDITTIME, F_AUTHOR,
  F_PAGE, F_NUMCHARS, F_NUMPAGES, F_NUMWORDS,
  F_FILENAME, F_HYPERLINK, F_PAGEREF, F_EMBED,
  F_TOC, F_DateTimePicture, F_TOC_FROM_RANGE, F_DATEINAME,
  F_SPEICHERDAT, F_MERGEFIELD, F_OTHER
}
enum  MSWordListIdType {
  WLNF_INVALID = -1, WLNF_EUROPEAN_ARABIC = 0, WLNF_UPPER_ROMAN = 1, WLNF_LOWER_ROMAN = 2,
  WLNF_UPPER_LETTER = 3, WLNF_LOWER_LETTER = 4, WLNF_ORDINAL = 5, WLNF_BULLETS = 23,
  WLNF_HEBREW_NUMBERS = 45
}
enum  MSWord_ImageType { MSWord_UnknownImage, MSWord_VectorImage, MSWord_RasterImage }
enum  { LS_OFF = 0, LS_NORMAL = 1 }

Functions

static int charProc (wvParseStruct *ps, U16 eachchar, U8 chartype, U16 lid)
static int specCharProc (wvParseStruct *ps, U16 eachchar, CHP *achp)
static int eleProc (wvParseStruct *ps, wvTag tag, void *props, int dirty)
static int docProc (wvParseStruct *ps, wvTag tag)
static const gchar * s_translateStyleId (UT_uint32 id)
static char * s_stripDangerousChars (const char *s)
static char * s_convert_to_utf8 (const wvParseStruct *ps, const char *s)
static UT_String sMapIcoToColor (UT_uint16 ico, bool bForeground)
static Doc_Field_t s_mapNameToField (const char *name)
static const char * s_mapPageIdToString (UT_uint16 id)
static const char * s_mapDocToAbiListId (MSWordListIdType id)
static void s_mapDocToAbiListDelim (UT_uint16 *pStr, UT_uint32 iLen, UT_UTF8String &sDelim)
static const char * s_mapDocToAbiListStyle (MSWordListIdType id)
static const char * s_fieldFontForListStyle (MSWordListIdType id)
static UT_UTF8String _getPassword (XAP_Frame *pFrame)
static void cb_print_property (char const *name, GsfDocProp const *prop, DocAndLid *doc)
static void print_summary_stream (GsfInfile *msole, const char *stream_name, int lid, PD_Document *doc)
static int s_cmp_bookmarks_qsort (const void *a, const void *b)
static int s_cmp_bookmarks_bsearch (const void *a, const void *b)
static MSWord_ImageType s_determineImageType (Blip *b)
static IEGraphicFileType s_determineIEGFT (Blip *b)
static int sConvertLineStyle (short lineType)
static double brc_to_pixel (int x)
static UT_sint32 s_cmp_lids (const void *P1, const void *P2)

Variables

static Doc_Color_t word_colors [][3]
static Doc_Field_Mapping_t s_Tokens []
static IE_SuffixConfidence IE_Imp_MsWord_97_Sniffer__SuffixConfidence []
static IE_MimeConfidence IE_Imp_MsWord_97_Sniffer__MimeConfidence []
struct {
   const char *   metadata_key
   const char *   abi_metadata_name
} metadata_names []
static const gsize nr_metadata_names = G_N_ELEMENTS(metadata_names)

Define Documentation

#define DOC_PROPBUFFER_SIZE   1024
#define DOC_TEXTRUN_SIZE   2048
#define ErrCleanupAndExit (   code  )     do {wvOLEFree (&ps); return(code);} while(0)
#define ErrorMessage (   x  )     do { XAP_Frame *_pFrame = XAP_App::getApp()->getLastFocussedFrame(); if ( _pFrame ) _errorMessage (_pFrame, (x)); } while (0)
#define FieldMappingSize   (sizeof(s_Tokens)/sizeof(s_Tokens[0]))
#define GetPassword (  )     _getPassword ( XAP_App::getApp()->getLastFocussedFrame() )
#define PT_MAX_ATTRIBUTES   8

imports a stylesheet from our document

Referenced by IE_Imp_MsWord_97::_handleStyleSheet().

#define SUPPORTS_OLD_IMAGES   1
#define X_CheckError (   v  )     do { if (!(v)) return 1; } while (0)

Typedef Documentation


Enumeration Type Documentation

anonymous enum
Enumerator:
LS_OFF 
LS_NORMAL 
Enumerator:
F_TIME 
F_DATE 
F_EDITTIME 
F_AUTHOR 
F_PAGE 
F_NUMCHARS 
F_NUMPAGES 
F_NUMWORDS 
F_FILENAME 
F_HYPERLINK 
F_PAGEREF 
F_EMBED 
F_TOC 
F_DateTimePicture 
F_TOC_FROM_RANGE 
F_DATEINAME 
F_SPEICHERDAT 
F_MERGEFIELD 
F_OTHER 
Enumerator:
MSWord_UnknownImage 
MSWord_VectorImage 
MSWord_RasterImage 

Surprise, surprise, there are more list numerical formats than the 5 the MS documentation happens to mention, so here I will put what I found out (later we will move it to some better place).

Enumerator:
WLNF_INVALID 
WLNF_EUROPEAN_ARABIC 
WLNF_UPPER_ROMAN 
WLNF_LOWER_ROMAN 
WLNF_UPPER_LETTER 
WLNF_LOWER_LETTER 
WLNF_ORDINAL 
WLNF_BULLETS 
WLNF_HEBREW_NUMBERS 

Function Documentation

static UT_UTF8String _getPassword ( XAP_Frame pFrame  )  [static]
static double brc_to_pixel ( int  x  )  [static]
static void cb_print_property ( char const *  name,
GsfDocProp const *  prop,
DocAndLid doc 
) [static]
static int charProc ( wvParseStruct ps,
U16  eachchar,
U8  chartype,
U16  lid 
) [static]
static int docProc ( wvParseStruct ps,
wvTag  tag 
) [static]
static int eleProc ( wvParseStruct ps,
wvTag  tag,
void *  props,
int  dirty 
) [static]
static void print_summary_stream ( GsfInfile *  msole,
const char *  stream_name,
int  lid,
PD_Document doc 
) [static]
static int s_cmp_bookmarks_bsearch ( const void *  a,
const void *  b 
) [static]
static int s_cmp_bookmarks_qsort ( const void *  a,
const void *  b 
) [static]
static UT_sint32 s_cmp_lids ( const void *  P1,
const void *  P2 
) [static]

s_cmp_lids This function is used to sort the textboxPos lids in order of their lid values. This matches the order of the text sort in the out-of-stream table. Used by the qsort method on UT_Vector.

Parameters:
const void * P1 - pointer to a textboxPos pointer
const void * P2 - pointer to a textboxPos pointer
Returns:
-ve if the lid of P1 < the lid of P2, 0 if they are equal, +ve if the lid of P1 > the lid of P2

Referenced by IE_Imp_MsWord_97::_findNextTextboxSection().

static char* s_convert_to_utf8 ( const wvParseStruct ps,
const char *  s 
) [static]
static IEGraphicFileType s_determineIEGFT ( Blip b  )  [static]
static MSWord_ImageType s_determineImageType ( Blip b  )  [static]
static const char* s_fieldFontForListStyle ( MSWordListIdType  id  )  [static]

Map msword list enums back to abi's field font for that given style

References UT_DEBUGMSG, WLNF_BULLETS, WLNF_EUROPEAN_ARABIC, WLNF_LOWER_LETTER, WLNF_LOWER_ROMAN, WLNF_ORDINAL, WLNF_UPPER_LETTER, and WLNF_UPPER_ROMAN.

Referenced by IE_Imp_MsWord_97::_beginPara().

static void s_mapDocToAbiListDelim ( UT_uint16 pStr,
UT_uint32  iLen,
UT_UTF8String sDelim 
) [static]

Form the AW list delimiter string.

References UT_UTF8String::appendUCS4().

Referenced by IE_Imp_MsWord_97::_beginPara().

static const char* s_mapDocToAbiListId ( MSWordListIdType  id  )  [static]
static const char* s_mapDocToAbiListStyle ( MSWordListIdType  id  )  [static]

Map msword list enums back to abi's list styles

References WLNF_BULLETS, WLNF_EUROPEAN_ARABIC, WLNF_LOWER_LETTER, WLNF_LOWER_ROMAN, WLNF_ORDINAL, WLNF_UPPER_LETTER, and WLNF_UPPER_ROMAN.

Referenced by IE_Imp_MsWord_97::_beginPara().

static Doc_Field_t s_mapNameToField ( const char *  name  )  [static]
static const char* s_mapPageIdToString ( UT_uint16  id  )  [static]
static char* s_stripDangerousChars ( const char *  s  )  [static]

Strip characters that would confuse either the XML parser or our property parser; the caller is responsible for freeing the returned pointer with g_free.

References UT_return_val_if_fail.

Referenced by IE_Imp_MsWord_97::_beginPara().

static const gchar* s_translateStyleId ( UT_uint32  id  )  [static]

Translates MS numerical IDs for standard styles into our names. The style names that have been commented out are those that do not currently have a localised equivalent in AW.

References UT_ASSERT_HARMLESS, UT_DEBUGMSG, and UT_SHOULD_NOT_HAPPEN.

Referenced by IE_Imp_MsWord_97::_beginChar(), IE_Imp_MsWord_97::_beginPara(), and IE_Imp_MsWord_97::_handleStyleSheet().

static int sConvertLineStyle ( short  lineType  )  [static]
static UT_String sMapIcoToColor ( UT_uint16  ico,
bool  bForeground 
) [static]
static int specCharProc ( wvParseStruct ps,
U16  eachchar,
CHP achp 
) [static]

Variable Documentation

const char* abi_metadata_name

Referenced by cb_print_property().

Initial value:
 {
    { IE_MIME_MATCH_FULL,   IE_MIMETYPE_MSWord,         UT_CONFIDENCE_GOOD  },
    { IE_MIME_MATCH_FULL,   "application/vnd.ms-word",  UT_CONFIDENCE_GOOD  },
    { IE_MIME_MATCH_FULL,   "text/doc",                 UT_CONFIDENCE_GOOD  }, 
    { IE_MIME_MATCH_BOGUS,  "",                         UT_CONFIDENCE_ZILCH }
}
Initial value:
const char* metadata_key

Referenced by cb_print_property().

struct { ... } metadata_names[] [static]

Referenced by cb_print_property().

const gsize nr_metadata_names = G_N_ELEMENTS(metadata_names) [static]
Initial value:
{
    {"TIME",       F_TIME},
    {"EDITTIME",   F_EDITTIME},
    {"DATE",       F_DATE},
    {"date",       F_DATE},
    {"DATEINAME",      F_DATE}, 
    {"SPEICHERDAT",    F_DATE}, 
    {"\\@",        F_DateTimePicture},

    {"FILENAME",   F_FILENAME},
    {"\\filename", F_FILENAME},
    {"PAGE",       F_PAGE},
    {"\\*Arabisch",F_PAGE},
    {"NUMCHARS",   F_NUMCHARS},
    {"NUMPAGES",   F_NUMPAGES},
    {"NUMWORDS",   F_NUMWORDS},
    {"MERGEFIELD", F_MERGEFIELD},
    
    {"HYPERLINK",  F_HYPERLINK},
    {"PAGEREF",    F_PAGEREF},
    {"EMBED",      F_EMBED},
    {"TOC",        F_TOC},
    {"\\o",        F_TOC_FROM_RANGE},
    {"AUTHOR",     F_AUTHOR},

    { "*",         F_OTHER}
}
Doc_Color_t word_colors[][3] [static]
Initial value:
 {
    {0x00, 0x00, 0x00}, 
    {0x00, 0x00, 0xff}, 
    {0x00, 0xff, 0xff}, 
    {0x00, 0xff, 0x00}, 
    {0xff, 0x00, 0xff}, 
    {0xff, 0x00, 0x00}, 
    {0xff, 0xff, 0x00}, 
    {0xff, 0xff, 0xff}, 
    {0x00, 0x00, 0x80}, 
    {0x00, 0x80, 0x80}, 
    {0x00, 0x80, 0x00}, 
    {0x80, 0x00, 0x80}, 
    {0x80, 0x00, 0x00}, 
    {0x80, 0x80, 0x00}, 
    {0x80, 0x80, 0x80}, 
    {0xc0, 0xc0, 0xc0}, 
}

Referenced by sMapIcoToColor().