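// Declares CMarkup, a markup (XML-style) document class, together with the
// MCD_* portability macros it is written against: the character type, the
// C runtime string functions and the string class are all selected by
// preprocessor switches.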
#if !defined(_MARKUP_H_INCLUDED_)
#define _MARKUP_H_INCLUDED_

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Build configuration. Each switch below may be predefined by the project;
// whatever is left undefined is derived here:
//   MARKUP_WCHAR        wchar_t text; implied by _UNICODE or UNICODE
//   MARKUP_MBCS         multi-byte char text; implied by _MBCS or MBCS
//   MARKUP_STL          std::string / std::wstring instead of MFC CString;
//                       forced on for every compiler that is not VC++
//   MARKUP_SAFESTR      use the VC++ "_s" runtime functions; when it is not
//                       defined, VC++ warning 4996 is disabled instead
//   MARKUP_ICONV / MARKUP_WINCONV / MARKUP_STDCONV
//                       conversion selector; if none is given, GNU compilers
//                       get MARKUP_ICONV and everything else MARKUP_WINCONV
//   MARKUP_SIZEOFWCHAR  4 when the compiler reports a 4-byte wchar_t through
//                       __SIZEOF_WCHAR_T__ or __WCHAR_MAX__, otherwise 2
#if defined(_MSC_VER) && _MSC_VER > 1000 // VC++
#pragma once
#if ! defined(MARKUP_SAFESTR) // not VC++ safe strings
#pragma warning(disable:4996) // VC++ 2005 deprecated function warnings
#endif // not VC++ safe strings
#if defined(MARKUP_STL) && _MSC_VER < 1400 // STL pre VC++ 2005
#pragma warning(disable:4786) // std::string long names
#endif // STL pre VC++ 2005
#else // not VC++
#if ! defined(MARKUP_STL)
#define MARKUP_STL
#endif // not STL
#if defined(__GNUC__) && ! defined(MARKUP_ICONV) && ! defined(MARKUP_STDCONV) && ! defined(MARKUP_WINCONV)
#define MARKUP_ICONV
#endif // GNUC and not ICONV not STDCONV not WINCONV
#endif // not VC++
#if (defined(_UNICODE) || defined(UNICODE)) && ! defined(MARKUP_WCHAR)
#define MARKUP_WCHAR
#endif // _UNICODE or UNICODE
#if (defined(_MBCS) || defined(MBCS)) && ! defined(MARKUP_MBCS)
#define MARKUP_MBCS
#endif // _MBCS or MBCS
#if ! defined(MARKUP_SIZEOFWCHAR)
#if (defined(__SIZEOF_WCHAR_T__) && __SIZEOF_WCHAR_T__ == 4) || (defined(__WCHAR_MAX__) && __WCHAR_MAX__ > 0x10000)
#define MARKUP_SIZEOFWCHAR 4
#else // sizeof(wchar_t) != 4
#define MARKUP_SIZEOFWCHAR 2
#endif // sizeof(wchar_t) != 4
#endif // not MARKUP_SIZEOFWCHAR
#if ! defined(MARKUP_WINCONV) && ! defined(MARKUP_STDCONV) && ! defined(MARKUP_ICONV)
#define MARKUP_WINCONV
#endif // not WINCONV not STDCONV not ICONV
// Code page identifiers used for the ANSI code page, UTF-8, UTF-16 and UTF-32.
#define MCD_ACP 0
#define MCD_UTF8 65001
#define MCD_UTF16 1200
#define MCD_UTF32 65005

// Character-type abstraction. Depending on MARKUP_WCHAR the names below map
// onto the wchar_t or the char flavor of the C runtime:
//   MCD_CHAR, MCD_PCSZ    character type, pointer to constant string
//   MCD_PSZLEN/CHR/STR/TOL/NCMP   strlen, strchr, strstr, strtol, strncmp
//   MCD_PSZCPY/NCPY/CAT, MCD_SPRINTF   copy, bounded copy, concatenate, format
//   MCD_SSZ(sz)     destination argument(s) for the functions above; expands
//                   to "sz,count" where the function takes a size. The count
//                   comes from sizeof(sz), so sz must be a real array there.
//   MCD_FOPEN(f,n,m)  statement that opens file n with mode m into f; the
//                     safe-string variants set f to NULL on failure
//   MCD_T(s)        literal of the build's character type
//   MCD_ENC         name of the in-memory text encoding ("" for MBCS builds)
//   MCD_CLEN(p)     multi-byte character length at p in MBCS builds, else 1
#if defined(MARKUP_WCHAR)
#define MCD_CHAR wchar_t
#define MCD_PCSZ const wchar_t*
#define MCD_PSZLEN (int)wcslen
#define MCD_PSZCHR wcschr
#define MCD_PSZSTR wcsstr
#define MCD_PSZTOL wcstol
#define MCD_PSZNCMP wcsncmp
#if defined(MARKUP_SAFESTR) // VC++ safe strings
#define MCD_SSZ(sz) (sz),(sizeof(sz)/sizeof(MCD_CHAR))
#define MCD_PSZCPY(sz,p) wcscpy_s(MCD_SSZ(sz),(p))
#define MCD_PSZNCPY(sz,p,n) wcsncpy_s(MCD_SSZ(sz),(p),(n))
#define MCD_PSZCAT(sz,p) wcscat_s(MCD_SSZ(sz),(p))
#define MCD_SPRINTF swprintf_s
// do/while(0) keeps the macro a single statement, so it is safe in if/else
#define MCD_FOPEN(f,n,m) do{if(_wfopen_s(&(f),(n),(m))!=0)(f)=NULL;}while(0)
#else // not VC++ safe strings
#if defined(__GNUC__)
// the GNU swprintf takes the buffer size as its second argument
#define MCD_SSZ(sz) (sz),(sizeof(sz)/sizeof(MCD_CHAR))
#else // not GNUC
#define MCD_SSZ(sz) (sz)
#endif // not GNUC
#define MCD_PSZCPY wcscpy
#define MCD_PSZNCPY wcsncpy
#define MCD_PSZCAT wcscat
#define MCD_SPRINTF swprintf
#define MCD_FOPEN(f,n,m) (f)=_wfopen((n),(m))
#endif // not VC++ safe strings
#define MCD_T(s) L ## s
#if MARKUP_SIZEOFWCHAR == 4 // sizeof(wchar_t) == 4
#define MCD_ENC MCD_T("UTF-32")
#else // sizeof(wchar_t) == 2
#define MCD_ENC MCD_T("UTF-16")
#endif
#define MCD_CLEN(p) 1
#else // not MARKUP_WCHAR
#define MCD_CHAR char
#define MCD_PCSZ const char*
#define MCD_PSZLEN (int)strlen
#define MCD_PSZCHR strchr
#define MCD_PSZSTR strstr
#define MCD_PSZTOL strtol
#define MCD_PSZNCMP strncmp
#if defined(MARKUP_SAFESTR) // VC++ safe strings
#define MCD_SSZ(sz) (sz),(sizeof(sz)/sizeof(MCD_CHAR))
#define MCD_PSZCPY(sz,p) strcpy_s(MCD_SSZ(sz),(p))
#define MCD_PSZNCPY(sz,p,n) strncpy_s(MCD_SSZ(sz),(p),(n))
#define MCD_PSZCAT(sz,p) strcat_s(MCD_SSZ(sz),(p))
#define MCD_SPRINTF sprintf_s
// do/while(0) keeps the macro a single statement, so it is safe in if/else
#define MCD_FOPEN(f,n,m) do{if(fopen_s(&(f),(n),(m))!=0)(f)=NULL;}while(0)
#else // not VC++ safe strings
#define MCD_SSZ(sz) (sz)
#define MCD_PSZCPY strcpy
#define MCD_PSZNCPY strncpy
#define MCD_PSZCAT strcat
#define MCD_SPRINTF sprintf
#define MCD_FOPEN(f,n,m) (f)=fopen((n),(m))
#endif // not VC++ safe strings
#define MCD_T(s) s
#if defined(MARKUP_MBCS) // MBCS/double byte
#define MCD_ENC MCD_T("")
#if defined(MARKUP_WINCONV)
#define MCD_CLEN(p) (int)_mbclen((const unsigned char*)(p))
#else // not WINCONV
#define MCD_CLEN(p) (int)mblen((p),MB_CUR_MAX)
#endif // not WINCONV
#else // not MBCS/double byte
#define MCD_ENC MCD_T("UTF-8")
#define MCD_CLEN(p) 1
#endif // not MBCS/double byte
#endif // not MARKUP_WCHAR
#if !defined(_MSC_VER) || _MSC_VER < 1000 // not VC++
// text for the current errno value; expands to a call, so use it right after
// the failing operation
#define MCD_STRERROR strerror(errno)
#endif // not VC++
00136
00137
00138
00139
00140 #if defined(MARKUP_STL) // STL
00141 #include <string>
00142 #if defined(MARKUP_WCHAR)
00143 #define MCD_STR std::wstring
00144 #else // not MARKUP_WCHAR
00145 #define MCD_STR std::string
00146 #endif // not MARKUP_WCHAR
00147 #define MCD_2PCSZ(s) s.c_str()
00148 #define MCD_STRLENGTH(s) (int)s.size()
00149 #define MCD_STRCLEAR(s) s.erase()
00150 #define MCD_STRISEMPTY(s) s.empty()
00151 #define MCD_STRMID(s,n,l) s.substr(n,l)
00152 #define MCD_STRASSIGN(s,p,n) s.assign(p,n)
00153 #define MCD_STRCAPACITY(s) (int)s.capacity()
00154 #define MCD_STRINSERTREPLACE(d,i,r,s) d.replace(i,r,s)
00155 #define MCD_GETBUFFER(s,n) new MCD_CHAR[n+1]; s.reserve(n)
00156 #define MCD_RELEASEBUFFER(s,p,n) s.assign(p,n); delete[]p
00157 #define MCD_BLDRESERVE(s,n) s.reserve(n)
00158 #define MCD_BLDCHECK(s,n,d) ;
00159 #define MCD_BLDRELEASE(s) ;
00160 #define MCD_BLDAPPENDN(s,p,n) s.append(p,n)
00161 #define MCD_BLDAPPEND(s,p) s.append(p)
00162 #define MCD_BLDAPPEND1(s,c) s+=(MCD_CHAR)(c)
00163 #else // not STL, i.e. MFC
00164 #include <afx.h>
00165 #define MCD_STR CString
00166 #define MCD_2PCSZ(s) ((MCD_PCSZ)s)
00167 #define MCD_STRLENGTH(s) s.GetLength()
00168 #define MCD_STRCLEAR(s) s.Empty()
00169 #define MCD_STRISEMPTY(s) s.IsEmpty()
00170 #define MCD_STRMID(s,n,l) s.Mid(n,l)
00171 #define MCD_STRASSIGN(s,p,n) memcpy(s.GetBuffer(n),p,(n)*sizeof(MCD_CHAR));s.ReleaseBuffer(n);
00172 #define MCD_STRCAPACITY(s) (((CStringData*)((MCD_PCSZ)s)-1)->nAllocLength)
00173 #define MCD_GETBUFFER(s,n) s.GetBuffer(n)
00174 #define MCD_RELEASEBUFFER(s,p,n) s.ReleaseBuffer(n)
00175 #define MCD_BLDRESERVE(s,n) MCD_CHAR*pD=s.GetBuffer(n); int nL=0
00176 #define MCD_BLDCHECK(s,n,d) if(nL+(int)(d)>n){s.ReleaseBuffer(nL);n<<=2;pD=s.GetBuffer(n);}
00177 #define MCD_BLDRELEASE(s) s.ReleaseBuffer(nL)
00178 #define MCD_BLDAPPENDN(s,p,n) MCD_PSZNCPY(&pD[nL],p,n);nL+=n
00179 #define MCD_BLDAPPEND(s,p) MCD_PSZCPY(&pD[nL],p);nL+=MCD_PSZLEN(p)
00180 #define MCD_BLDAPPEND1(s,c) pD[nL++]=(MCD_CHAR)(c)
00181 #endif // not STL
00182 #define MCD_STRTOINT(s) MCD_PSZTOL(MCD_2PCSZ(s),NULL,10)
00183
00184
00185 struct MCD_CSTR
00186 {
00187 MCD_CSTR() { pcsz=NULL; };
00188 MCD_CSTR( MCD_PCSZ p ) { pcsz=p; };
00189 MCD_CSTR( const MCD_STR& s ) { pcsz = MCD_2PCSZ(s); };
00190 operator MCD_PCSZ() const { return pcsz; };
00191 MCD_PCSZ pcsz;
00192 };
// File name abstraction. In wide-character GNU builds file names stay narrow
// (const char*) and MCD_FOPEN is redefined to plain fopen, so a separate
// MCD_CSTR_FILENAME parameter type is needed; in every other build the file
// name types are simply aliases of the text types.
#if defined(MARKUP_WCHAR) && defined(__GNUC__)
#undef MCD_FOPEN
#define MCD_FOPEN(f,n,m) (f)=fopen((n),(m))
#define MCD_T_FILENAME(s) s
#define MCD_PCSZ_FILENAME const char*
// Narrow-string counterpart of MCD_CSTR: stores the pointer only, no copy.
struct MCD_CSTR_FILENAME
{
	MCD_CSTR_FILENAME() : pcsz(NULL) {}
	MCD_CSTR_FILENAME( MCD_PCSZ_FILENAME p ) : pcsz(p) {}
	MCD_CSTR_FILENAME( const std::string& s ) : pcsz(s.c_str()) {}
	operator MCD_PCSZ_FILENAME() const { return pcsz; }
	MCD_PCSZ_FILENAME pcsz; // borrowed pointer, may be NULL
};
#else
#define MCD_CSTR_FILENAME MCD_CSTR
#define MCD_T_FILENAME MCD_T
#define MCD_PCSZ_FILENAME MCD_PCSZ
#endif
// Debug aid. In _DEBUG builds MARKUP_SETDEBUGSTATE points m_pMainDS and
// m_pChildDS at the document text of the current main and child elements
// (0 when the position index is 0). It expands inside CMarkup members, since
// it uses m_strDoc, m_aPos, m_iPos and m_iPosChild. In other builds it
// expands to nothing.
#if defined(_DEBUG) // DEBUG
#define _DS(i) ((i)?&(MCD_2PCSZ(m_strDoc))[m_aPos[(i)].nStart]:0)
#define MARKUP_SETDEBUGSTATE m_pMainDS=_DS(m_iPos); m_pChildDS=_DS(m_iPosChild)
#else // not DEBUG
#define MARKUP_SETDEBUGSTATE
#endif // not DEBUG
00220
00221 class CMarkup
00222 {
00223 public:
00224 CMarkup() { SetDoc( NULL ); InitDocFlags(); };
00225 CMarkup( MCD_CSTR szDoc ) { SetDoc( szDoc ); InitDocFlags(); };
00226 CMarkup( int nFlags ) { SetDoc( NULL ); m_nDocFlags = nFlags; };
00227 CMarkup( const CMarkup& markup ) { *this = markup; };
00228 void operator=( const CMarkup& markup );
00229 ~CMarkup() {};
00230
00231
00232 bool Load( MCD_CSTR_FILENAME szFileName );
00233 bool SetDoc( MCD_PCSZ pDoc );
00234 bool SetDoc( const MCD_STR& strDoc );
00235 bool IsWellFormed();
00236 bool FindElem( MCD_CSTR szName=NULL );
00237 bool FindChildElem( MCD_CSTR szName=NULL );
00238 bool IntoElem();
00239 bool OutOfElem();
00240 void ResetChildPos() { x_SetPos(m_iPosParent,m_iPos,0); };
00241 void ResetMainPos() { x_SetPos(m_iPosParent,0,0); };
00242 void ResetPos() { x_SetPos(0,0,0); };
00243 MCD_STR GetTagName() const;
00244 MCD_STR GetChildTagName() const { return x_GetTagName(m_iPosChild); };
00245 MCD_STR GetData() const { return x_GetData(m_iPos); };
00246 MCD_STR GetChildData() const { return x_GetData(m_iPosChild); };
00247 MCD_STR GetElemContent() const { return x_GetElemContent(m_iPos); };
00248 MCD_STR GetAttrib( MCD_CSTR szAttrib ) const { return x_GetAttrib(m_iPos,szAttrib); };
00249 MCD_STR GetChildAttrib( MCD_CSTR szAttrib ) const { return x_GetAttrib(m_iPosChild,szAttrib); };
00250 MCD_STR GetAttribName( int n ) const;
00251 int FindNode( int nType=0 );
00252 int GetNodeType() { return m_nNodeType; };
00253 bool SavePos( MCD_CSTR szPosName=MCD_T(""), int nMap = 0 );
00254 bool RestorePos( MCD_CSTR szPosName=MCD_T(""), int nMap = 0 );
00255 bool SetMapSize( int nSize, int nMap = 0 );
00256 const MCD_STR& GetError() const { return m_strError; };
00257 int GetDocFlags() const { return m_nDocFlags; };
00258 void SetDocFlags( int nFlags ) { m_nDocFlags = nFlags; };
00259 enum MarkupDocFlags
00260 {
00261 MDF_UTF16LEFILE = 1,
00262 MDF_UTF8PREAMBLE = 4,
00263 MDF_IGNORECASE = 8,
00264 MDF_READFILE = 16,
00265 MDF_WRITEFILE = 32,
00266 MDF_APPENDFILE = 64,
00267 MDF_UTF16BEFILE = 128
00268 };
00269 enum MarkupNodeFlags
00270 {
00271 MNF_WITHCDATA = 0x01,
00272 MNF_WITHNOLINES = 0x02,
00273 MNF_WITHXHTMLSPACE = 0x04,
00274 MNF_WITHREFS = 0x08,
00275 MNF_WITHNOEND = 0x10,
00276 MNF_ESCAPEQUOTES = 0x100,
00277 MNF_NONENDED = 0x100000,
00278 MNF_ILLDATA = 0x200000
00279 };
00280 enum MarkupNodeType
00281 {
00282 MNT_ELEMENT = 1,
00283 MNT_TEXT = 2,
00284 MNT_WHITESPACE = 4,
00285 MNT_CDATA_SECTION = 8,
00286 MNT_PROCESSING_INSTRUCTION = 16,
00287 MNT_COMMENT = 32,
00288 MNT_DOCUMENT_TYPE = 64,
00289 MNT_EXCLUDE_WHITESPACE = 123,
00290 MNT_LONE_END_TAG = 128,
00291 MNT_NODE_ERROR = 32768
00292 };
00293
00294
00295 bool Save( MCD_CSTR_FILENAME szFileName );
00296 const MCD_STR& GetDoc() const { return m_strDoc; };
00297 bool AddElem( MCD_CSTR szName, MCD_CSTR szData=NULL, int nFlags=0 ) { return x_AddElem(szName,szData,nFlags); };
00298 bool InsertElem( MCD_CSTR szName, MCD_CSTR szData=NULL, int nFlags=0 ) { return x_AddElem(szName,szData,nFlags|MNF_INSERT); };
00299 bool AddChildElem( MCD_CSTR szName, MCD_CSTR szData=NULL, int nFlags=0 ) { return x_AddElem(szName,szData,nFlags|MNF_CHILD); };
00300 bool InsertChildElem( MCD_CSTR szName, MCD_CSTR szData=NULL, int nFlags=0 ) { return x_AddElem(szName,szData,nFlags|MNF_INSERT|MNF_CHILD); };
00301 bool AddElem( MCD_CSTR szName, int nValue, int nFlags=0 ) { return x_AddElem(szName,nValue,nFlags); };
00302 bool InsertElem( MCD_CSTR szName, int nValue, int nFlags=0 ) { return x_AddElem(szName,nValue,nFlags|MNF_INSERT); };
00303 bool AddChildElem( MCD_CSTR szName, int nValue, int nFlags=0 ) { return x_AddElem(szName,nValue,nFlags|MNF_CHILD); };
00304 bool InsertChildElem( MCD_CSTR szName, int nValue, int nFlags=0 ) { return x_AddElem(szName,nValue,nFlags|MNF_INSERT|MNF_CHILD); };
00305 bool AddAttrib( MCD_CSTR szAttrib, MCD_CSTR szValue ) { return x_SetAttrib(m_iPos,szAttrib,szValue); };
00306 bool AddChildAttrib( MCD_CSTR szAttrib, MCD_CSTR szValue ) { return x_SetAttrib(m_iPosChild,szAttrib,szValue); };
00307 bool AddAttrib( MCD_CSTR szAttrib, int nValue ) { return x_SetAttrib(m_iPos,szAttrib,nValue); };
00308 bool AddChildAttrib( MCD_CSTR szAttrib, int nValue ) { return x_SetAttrib(m_iPosChild,szAttrib,nValue); };
00309 bool AddSubDoc( MCD_CSTR szSubDoc ) { return x_AddSubDoc(szSubDoc,0); };
00310 bool InsertSubDoc( MCD_CSTR szSubDoc ) { return x_AddSubDoc(szSubDoc,MNF_INSERT); };
00311 MCD_STR GetSubDoc() const { return x_GetSubDoc(m_iPos); };
00312 bool AddChildSubDoc( MCD_CSTR szSubDoc ) { return x_AddSubDoc(szSubDoc,MNF_CHILD); };
00313 bool InsertChildSubDoc( MCD_CSTR szSubDoc ) { return x_AddSubDoc(szSubDoc,MNF_CHILD|MNF_INSERT); };
00314 MCD_STR GetChildSubDoc() const { return x_GetSubDoc(m_iPosChild); };
00315 bool AddNode( int nType, MCD_CSTR szText ) { return x_AddNode(nType,szText,0); };
00316 bool InsertNode( int nType, MCD_CSTR szText ) { return x_AddNode(nType,szText,MNF_INSERT); };
00317
00318
00319 bool RemoveElem();
00320 bool RemoveChildElem();
00321 bool RemoveNode();
00322 bool SetAttrib( MCD_CSTR szAttrib, MCD_CSTR szValue, int nFlags=0 ) { return x_SetAttrib(m_iPos,szAttrib,szValue,nFlags); };
00323 bool SetChildAttrib( MCD_CSTR szAttrib, MCD_CSTR szValue, int nFlags=0 ) { return x_SetAttrib(m_iPosChild,szAttrib,szValue,nFlags); };
00324 bool SetAttrib( MCD_CSTR szAttrib, int nValue, int nFlags=0 ) { return x_SetAttrib(m_iPos,szAttrib,nValue,nFlags); };
00325 bool SetChildAttrib( MCD_CSTR szAttrib, int nValue, int nFlags=0 ) { return x_SetAttrib(m_iPosChild,szAttrib,nValue,nFlags); };
00326 bool SetData( MCD_CSTR szData, int nFlags=0 ) { return x_SetData(m_iPos,szData,nFlags); };
00327 bool SetChildData( MCD_CSTR szData, int nFlags=0 ) { return x_SetData(m_iPosChild,szData,nFlags); };
00328 bool SetData( int nValue ) { return x_SetData(m_iPos,nValue); };
00329 bool SetChildData( int nValue ) { return x_SetData(m_iPosChild,nValue); };
00330 bool SetElemContent( MCD_CSTR szContent ) { return x_SetElemContent(szContent); };
00331
00332
00333 static bool ReadTextFile( MCD_CSTR_FILENAME szFileName, MCD_STR& strDoc, MCD_STR* pstrError=NULL, int* pnDocFlags=NULL, MCD_STR* pstrEncoding=NULL );
00334 static bool WriteTextFile( MCD_CSTR_FILENAME szFileName, const MCD_STR& strDoc, MCD_STR* pstrError=NULL, int* pnDocFlags=NULL, MCD_STR* pstrEncoding=NULL );
00335 static MCD_STR EscapeText( MCD_CSTR szText, int nFlags = 0 );
00336 static MCD_STR UnescapeText( MCD_CSTR szText, int nTextLength = -1 );
00337 static int UTF16To8( char *pszUTF8, const unsigned short* pwszUTF16, int nUTF8Count );
00338 static int UTF8To16( unsigned short* pwszUTF16, const char* pszUTF8, int nUTF8Count );
00339 static MCD_STR UTF8ToA( MCD_CSTR pszUTF8, int* pnFailed = NULL );
00340 static MCD_STR AToUTF8( MCD_CSTR pszANSI );
00341 static void EncodeCharUTF8( int nUChar, char* pszUTF8, int& nUTF8Len );
00342 static int DecodeCharUTF8( const char*& pszUTF8, const char* pszUTF8End = NULL );
00343 static void EncodeCharUTF16( int nUChar, unsigned short* pwszUTF16, int& nUTF16Len );
00344 static int DecodeCharUTF16( const unsigned short*& pwszUTF16, const unsigned short* pszUTF16End = NULL );
00345 static bool DetectUTF8( const char* pText, int nTextLen, int* pnNonASCII = NULL );
00346 static MCD_STR GetDeclaredEncoding( MCD_CSTR szDoc );
00347
00348 protected:
00349
00350 #if defined(_DEBUG)
00351 MCD_PCSZ m_pMainDS;
00352 MCD_PCSZ m_pChildDS;
00353 #endif // DEBUG
00354
00355 MCD_STR m_strDoc;
00356 MCD_STR m_strError;
00357
00358 int m_iPosParent;
00359 int m_iPos;
00360 int m_iPosChild;
00361 int m_iPosFree;
00362 int m_iPosDeleted;
00363 int m_nNodeType;
00364 int m_nNodeOffset;
00365 int m_nNodeLength;
00366 int m_nDocFlags;
00367
00368 struct ElemPos
00369 {
00370 ElemPos() {};
00371 ElemPos( const ElemPos& pos ) { *this = pos; };
00372 int StartTagLen() const { return nStartTagLen; };
00373 void SetStartTagLen( int n ) { nStartTagLen = n; };
00374 void AdjustStartTagLen( int n ) { nStartTagLen += n; };
00375 int EndTagLen() const { return nEndTagLen; };
00376 void SetEndTagLen( int n ) { nEndTagLen = n; };
00377 bool IsEmptyElement() { return (StartTagLen()==nLength)?true:false; };
00378 int StartContent() const { return nStart + StartTagLen(); };
00379 int ContentLen() const { return nLength - StartTagLen() - EndTagLen(); };
00380 int StartAfter() const { return nStart + nLength; };
00381 int Level() const { return nFlags & 0xffff; };
00382 void SetLevel( int nLev ) { nFlags = (nFlags & ~0xffff) | nLev; };
00383 void ClearVirtualParent() { memset(this,0,sizeof(ElemPos)); };
00384
00385
00386 int nStart;
00387 int nLength;
00388 unsigned int nStartTagLen : 22;
00389 unsigned int nEndTagLen : 10;
00390 int nFlags;
00391 int iElemParent;
00392 int iElemChild;
00393 int iElemNext;
00394 int iElemPrev;
00395 };
00396
00397 enum MarkupNodeFlagsInternal
00398 {
00399 MNF_REPLACE = 0x001000,
00400 MNF_INSERT = 0x002000,
00401 MNF_CHILD = 0x004000,
00402 MNF_QUOTED = 0x008000,
00403 MNF_EMPTY = 0x010000,
00404 MNF_DELETED = 0x020000,
00405 MNF_FIRST = 0x080000,
00406 MNF_PUBLIC = 0x300000,
00407 MNF_ILLFORMED = 0x800000,
00408 MNF_USER = 0xf000000
00409 };
00410
00411 struct NodePos
00412 {
00413 NodePos() {};
00414 NodePos( int n ) { nNodeFlags=n; nNodeType=0; nStart=0; nLength=0; };
00415 int nNodeType;
00416 int nStart;
00417 int nLength;
00418 int nNodeFlags;
00419 MCD_STR strMeta;
00420 };
00421
00422 struct TokenPos
00423 {
00424 TokenPos( MCD_CSTR sz, int n ) { Clear(); pDoc=sz; nTokenFlags=n; };
00425 void Clear() { nL=0; nR=-1; nNext=0; };
00426 int Length() const { return nR - nL + 1; };
00427 int StrNIACmp( MCD_PCSZ p1, MCD_PCSZ p2, int n )
00428 {
00429
00430 bool bNonA = false;
00431 MCD_CHAR c1, c2;
00432 while ( n-- )
00433 {
00434 c1 = *p1++; c2 = *p2++;
00435 if ( c1 != c2 )
00436 {
00437 if (bNonA || !((c1>='a'&&c1<='z'&&c1==c2+('a'-'A'))||(c2>='a'&&c2<='z'&&c2==c1+('a'-'A'))))
00438 return c1 - c2;
00439 }
00440 else if ( (unsigned int)c1 > 127 )
00441 bNonA = true;
00442 }
00443 return 0;
00444 }
00445
00446 bool Match( MCD_CSTR szName )
00447 {
00448 int nLen = nR - nL + 1;
00449 if ( nTokenFlags & MDF_IGNORECASE )
00450 return ( (StrNIACmp( &pDoc[nL], szName, nLen ) == 0)
00451 && ( szName[nLen] == '\0' || MCD_PSZCHR(MCD_T(" =/[]"),szName[nLen]) ) );
00452 else
00453 return ( (MCD_PSZNCMP( &pDoc[nL], szName, nLen ) == 0)
00454 && ( szName[nLen] == '\0' || MCD_PSZCHR(MCD_T(" =/[]"),szName[nLen]) ) );
00455 };
00456 int nL;
00457 int nR;
00458 int nNext;
00459 MCD_PCSZ pDoc;
00460 int nTokenFlags;
00461 int nPreSpaceStart;
00462 int nPreSpaceLength;
00463 };
00464
00465 struct SavedPos
00466 {
00467
00468 SavedPos() { nSavedPosFlags=0; iPos=0; };
00469 MCD_STR strName;
00470 int iPos;
00471 enum { SPM_MAIN = 1, SPM_CHILD = 2, SPM_USED = 4, SPM_LAST = 8 };
00472 int nSavedPosFlags;
00473 };
00474
00475 struct SavedPosMap
00476 {
00477
00478 SavedPosMap( int nSize ) { nMapSize=nSize; pTable = new SavedPos*[nSize]; memset(pTable,0,nSize*sizeof(SavedPos*)); };
00479 ~SavedPosMap() { if (pTable) { for (int n=0;n<nMapSize;++n) if (pTable[n]) delete[] pTable[n]; delete[] pTable; } };
00480 SavedPos** pTable;
00481 int nMapSize;
00482 };
00483
00484 struct SavedPosMapArray
00485 {
00486
00487 SavedPosMapArray() { pMaps = NULL; };
00488 ~SavedPosMapArray() { RemoveAll(); };
00489 void RemoveAll() { SavedPosMap**p = pMaps; if (p) { while (*p) delete *p++; delete[] pMaps; pMaps=NULL; } };
00490 SavedPosMap** pMaps;
00491 };
00492 SavedPosMapArray m_SavedPosMapArray;
00493
00494 struct PosArray
00495 {
00496 PosArray() { Clear(); };
00497 ~PosArray() { Release(); };
00498 enum { PA_SEGBITS = 16, PA_SEGMASK = 0xffff };
00499 void RemoveAll() { Release(); Clear(); };
00500 void Release() { for (int n=0;n<SegsUsed();++n) delete[] (char*)pSegs[n]; if (pSegs) delete[] (char*)pSegs; };
00501 void Clear() { nSegs=0; nSize=0; pSegs=NULL; };
00502 int GetSize() const { return nSize; };
00503 int SegsUsed() const { return ((nSize-1)>>PA_SEGBITS) + 1; };
00504 ElemPos& operator[](int n) const { return pSegs[n>>PA_SEGBITS][n&PA_SEGMASK]; };
00505 ElemPos** pSegs;
00506 int nSize;
00507 int nSegs;
00508 };
00509 PosArray m_aPos;
00510
00511 struct NodeStack
00512 {
00513 NodeStack() { nTop=-1; nSize=0; pN=NULL; };
00514 ~NodeStack() { if (pN) delete [] pN; };
00515 NodePos& Top() { return pN[nTop]; };
00516 NodePos& At( int n ) { return pN[n]; };
00517 void Add() { ++nTop; if (nTop==nSize) Alloc(nSize*2+6); };
00518 void Remove() { --nTop; };
00519 int TopIndex() { return nTop; };
00520 protected:
00521 void Alloc( int nNewSize ) { NodePos* pNNew = new NodePos[nNewSize]; Copy(pNNew); nSize=nNewSize; };
00522 void Copy( NodePos* pNNew ) { for(int n=0;n<nSize;++n) pNNew[n]=pN[n]; if (pN) delete [] pN; pN=pNNew; };
00523 NodePos* pN;
00524 int nSize;
00525 int nTop;
00526 };
00527
00528 struct FilePos
00529 {
00530 FilePos() { fp = NULL; nDocFlags = 0; nFileByteLen = 0; };
00531 FILE* fp;
00532 int nDocFlags;
00533 int nFileByteLen;
00534 int nReadByteLen;
00535 int nFileCharUnitSize;
00536 int nFileTextLen;
00537 MCD_STR strIOResult;
00538 MCD_STR strEncoding;
00539 };
00540
00541 struct ConvertEncoding
00542 {
00543 ConvertEncoding( MCD_CSTR pszToEncoding, MCD_CSTR pszFromEncoding, const void* pFromBuffer, int nFromBufferLen )
00544 {
00545 strToEncoding = pszToEncoding;
00546 strFromEncoding = pszFromEncoding;
00547 pFrom = pFromBuffer;
00548 nFromLen = nFromBufferLen;
00549 nFailedChars = 0;
00550 nToCount = 0;
00551 };
00552 MCD_STR strToEncoding;
00553 MCD_STR strFromEncoding;
00554 const void* pFrom;
00555 int nFromLen;
00556 int nToCount;
00557 int nFailedChars;
00558 };
00559
00560 void x_SetPos( int iPosParent, int iPos, int iPosChild )
00561 {
00562 m_iPosParent = iPosParent;
00563 m_iPos = iPos;
00564 m_iPosChild = iPosChild;
00565 m_nNodeOffset = 0;
00566 m_nNodeLength = 0;
00567 m_nNodeType = iPos?MNT_ELEMENT:0;
00568 MARKUP_SETDEBUGSTATE;
00569 };
00570 int x_GetFreePos()
00571 {
00572 if ( m_iPosFree == m_aPos.GetSize() )
00573 x_AllocPosArray();
00574 return m_iPosFree++;
00575 };
00576 bool x_AllocPosArray( int nNewSize = 0 );
00577
00578 void InitDocFlags()
00579 {
00580
00581 #if defined(MARKUP_IGNORECASE) // ignore case
00582 m_nDocFlags = MDF_IGNORECASE;
00583 #else // not ignore case
00584 m_nDocFlags = 0;
00585 #endif // not ignore case
00586 };
00587
00588 bool x_ParseDoc();
00589 int x_ParseElem( int iPos, TokenPos& token );
00590 static bool x_FindAny( MCD_PCSZ pDoc, int& nChar );
00591 static bool x_FindName( TokenPos& token );
00592 static MCD_STR x_GetToken( const TokenPos& token );
00593 int x_FindElem( int iPosParent, int iPos, MCD_PCSZ szPath ) const;
00594 MCD_STR x_GetPath( int iPos ) const;
00595 MCD_STR x_GetTagName( int iPos ) const;
00596 MCD_STR x_GetData( int iPos ) const;
00597 MCD_STR x_GetAttrib( int iPos, MCD_PCSZ pAttrib ) const;
00598 static MCD_STR x_EncodeCDATASection( MCD_PCSZ szData );
00599 bool x_AddElem( MCD_PCSZ pName, MCD_PCSZ pValue, int nFlags );
00600 bool x_AddElem( MCD_PCSZ pName, int nValue, int nFlags );
00601 MCD_STR x_GetSubDoc( int iPos ) const;
00602 bool x_AddSubDoc( MCD_PCSZ pSubDoc, int nFlags );
00603 static bool x_FindAttrib( TokenPos& token, MCD_PCSZ pAttrib, int n=0 );
00604 bool x_SetAttrib( int iPos, MCD_PCSZ pAttrib, MCD_PCSZ pValue, int nFlags=0 );
00605 bool x_SetAttrib( int iPos, MCD_PCSZ pAttrib, int nValue, int nFlags=0 );
00606 bool x_AddNode( int nNodeType, MCD_PCSZ pText, int nNodeFlags );
00607 void x_RemoveNode( int iPosParent, int& iPos, int& nNodeType, int& nNodeOffset, int& nNodeLength );
00608 void x_AdjustForNode( int iPosParent, int iPos, int nShift );
00609 static bool x_CreateNode( MCD_STR& strNode, int nNodeType, MCD_PCSZ pText );
00610 int x_InsertNew( int iPosParent, int& iPosRel, NodePos& node );
00611 void x_LinkElem( int iPosParent, int iPosBefore, int iPos );
00612 int x_UnlinkElem( int iPos );
00613 int x_ReleaseSubDoc( int iPos );
00614 int x_ReleasePos( int iPos );
00615 bool x_GetMap( SavedPosMap*& pMap, int nMap, int nMapSize = 7 );
00616 void x_CheckSavedPos();
00617 static int x_ParseNode( TokenPos& token, NodePos& node );
00618 bool x_SetData( int iPos, MCD_PCSZ szData, int nFlags );
00619 bool x_SetData( int iPos, int nValue );
00620 int x_RemoveElem( int iPos );
00621 MCD_STR x_GetElemContent( int iPos ) const;
00622 bool x_SetElemContent( MCD_PCSZ szContent );
00623 void x_DocChange( int nLeft, int nReplace, const MCD_STR& strInsert );
00624 void x_Adjust( int iPos, int nShift, bool bAfterPos = false );
00625 static MCD_STR x_GetLastError();
00626 static int x_Hash( MCD_PCSZ p, int nSize ) { unsigned int n=0; while (*p) n += (unsigned int)(*p++); return n % nSize; };
00627 static int x_GetEncodingCodePage( MCD_CSTR psz7Encoding );
00628 static bool x_EndianSwapRequired( int nDocFlags );
00629 static void x_EndianSwapUTF16( unsigned short* pUTF16, int nCharLen );
00630 static bool x_CanConvert( MCD_CSTR pszToEnc, MCD_CSTR pszFromEnc );
00631 static int x_ConvertEncoding( ConvertEncoding& convert, void* pTo );
00632 #if defined(MARKUP_ICONV)
00633 static int x_IConv( ConvertEncoding& convert, void* pTo, int nToCharSize, int nFromCharSize );
00634 static const char* x_IConvName( char* szEncoding, MCD_CSTR pszEncoding );
00635 #endif
00636 static bool x_Open( MCD_CSTR_FILENAME szFileName, FilePos& file );
00637 static bool x_Read( void* pBuffer, FilePos& file );
00638 static bool x_ReadText( MCD_STR& strDoc, FilePos& file );
00639 static bool x_Write( void* pBuffer, FilePos& file, const void* pConstBuffer = NULL );
00640 static bool x_WriteText( const MCD_STR& strDoc, FilePos& file );
00641 static bool x_Close( FilePos& file );
00642 static MCD_STR x_IntToStr( int n ) { MCD_CHAR sz[25]; MCD_SPRINTF(MCD_SSZ(sz),MCD_T("%d"),n); MCD_STR s=sz; return s; };
00643 };
00644
00645 #endif // !defined(_MARKUP_H_INCLUDED_)