Main Page   Modules   Namespace List   Data Structures   File List   Data Fields   Examples  

SgmlParser.h

00001 /*
00002  * libsgml -- SGML state machine parsing library.
00003  *                                                                  
00004  * Copyright (c) 2002 Uninformed Research (http://www.uninformed.org)
00005  * All rights reserved.
00006  *
00007  * skape
00008  * [email protected]
00009  */
00020 #ifndef _LIBHTTP_SGMLPARSER_H
00021 #define _LIBHTTP_SGMLPARSER_H
00022 
00023 #ifdef __cplusplus
00024 extern "C" {
00025 #endif
00026 
00027 enum SgmlExtensionType {
              /*
               * Extension selector. This is the type of the `type` argument of
               * sgmlParserInitialize() and of the SGML_PARSER.type field.
               */
00028         SGML_EXTENSION_TYPE_XML = 0,
00029         SGML_EXTENSION_TYPE_HTML,         /* implicitly 1 (follows XML = 0) */
00030 
00031         SGML_EXTENSION_TYPE_CUSTOM = 255  /* NOTE(review): presumably a caller-supplied extension -- confirm in the .c file */
00032 };
00033 
00034 struct _sgml_parser;  /* forward declaration: the callbacks below take a parser pointer before the struct is defined */
00035 
00041 typedef struct _sgml_handlers {
              /*
               * Table of user callbacks. SGML_PARSER embeds one of these by value
               * (its `handlers` field) and sgmlParserInitialize() takes a pointer
               * to one. Every callback receives the parser and an opaque
               * `userContext` pointer.
               * NOTE(review): whether a NULL entry is tolerated is not visible in
               * this header -- confirm in the implementation.
               */
00042 
              /* Bracketing callbacks; presumably invoked before and after a parse run -- confirm. */
00046         void (*preparse)(struct _sgml_parser *parser, void *userContext);
00050         void (*postparse)(struct _sgml_parser *parser, void *userContext);
00051 
              /* Element callbacks; both receive the element name as a C string. */
00055         void (*elementBegin)(struct _sgml_parser *parser, void *userContext, const char *elementName);
00059         void (*elementEnd)(struct _sgml_parser *parser, void *userContext, const char *elementName);
00060 
              /* Attribute callback; receives the attribute name and value as C strings. */
00064         void (*attributeNew)(struct _sgml_parser *parser, void *userContext, const char *attributeName, const char *attributeValue);
00065 
              /* Text callback; receives the text as a C string. */
00069         void (*textNew)(struct _sgml_parser *parser, void *userContext, const char *text);
00070 
              /* Comment callback; receives the comment as a C string. */
00074         void (*commentNew)(struct _sgml_parser *parser, void *userContext, const char *comment);
00075 
00076 } SGML_HANDLERS;
00077 
00082 #define SGML_STC_LETTER_TYPE_SPECIFIC   0x00  /* first of five consecutive codes 0x00..0x04 (an enumeration, not bit flags) */
00083 #define SGML_STC_LETTER_TYPE_SPECIFICWS 0x01
00084 #define SGML_STC_LETTER_TYPE_NOT        0x02
00085 #define SGML_STC_LETTER_TYPE_NOTWS      0x03
00086 #define SGML_STC_LETTER_TYPE_ANY        0x04
      /*
       * NOTE(review): presumably the values of SGML_STATE_TABLE_RULE.letterType,
       * selecting how a rule's `letter` is matched (the "WS" variants
       * presumably involve whitespace) -- confirm against the implementation.
       */
00087 
00088 #define SGML_STC_FLAG_DIVERT           (1 << 0)  /* single-bit masks; may be OR-ed together */
00089 #define SGML_STC_FLAG_UPDATE_STATE     (1 << 1)
00090 #define SGML_STC_FLAG_INCL_IN_BUFFER   (1 << 2)
      /*
       * NOTE(review): presumably stored in SGML_STATE_TABLE_RULE.flags; DIVERT
       * presumably pairs with the rule's divertTableId and UPDATE_STATE with
       * addState/remState -- confirm against the implementation.
       */
00091 
00092 #define SGML_PARSER_STATE_INTEXT                      (1 << 0)  /* each constant below is a distinct single bit */
00093 
00094 #define SGML_PARSER_STATE_INELEMENT                   (1 << 1)
00095 #define SGML_PARSER_STATE_INELEMENTNAME               (1 << 2)
00096 #define SGML_PARSER_STATE_INELEMENTNAME_ACTUAL        (1 << 3)
00097 #define SGML_PARSER_STATE_INELEMENTCLOSURE            (1 << 4)
00098         
00099 #define SGML_PARSER_STATE_INATTRIBUTENAME             (1 << 5)
00100 #define SGML_PARSER_STATE_INATTRIBUTENAME_ACTUAL      (1 << 6)
00101 
      /* Bits 7-9, 11, 13 and 14 are not assigned in this header. */
00102 #define SGML_PARSER_STATE_INATTRIBUTEVALUE_NS         (1 << 10)
00103 
00104 #define SGML_PARSER_STATE_INATTRIBUTEVALUE_S          (1 << 12) 
00105 
00106 #define SGML_PARSER_STATE_INCOMMENT                   (1 << 15)
00107 #define SGML_PARSER_STATE_INCOMMENTGOTEXCLAMATION     (1 << 16)
00108 #define SGML_PARSER_STATE_INCOMMENTGOTDASH1           (1 << 17)
00109 #define SGML_PARSER_STATE_INCOMMENTGOTDASH2           (1 << 18)
00110 
00111 #define SGML_PARSER_STATE_INATTRIBUTEVALUE_DBLQUOTE   (1 << 19)
      /*
       * NOTE(review): presumably these bits make up SGML_PARSER.internal.state
       * and the isState/notState/addState/remState masks of
       * SGML_STATE_TABLE_RULE -- confirm against the implementation. The
       * expressions have type int; the highest bit used is 19.
       */
00112 
00113 typedef struct _sgml_state_table_rule {
              /*
               * One rule of the parsing state machine. SGML_STATE_TABLE.rules and
               * SGML_PARSER.stateTableRules both point at objects of this type.
               * NOTE(review): the field semantics below are inferred from names
               * only -- confirm against the implementation.
               */
00114 
00115         unsigned long stateIndexId;   /* presumably the id of the table this rule belongs to */
00116 
00117         unsigned char letterType;     /* presumably one of SGML_STC_LETTER_TYPE_* */
00118         unsigned char letter;         /* presumably the character the rule is matched against */
00119 
00120         unsigned long flags;          /* presumably a mask of SGML_STC_FLAG_* */
00121 
00122         unsigned long divertTableId;  /* presumably the target table id when the rule diverts */
00123 
00124         unsigned long isState;        /* presumably state bits that must be set for the rule to apply */
00125         unsigned long notState;       /* presumably state bits that must be clear for the rule to apply */
00126 
00127         unsigned long addState;       /* presumably state bits to set when the rule fires */
00128         unsigned long remState;       /* presumably state bits to clear when the rule fires */
00129 
00130 } SGML_STATE_TABLE_RULE;
00131 
00132 typedef struct _sgml_state_table {
              /*
               * One state table: an id plus a pointer to its rules.
               * SGML_PARSER.stateTable and SGML_PARSER.internal.currentState
               * point at objects of this type.
               */
00133 
00134         unsigned long          stateIndexId;  /* identifier of this table */
00135 
00136         SGML_STATE_TABLE_RULE  *rules;        /* rules associated with this table */
00137         unsigned long          ruleSize;      /* NOTE(review): presumably the number of entries in `rules` (count, not bytes) -- confirm */
00138 
00139 } SGML_STATE_TABLE;
00140 
00141 typedef struct _sgml_parser {
              /*
               * Parser object. Instances are returned by sgmlParserNew() and are
               * passed to every other function declared in this header.
               */
00142 
00143         enum SgmlExtensionType type;              /* extension type; same type as the `type` argument of sgmlParserInitialize() */
00144 
00145         SGML_HANDLERS          handlers;          /* callback table, embedded by value */
00146 
00147         SGML_STATE_TABLE       *stateTable;             /* NOTE(review): presumably an array of stateTableElements tables -- confirm */
00148         unsigned long          stateTableElements;
00149         SGML_STATE_TABLE_RULE  *stateTableRules;        /* NOTE(review): presumably an array of stateTableRuleElements rules -- confirm */
00150         unsigned long          stateTableRuleElements;
00151 
00152         struct {
                      /*
                       * Parser-private working state, grouped under `internal`.
                       * Only extensionContext and userContext have accessor
                       * macros in this header.
                       */
00153 
00154                 SGML_STATE_TABLE   *currentState;
00155 
00156                 char               *lastElementName;     /* NOTE(review): ownership and lifetime not visible here -- confirm */
00157                 char               *lastAttributeName;   /* NOTE(review): ownership and lifetime not visible here -- confirm */
00158 
00159                 char               *currentBuffer;       /* accumulation buffer; see _sgmlParserAppendBuffer() and _sgmlParserResetBuffer() */
00160                 unsigned long      currentBufferSize;
00161 
00162                 unsigned long      state;                /* NOTE(review): presumably a mask of SGML_PARSER_STATE_* bits -- confirm */
00163 
00164                 void               *extensionContext;    /* read by sgmlParserGetExtensionContext() */
00165                 void               *userContext;         /* read by sgmlParserGetUserContext() */
00166 
                      /* Hooks; their signatures match _sgmlOnStateChange() and _sgmlOnDivert() declared in this header. */
00167                 void               (*onStateChange)(struct _sgml_parser *parser, unsigned long oldState, unsigned long newState);
00168                 void               (*onDivert)(struct _sgml_parser *parser, unsigned long newIndex, unsigned long oldState, unsigned long newState, const char *lastBuffer, unsigned long lastBufferSize);
00169 
                      /* Extension parameter hooks; presumably the targets of sgmlParserExtensionSetParam()/GetParam() -- confirm. */
00170                 void               (*setExtensionParam)(struct _sgml_parser *parser, void *extensionContext, unsigned long param, void *value);
00171                 void               (*getExtensionParam)(struct _sgml_parser *parser, void *extensionContext, unsigned long param, void *value);
00172 
00173         } internal;
00174 
00175 } SGML_PARSER;
00176 
00188 SGML_PARSER *sgmlParserNew(void);  /* takes no arguments; `(void)` makes this a real prototype so stray arguments are diagnosed in C */
      /*
       * Public parser API.
       * NOTE(review): return-value conventions (what the unsigned long results
       * mean, and whether sgmlParserNew() can return NULL) are not visible in
       * this header -- confirm against the implementation.
       */
      /* Sets up `parser` for the given extension type, callback table and opaque user pointer. */
00207 unsigned long sgmlParserInitialize(SGML_PARSER *parser, enum SgmlExtensionType type, SGML_HANDLERS *handlers, void *userContext);
      /* NOTE(review): `destroyParser` presumably selects whether the SGML_PARSER object itself is released -- confirm. */
00214 void sgmlParserDestroy(SGML_PARSER *parser, unsigned char destroyParser);
00215 
      /* Parses `stringLength` bytes starting at `string`. */
00224 unsigned long sgmlParserParseString(SGML_PARSER *parser, const char *string, const unsigned long stringLength);
      /* Parses the file named by `file`. */
00232 unsigned long sgmlParserParseFile(SGML_PARSER *parser, const char *file);
00233 
      /* Extension parameter access; the meaning of `param` and the type behind `value` are extension-defined. */
00246 void sgmlParserExtensionSetParam(SGML_PARSER *parser, unsigned long param, void *value);
00259 void sgmlParserExtensionGetParam(SGML_PARSER *parser, unsigned long param, void *value);
00260 
00261 #define sgmlParserGetExtensionContext(parser) ((parser)->internal.extensionContext)
00262 #define sgmlParserGetUserContext(parser) ((parser)->internal.userContext)
      /*
       * Accessors for the two opaque context pointers stored in
       * SGML_PARSER.internal. `parser` must be an expression of type
       * SGML_PARSER *. Both the argument and the whole expansion are
       * parenthesized so that arguments such as `p + 1` or `cond ? a : b`
       * bind to `->` as intended and the result composes safely inside larger
       * expressions. The expansion is still an lvalue, so existing code that
       * assigns through these macros keeps working.
       */
00263 
00268 void _sgmlParserInitializeStateTable(SGML_PARSER *parser);  /* NOTE(review): the leading underscore suggests these nine lines are library-internal helpers -- confirm before calling from user code */
00269 void _sgmlParserInitializeStateTableRules(SGML_PARSER *parser);
00270 
      /* Chunk-level parsing and buffer helpers; `chunkSize`, `startOffset` and `length` are presumably byte counts into `chunk` -- confirm. */
00271 unsigned long _sgmlParseChunk(SGML_PARSER *parser, const char *chunk, const unsigned long chunkSize);
00272 void _sgmlParserAppendBuffer(SGML_PARSER *parser, const char *chunk, unsigned long startOffset, unsigned long length);
00273 void _sgmlParserResetBuffer(SGML_PARSER *parser);
00274 
      /* Signatures match the SGML_PARSER.internal.onStateChange and onDivert function pointers. */
00275 void _sgmlOnStateChange(SGML_PARSER *parser, unsigned long oldState, unsigned long newState);
00276 void _sgmlOnDivert(SGML_PARSER *parser, unsigned long newIndex, unsigned long oldState, unsigned long newState, const char *lastBuffer, unsigned long lastBufferSize);
00277 
00278 #ifdef __cplusplus
00279 }
00280 #endif
00281 
00282 #endif

Generated on Tue Mar 25 19:49:04 2003 for libsgml by doxygen1.3-rc3