# Cython declarations for the parts of libxml2's HTML parser API that are
# declared in "libxml/HTMLparser.h".  This file contains declarations only;
# the implementations live in the libxml2 C library.

from libc.string cimport const_char

from lxml.includes.tree cimport xmlDoc
from lxml.includes.tree cimport xmlInputReadCallback, xmlInputCloseCallback
from lxml.includes.xmlparser cimport xmlParserCtxt, xmlSAXHandler, xmlSAXHandlerV1

cdef extern from "libxml/HTMLparser.h":
    # Option flags accepted by the "options" arguments of the functions
    # declared below.  The version comments record the libxml2 release in
    # which each group of flags became available.
    ctypedef enum htmlParserOption:
        HTML_PARSE_NOERROR    # suppress error reports
        HTML_PARSE_NOWARNING  # suppress warning reports
        HTML_PARSE_PEDANTIC   # pedantic error reporting
        HTML_PARSE_NOBLANKS   # remove blank nodes
        HTML_PARSE_NONET      # Forbid network access
        # libxml2 2.6.21+ only:
        HTML_PARSE_RECOVER    # Relaxed parsing
        HTML_PARSE_COMPACT    # compact small text nodes
        # libxml2 2.7.7+ only:
        HTML_PARSE_NOIMPLIED  # Do not add implied html/body... elements
        # libxml2 2.7.8+ only:
        HTML_PARSE_NODEFDTD   # do not default a doctype if not found
        # libxml2 2.8.0+ only:
        # NOTE(review): this member carries the XML_ prefix although it is
        # declared in the HTML option enum; libxml2 appears to offer an
        # HTML_PARSE_IGNORE_ENC spelling as well -- confirm what callers
        # reference before renaming.
        XML_PARSE_IGNORE_ENC  # ignore internal document encoding hint

    xmlSAXHandlerV1 htmlDefaultSAXHandler

    # All functions below are declared "nogil", i.e. Cython allows calling
    # them from code that has released the GIL.

    # --- creating parser contexts ---
    cdef xmlParserCtxt* htmlCreateMemoryParserCtxt(
        char* buffer, int size) nogil
    cdef xmlParserCtxt* htmlCreateFileParserCtxt(
        char* filename, char* encoding) nogil
    cdef xmlParserCtxt* htmlCreatePushParserCtxt(
        xmlSAXHandler* sax, void* user_data,
        char* chunk, int size, char* filename, int enc) nogil

    # --- managing an existing parser context ---
    cdef void htmlFreeParserCtxt(xmlParserCtxt* ctxt) nogil
    cdef void htmlCtxtReset(xmlParserCtxt* ctxt) nogil
    cdef int htmlCtxtUseOptions(xmlParserCtxt* ctxt, int options) nogil

    # --- running the parser on a context ---
    cdef int htmlParseDocument(xmlParserCtxt* ctxt) nogil
    cdef int htmlParseChunk(
        xmlParserCtxt* ctxt,
        char* chunk, int size, int terminate) nogil

    # --- parsing a complete document from different input sources ---
    # "options" takes a combination of htmlParserOption flags.
    cdef xmlDoc* htmlCtxtReadFile(
        xmlParserCtxt* ctxt,
        char* filename, const_char* encoding,
        int options) nogil
    cdef xmlDoc* htmlCtxtReadDoc(
        xmlParserCtxt* ctxt,
        char* buffer, char* URL, const_char* encoding,
        int options) nogil
    cdef xmlDoc* htmlCtxtReadIO(
        xmlParserCtxt* ctxt,
        xmlInputReadCallback ioread,
        xmlInputCloseCallback ioclose,
        void* ioctx,
        char* URL, const_char* encoding,
        int options) nogil
    cdef xmlDoc* htmlCtxtReadMemory(
        xmlParserCtxt* ctxt,
        char* buffer, int size,
        char* filename, const_char* encoding,
        int options) nogil