#ifndef HAS_ETREE_DEFS_H #define HAS_ETREE_DEFS_H /* quick check for Python/libxml2/libxslt devel setup */ #include "Python.h" #ifndef PY_VERSION_HEX # error the development package of Python (header files etc.) is not installed correctly #else # if PY_VERSION_HEX < 0x02070000 || PY_MAJOR_VERSION >= 3 && PY_VERSION_HEX < 0x03050000 # error this version of lxml requires Python 2.7, 3.5 or later # endif #endif #include "libxml/xmlversion.h" #ifndef LIBXML_VERSION # error the development package of libxml2 (header files etc.) is not installed correctly #else #if LIBXML_VERSION < 20700 # error minimum required version of libxml2 is 2.7.0 #endif #endif #include "libxslt/xsltconfig.h" #ifndef LIBXSLT_VERSION # error the development package of libxslt (header files etc.) is not installed correctly #else #if LIBXSLT_VERSION < 10123 # error minimum required version of libxslt is 1.1.23 #endif #endif /* v_arg functions */ #define va_int(ap) va_arg(ap, int) #define va_charptr(ap) va_arg(ap, char *) #ifdef PYPY_VERSION # define IS_PYPY 1 #else # define IS_PYPY 0 #endif #if PY_MAJOR_VERSION >= 3 # define IS_PYTHON2 0 /* prefer for special casing Python 2.x */ # define IS_PYTHON3 1 /* avoid */ #else # define IS_PYTHON2 1 # define IS_PYTHON3 0 #endif #if IS_PYTHON2 #ifndef LXML_UNICODE_STRINGS #define LXML_UNICODE_STRINGS 0 #endif #else #undef LXML_UNICODE_STRINGS #define LXML_UNICODE_STRINGS 1 #endif #if !IS_PYPY # define PyWeakref_LockObject(obj) (NULL) #endif /* Threading is not currently supported by PyPy */ #if IS_PYPY # ifndef WITHOUT_THREADING # define WITHOUT_THREADING # endif #endif #if IS_PYPY # undef PyFile_AsFile # define PyFile_AsFile(o) (NULL) # undef PyByteArray_Check # define PyByteArray_Check(o) (0) #elif !IS_PYTHON2 /* Python 3+ doesn't have PyFile_*() anymore */ # define PyFile_AsFile(o) (NULL) #endif #if PY_VERSION_HEX <= 0x03030000 && !(defined(CYTHON_PEP393_ENABLED) && CYTHON_PEP393_ENABLED) #define PyUnicode_IS_READY(op) (0) #define PyUnicode_GET_LENGTH(u) PyUnicode_GET_SIZE(u) #define PyUnicode_KIND(u) (sizeof(Py_UNICODE)) #define PyUnicode_DATA(u) ((void*)PyUnicode_AS_UNICODE(u)) #endif #if IS_PYPY # ifndef PyUnicode_FromFormat # define PyUnicode_FromFormat PyString_FromFormat # endif # if !IS_PYTHON2 && !defined(PyBytes_FromFormat) # ifdef PyString_FromFormat # define PyBytes_FromFormat PyString_FromFormat # else #include static PyObject* PyBytes_FromFormat(const char* format, ...) { PyObject *string; va_list vargs; #ifdef HAVE_STDARG_PROTOTYPES va_start(vargs, format); #else va_start(vargs); #endif string = PyUnicode_FromFormatV(format, vargs); va_end(vargs); if (string && PyUnicode_Check(string)) { PyObject *bstring = PyUnicode_AsUTF8String(string); Py_DECREF(string); string = bstring; } if (string && !PyBytes_CheckExact(string)) { Py_DECREF(string); string = NULL; PyErr_SetString(PyExc_TypeError, "String formatting and encoding failed to return bytes object"); } return string; } # endif # endif #endif /* PySlice_GetIndicesEx() has wrong signature in Py<=3.1 */ #if PY_VERSION_HEX >= 0x03020000 # define _lx_PySlice_GetIndicesEx(o, l, b, e, s, sl) PySlice_GetIndicesEx(o, l, b, e, s, sl) #else # define _lx_PySlice_GetIndicesEx(o, l, b, e, s, sl) PySlice_GetIndicesEx(((PySliceObject*)o), l, b, e, s, sl) #endif #ifdef WITHOUT_THREADING # undef PyEval_SaveThread # define PyEval_SaveThread() (NULL) # undef PyEval_RestoreThread # define PyEval_RestoreThread(state) if (state); else {} # undef PyGILState_Ensure # define PyGILState_Ensure() (PyGILState_UNLOCKED) # undef PyGILState_Release # define PyGILState_Release(state) if (state); else {} # undef Py_UNBLOCK_THREADS # define Py_UNBLOCK_THREADS _save = NULL; # undef Py_BLOCK_THREADS # define Py_BLOCK_THREADS if (_save); else {} #endif #ifdef WITHOUT_THREADING # define ENABLE_THREADING 0 #else # define ENABLE_THREADING 1 #endif #if LIBXML_VERSION < 20704 /* FIXME: hack to make new error reporting compile in old libxml2 versions */ # define xmlStructuredErrorContext NULL # define xmlXIncludeProcessTreeFlagsData(n,o,d) xmlXIncludeProcessTreeFlags(n,o) #endif /* schematron was added in libxml2 2.6.21 */ #ifdef LIBXML_SCHEMATRON_ENABLED # define ENABLE_SCHEMATRON 1 #else # define ENABLE_SCHEMATRON 0 # define XML_SCHEMATRON_OUT_QUIET 0 # define XML_SCHEMATRON_OUT_XML 0 # define XML_SCHEMATRON_OUT_ERROR 0 typedef void xmlSchematron; typedef void xmlSchematronParserCtxt; typedef void xmlSchematronValidCtxt; # define xmlSchematronNewDocParserCtxt(doc) NULL # define xmlSchematronNewParserCtxt(file) NULL # define xmlSchematronParse(ctxt) NULL # define xmlSchematronFreeParserCtxt(ctxt) # define xmlSchematronFree(schema) # define xmlSchematronNewValidCtxt(schema, options) NULL # define xmlSchematronValidateDoc(ctxt, doc) 0 # define xmlSchematronFreeValidCtxt(ctxt) # define xmlSchematronSetValidStructuredErrors(ctxt, errorfunc, data) #endif #if LIBXML_VERSION < 20708 # define HTML_PARSE_NODEFDTD 4 #endif #if LIBXML_VERSION < 20900 # define XML_PARSE_BIG_LINES 4194304 #endif #include "libxml/tree.h" #ifndef LIBXML2_NEW_BUFFER typedef xmlBuffer xmlBuf; # define xmlBufContent(buf) xmlBufferContent(buf) # define xmlBufUse(buf) xmlBufferLength(buf) #endif /* libexslt 1.1.25+ support EXSLT functions in XPath */ #if LIBXSLT_VERSION < 10125 #define exsltDateXpathCtxtRegister(ctxt, prefix) #define exsltSetsXpathCtxtRegister(ctxt, prefix) #define exsltMathXpathCtxtRegister(ctxt, prefix) #define exsltStrXpathCtxtRegister(ctxt, prefix) #endif #define LXML_GET_XSLT_ENCODING(result_var, style) XSLT_GET_IMPORT_PTR(result_var, style, encoding) /* work around MSDEV 6.0 */ #if (_MSC_VER == 1200) && (WINVER < 0x0500) long _ftol( double ); //defined by VC6 C libs long _ftol2( double dblSource ) { return _ftol( dblSource ); } #endif #ifdef __GNUC__ /* Test for GCC > 2.95 */ #if __GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95)) #define unlikely_condition(x) __builtin_expect((x), 0) #else /* __GNUC__ > 2 ... */ #define unlikely_condition(x) (x) #endif /* __GNUC__ > 2 ... */ #else /* __GNUC__ */ #define unlikely_condition(x) (x) #endif /* __GNUC__ */ #ifndef Py_TYPE #define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) #endif #define PY_NEW(T) \ (((PyTypeObject*)(T))->tp_new( \ (PyTypeObject*)(T), __pyx_empty_tuple, NULL)) #define _fqtypename(o) ((Py_TYPE(o))->tp_name) #define lxml_malloc(count, item_size) \ (unlikely_condition((size_t)(count) > (size_t) (PY_SSIZE_T_MAX / item_size)) ? NULL : \ (PyMem_Malloc((count) * item_size))) #define lxml_realloc(mem, count, item_size) \ (unlikely_condition((size_t)(count) > (size_t) (PY_SSIZE_T_MAX / item_size)) ? NULL : \ (PyMem_Realloc(mem, (count) * item_size))) #define lxml_free(mem) PyMem_Free(mem) #if PY_MAJOR_VERSION < 3 #define _isString(obj) (PyString_CheckExact(obj) || \ PyUnicode_CheckExact(obj) || \ PyType_IsSubtype(Py_TYPE(obj), &PyBaseString_Type)) #else /* builtin subtype type checks are almost as fast as exact checks in Py2.7+ * and Unicode is more common in Py3 */ #define _isString(obj) (PyUnicode_Check(obj) || PyBytes_Check(obj)) #endif #define _isElement(c_node) \ (((c_node)->type == XML_ELEMENT_NODE) || \ ((c_node)->type == XML_COMMENT_NODE) || \ ((c_node)->type == XML_ENTITY_REF_NODE) || \ ((c_node)->type == XML_PI_NODE)) #define _isElementOrXInclude(c_node) \ (_isElement(c_node) || \ ((c_node)->type == XML_XINCLUDE_START) || \ ((c_node)->type == XML_XINCLUDE_END)) #define _getNs(c_node) \ (((c_node)->ns == 0) ? 0 : ((c_node)->ns->href)) #include "string.h" static void* lxml_unpack_xmldoc_capsule(PyObject* capsule, int* is_owned) { xmlDoc *c_doc; void *context; *is_owned = 0; if (unlikely_condition(!PyCapsule_IsValid(capsule, (const char*)"libxml2:xmlDoc"))) { PyErr_SetString( PyExc_TypeError, "Not a valid capsule. The capsule argument must be a capsule object with name libxml2:xmlDoc"); return NULL; } c_doc = (xmlDoc*) PyCapsule_GetPointer(capsule, (const char*)"libxml2:xmlDoc"); if (unlikely_condition(!c_doc)) return NULL; if (unlikely_condition(c_doc->type != XML_DOCUMENT_NODE && c_doc->type != XML_HTML_DOCUMENT_NODE)) { PyErr_Format( PyExc_ValueError, "Illegal document provided: expected XML or HTML, found %d", (int)c_doc->type); return NULL; } context = PyCapsule_GetContext(capsule); if (unlikely_condition(!context && PyErr_Occurred())) return NULL; if (context && strcmp((const char*) context, "destructor:xmlFreeDoc") == 0) { /* take ownership by setting destructor to NULL */ if (PyCapsule_SetDestructor(capsule, NULL) == 0) { /* ownership transferred => invalidate capsule by clearing its name */ if (unlikely_condition(PyCapsule_SetName(capsule, NULL))) { /* this should never happen since everything above succeeded */ xmlFreeDoc(c_doc); return NULL; } *is_owned = 1; } } return c_doc; } /* Macro pair implementation of a depth first tree walker * * Calls the code block between the BEGIN and END macros for all elements * below c_tree_top (exclusively), starting at c_node (inclusively iff * 'inclusive' is 1). The _ELEMENT_ variants will only stop on nodes * that match _isElement(), the normal variant will stop on every node * except text nodes. * * To traverse the node and all of its children and siblings in Pyrex, call * cdef xmlNode* some_node * BEGIN_FOR_EACH_ELEMENT_FROM(some_node.parent, some_node, 1) * # do something with some_node * END_FOR_EACH_ELEMENT_FROM(some_node) * * To traverse only the children and siblings of a node, call * cdef xmlNode* some_node * BEGIN_FOR_EACH_ELEMENT_FROM(some_node.parent, some_node, 0) * # do something with some_node * END_FOR_EACH_ELEMENT_FROM(some_node) * * To traverse only the children, do: * cdef xmlNode* some_node * some_node = parent_node.children * BEGIN_FOR_EACH_ELEMENT_FROM(parent_node, some_node, 1) * # do something with some_node * END_FOR_EACH_ELEMENT_FROM(some_node) * * NOTE: 'some_node' MUST be a plain 'xmlNode*' ! * * NOTE: parent modification during the walk can divert the iterator, but * should not segfault ! */ #define _LX__ELEMENT_MATCH(c_node, only_elements) \ ((only_elements) ? (_isElement(c_node)) : 1) #define _LX__ADVANCE_TO_NEXT(c_node, only_elements) \ while ((c_node != 0) && (!_LX__ELEMENT_MATCH(c_node, only_elements))) \ c_node = c_node->next; #define _LX__TRAVERSE_TO_NEXT(c_stop_node, c_node, only_elements) \ { \ /* walk through children first */ \ xmlNode* _lx__next = c_node->children; \ if (_lx__next != 0) { \ if (c_node->type == XML_ENTITY_REF_NODE || c_node->type == XML_DTD_NODE) { \ _lx__next = 0; \ } else { \ _LX__ADVANCE_TO_NEXT(_lx__next, only_elements) \ } \ } \ if ((_lx__next == 0) && (c_node != c_stop_node)) { \ /* try siblings */ \ _lx__next = c_node->next; \ _LX__ADVANCE_TO_NEXT(_lx__next, only_elements) \ /* back off through parents */ \ while (_lx__next == 0) { \ c_node = c_node->parent; \ if (c_node == 0) \ break; \ if (c_node == c_stop_node) \ break; \ if ((only_elements) && !_isElement(c_node)) \ break; \ /* we already traversed the parents -> siblings */ \ _lx__next = c_node->next; \ _LX__ADVANCE_TO_NEXT(_lx__next, only_elements) \ } \ } \ c_node = _lx__next; \ } #define _LX__BEGIN_FOR_EACH_FROM(c_tree_top, c_node, inclusive, only_elements) \ { \ if (c_node != 0) { \ const xmlNode* _lx__tree_top = (c_tree_top); \ const int _lx__only_elements = (only_elements); \ /* make sure we start at an element */ \ if (!_LX__ELEMENT_MATCH(c_node, _lx__only_elements)) { \ /* we skip the node, so 'inclusive' is irrelevant */ \ if (c_node == _lx__tree_top) \ c_node = 0; /* nothing to traverse */ \ else { \ c_node = c_node->next; \ _LX__ADVANCE_TO_NEXT(c_node, _lx__only_elements) \ } \ } else if (! (inclusive)) { \ /* skip the first node */ \ _LX__TRAVERSE_TO_NEXT(_lx__tree_top, c_node, _lx__only_elements) \ } \ \ /* now run the user code on the elements we find */ \ while (c_node != 0) { \ /* here goes the code to be run for each element */ #define _LX__END_FOR_EACH_FROM(c_node) \ _LX__TRAVERSE_TO_NEXT(_lx__tree_top, c_node, _lx__only_elements) \ } \ } \ } #define BEGIN_FOR_EACH_ELEMENT_FROM(c_tree_top, c_node, inclusive) \ _LX__BEGIN_FOR_EACH_FROM(c_tree_top, c_node, inclusive, 1) #define END_FOR_EACH_ELEMENT_FROM(c_node) \ _LX__END_FOR_EACH_FROM(c_node) #define BEGIN_FOR_EACH_FROM(c_tree_top, c_node, inclusive) \ _LX__BEGIN_FOR_EACH_FROM(c_tree_top, c_node, inclusive, 0) #define END_FOR_EACH_FROM(c_node) \ _LX__END_FOR_EACH_FROM(c_node) #endif /* HAS_ETREE_DEFS_H */