Skip to content

Commit c7828c9

Browse files
authored
Add docstrings and comments to functions handling the parser context (GH-449)
1 parent 7131631 commit c7828c9

File tree

1 file changed

+23
-0
lines changed

1 file changed

+23
-0
lines changed

src/lxml/parser.pxi

+23
Original file line numberDiff line numberDiff line change
@@ -565,6 +565,9 @@ cdef class _ParserContext(_ResolverContext):
565565
return context
566566

567567
cdef void _initParserContext(self, xmlparser.xmlParserCtxt* c_ctxt) noexcept:
568+
"""
569+
Connects the libxml2-level context to the lxml-level parser context.
570+
"""
568571
self._c_ctxt = c_ctxt
569572
c_ctxt._private = <void*>self
570573

@@ -589,6 +592,12 @@ cdef class _ParserContext(_ResolverContext):
589592
raise ParserError, "parser locking failed"
590593
self._error_log.clear()
591594
self._doc = None
595+
# Connect the lxml error log with libxml2's error handling. In the case of parsing
596+
# HTML, ctxt->sax is not set to null, so this always works. The libxml2 function
597+
# that does this is htmlInitParserCtxt in HTMLparser.c. For HTML (and possibly XML
598+
# too), libxml2 delivers errors to the SAX handler's serror callback, because
599+
# xmlCtxtErrMemory in libxml2's parserInternals.c sets schannel to
600+
# ctxt->sax->serror.
592601
# Need a cast here because older libxml2 releases do not use 'const' in the functype.
593602
self._c_ctxt.sax.serror = <xmlerror.xmlStructuredErrorFunc> _receiveParserError
594603
self._orig_loader = _register_document_loader() if set_document_loader else NULL
@@ -634,6 +643,9 @@ cdef _initParserContext(_ParserContext context,
634643
context._initParserContext(c_ctxt)
635644

636645
cdef void _forwardParserError(xmlparser.xmlParserCtxt* _parser_context, const xmlerror.xmlError* error) noexcept with gil:
646+
"""
647+
Add an error created by libxml2 to the lxml-level error_log.
648+
"""
637649
(<_ParserContext>_parser_context._private)._error_log._receive(error)
638650

639651
cdef void _receiveParserError(void* c_context, const xmlerror.xmlError* error) noexcept nogil:
@@ -679,6 +691,8 @@ cdef xmlDoc* _handleParseResult(_ParserContext context,
679691
xmlparser.xmlParserCtxt* c_ctxt,
680692
xmlDoc* result, filename,
681693
bint recover, bint free_doc) except NULL:
694+
# The C-level argument xmlDoc* result is passed in as NULL if the parser was not able
695+
# to parse the document.
682696
cdef bint well_formed
683697
if result is not NULL:
684698
__GLOBAL_PARSER_CONTEXT.initDocDict(result)
@@ -690,6 +704,9 @@ cdef xmlDoc* _handleParseResult(_ParserContext context,
690704
c_ctxt.myDoc = NULL
691705

692706
if result is not NULL:
707+
# "wellFormed" in libxml2 is 0 if the parser found fatal errors. It still returns a
708+
# parse result document if 'recover=True'. Here, we determine if we can present
709+
# the document to the user or consider it incorrect or broken enough to raise an error.
693710
if (context._validator is not None and
694711
not context._validator.isvalid()):
695712
well_formed = 0 # actually not 'valid', but anyway ...
@@ -893,6 +910,9 @@ cdef class _BaseParser:
893910
return self._push_parser_context
894911

895912
cdef _ParserContext _createContext(self, target, events_to_collect):
913+
"""
914+
This method creates and configures the lxml-level parser context.
915+
"""
896916
cdef _SaxParserContext sax_context
897917
if target is not None:
898918
sax_context = _TargetParserContext(self)
@@ -939,6 +959,9 @@ cdef class _BaseParser:
939959
return 0
940960

941961
cdef xmlparser.xmlParserCtxt* _newParserCtxt(self) except NULL:
962+
"""
963+
Create and initialise a libxml2-level parser context.
964+
"""
942965
cdef xmlparser.xmlParserCtxt* c_ctxt
943966
if self._for_html:
944967
c_ctxt = htmlparser.htmlCreateMemoryParserCtxt('dummy', 5)

0 commit comments

Comments
 (0)