@@ -565,6 +565,9 @@ cdef class _ParserContext(_ResolverContext):
565
565
return context
566
566
567
567
cdef void _initParserContext(self , xmlparser.xmlParserCtxt* c_ctxt) noexcept:
568
+ """
569
+ Connects the libxml2-level context to the lxml-level parser context.
570
+ """
568
571
self ._c_ctxt = c_ctxt
569
572
c_ctxt._private = < void * > self
570
573
@@ -589,6 +592,12 @@ cdef class _ParserContext(_ResolverContext):
589
592
raise ParserError, " parser locking failed"
590
593
self ._error_log.clear()
591
594
self ._doc = None
595
+ # Connect the lxml error log with libxml2's error handling. In the case of parsing
596
+ # HTML, ctxt->sax is not set to null, so this always works. The libxml2 function
597
+ # that does this is htmlInitParserCtxt in HTMLparser.c. For HTML (and possibly XML
598
+ # too), libxml2 delivers errors to the SAX handler's serror callback when
599
+ # xmlCtxtErrMemory (in libxml2's parserInternals.c) sets schannel to
600
+ # ctxt->sax->serror.
592
601
# Need a cast here because older libxml2 releases do not use 'const' in the functype.
593
602
self ._c_ctxt.sax.serror = < xmlerror.xmlStructuredErrorFunc> _receiveParserError
594
603
self ._orig_loader = _register_document_loader() if set_document_loader else NULL
@@ -634,6 +643,9 @@ cdef _initParserContext(_ParserContext context,
634
643
context._initParserContext(c_ctxt)
635
644
636
645
cdef void _forwardParserError(xmlparser.xmlParserCtxt* _parser_context, const xmlerror.xmlError* error) noexcept with gil:
    """
    Pass an error reported by libxml2 on to the lxml-level error log.

    The lxml parser context is read back from the ``_private`` field of the
    libxml2 parser context, and the error is handed to that context's
    ``_error_log``.
    """
    cdef _ParserContext context = <_ParserContext> _parser_context._private
    context._error_log._receive(error)
638
650
639
651
cdef void _receiveParserError(void * c_context, const xmlerror.xmlError* error) noexcept nogil:
@@ -679,6 +691,8 @@ cdef xmlDoc* _handleParseResult(_ParserContext context,
679
691
xmlparser.xmlParserCtxt* c_ctxt,
680
692
xmlDoc* result, filename,
681
693
bint recover, bint free_doc) except NULL :
694
+ # The C-level argument xmlDoc* result is passed in as NULL if the parser was not able
695
+ # to parse the document.
682
696
cdef bint well_formed
683
697
if result is not NULL :
684
698
__GLOBAL_PARSER_CONTEXT.initDocDict(result)
@@ -690,6 +704,9 @@ cdef xmlDoc* _handleParseResult(_ParserContext context,
690
704
c_ctxt.myDoc = NULL
691
705
692
706
if result is not NULL :
707
+ # "wellFormed" in libxml2 is 0 if the parser found fatal errors. The parser still
708
+ # returns a parse result document if 'recover=True'. Here, we determine if we can present
709
+ # the document to the user or consider it incorrect or broken enough to raise an error.
693
710
if (context._validator is not None and
694
711
not context._validator.isvalid()):
695
712
well_formed = 0 # actually not 'valid', but anyway ...
@@ -893,6 +910,9 @@ cdef class _BaseParser:
893
910
return self ._push_parser_context
894
911
895
912
cdef _ParserContext _createContext(self , target, events_to_collect):
913
+ """
914
+ This method creates and configures the lxml-level parser.
915
+ """
896
916
cdef _SaxParserContext sax_context
897
917
if target is not None :
898
918
sax_context = _TargetParserContext(self )
@@ -939,6 +959,9 @@ cdef class _BaseParser:
939
959
return 0
940
960
941
961
cdef xmlparser.xmlParserCtxt* _newParserCtxt(self ) except NULL :
962
+ """
963
+ Create and initialise a libxml2-level parser context.
964
+ """
942
965
cdef xmlparser.xmlParserCtxt* c_ctxt
943
966
if self ._for_html:
944
967
c_ctxt = htmlparser.htmlCreateMemoryParserCtxt(' dummy' , 5 )
0 commit comments