diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst
index 7ea7c901eceb19..dcf8372f374a83 100644
--- a/Doc/whatsnew/3.15.rst
+++ b/Doc/whatsnew/3.15.rst
@@ -1383,6 +1383,14 @@ xml.parsers.expat
 .. _billion laughs: https://en.wikipedia.org/wiki/Billion_laughs_attack
 
 
+xml.sax.expatreader
+-------------------
+
+* Add support for namespace prefixes
+  (:data:`~xml.sax.handler.feature_namespace_prefixes`).
+  (Contributed by Yassir Karroum in :gh:`54873`.)
+
+
 zlib
 ----
 
diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py
index 29babd7bf6996a..3342bb121f5426 100644
--- a/Lib/test/test_sax.py
+++ b/Lib/test/test_sax.py
@@ -1,5 +1,4 @@
 # regression test for SAX 2.0
-
 from xml.sax import make_parser, ContentHandler, \
                     SAXException, SAXReaderNotAvailable, SAXParseException
 import unittest
@@ -13,7 +12,7 @@
                              XMLFilterBase, prepare_input_source
 from xml.sax.expatreader import create_parser
 from xml.sax.handler import (feature_namespaces, feature_external_ges,
-                             LexicalHandler)
+                             LexicalHandler, feature_namespace_prefixes)
 from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl
 from xml import sax
 from io import BytesIO, StringIO
@@ -1307,6 +1306,55 @@ def test_expat_locator_withinfo_nonascii(self):
         self.assertEqual(parser.getSystemId(), fname)
         self.assertEqual(parser.getPublicId(), None)
 
+    def test_qualified_names(self):
+        # Prefixed, default-namespace and un-namespaced elements, in that order.
+        class Handler(ContentHandler):
+            def startElementNS(self, name, qname, attrs):
+                self.qname = qname
+
+        for xml_s, expected_qname in (
+            ("<Q:E xmlns:Q='http://example.org/testuri'/>", "Q:E"),
+            ("<E xmlns='http://example.org/testuri'/>", "E"),
+            ("<E/>", "E"),
+        ):
+            parser = create_parser()
+            parser.setFeature(feature_namespaces, 1)
+            parser.setFeature(feature_namespace_prefixes, 1)
+
+            h = Handler()
+
+            parser.setContentHandler(h)
+            parser.parse(StringIO(xml_s))
+            self.assertEqual(h.qname, expected_qname)
+
+    def test_qualified_names_when_feature_not_set(self):
+        # Without feature_namespace_prefixes the qname must stay None.
+        class Handler(ContentHandler):
+            def startElementNS(self, name, qname, attrs):
+                self.qname = qname
+
+        for xml_s in (
+            "<Q:E xmlns:Q='http://example.org/testuri'/>",
+            "<E xmlns='http://example.org/testuri'/>",
+            "<E/>",
+        ):
+            parser = create_parser()
+            parser.setFeature(feature_namespaces, 1)
+
+            h = Handler()
+
+            parser.setContentHandler(h)
+            parser.parse(StringIO(xml_s))
+
+            self.assertIsNone(h.qname)
+
+    def test_namespace_prefixes_enabled_when_namespaces_is_not(self):
+        # SAXNotSupportedException is a SAXException subclass.
+        parser = create_parser()
+        with self.assertRaises(SAXException):
+            parser.setFeature(feature_namespace_prefixes, 1)
+
+
 
 # ===========================================================================
 #
diff --git a/Lib/xml/sax/expatreader.py b/Lib/xml/sax/expatreader.py
index 37b1add2848487..b3994e4acacdbc 100644
--- a/Lib/xml/sax/expatreader.py
+++ b/Lib/xml/sax/expatreader.py
@@ -89,6 +89,7 @@ def __init__(self, namespaceHandling=0, bufsize=2**16-20):
         self._entity_stack = []
         self._external_ges = 0
         self._interning = None
+        self._namespace_prefixes = 0
 
     # XMLReader methods
 
@@ -124,8 +125,9 @@ def getFeature(self, name):
             return self._namespaces
         elif name == feature_string_interning:
             return self._interning is not None
-        elif name in (feature_validation, feature_external_pes,
-                      feature_namespace_prefixes):
+        elif name == feature_namespace_prefixes:
+            return self._namespace_prefixes
+        elif name in (feature_validation, feature_external_pes):
             return 0
         elif name == feature_external_ges:
             return self._external_ges
@@ -154,9 +156,11 @@ def setFeature(self, name, state):
                 raise SAXNotSupportedException(
                     "expat does not read external parameter entities")
         elif name == feature_namespace_prefixes:
-            if state:
-                raise SAXNotSupportedException(
-                    "expat does not report namespace prefixes")
+            if state and not self._namespaces:
+                raise SAXNotSupportedException(
+                    "%s requires %s to be enabled"
+                    % (feature_namespace_prefixes, feature_namespaces))
+            self._namespace_prefixes = state
         else:
             raise SAXNotRecognizedException(
                 "Feature '%s' not recognized" % name)
@@ -345,11 +349,14 @@ def start_element_ns(self, name, attrs):
         pair = name.split()
         if len(pair) == 1:
             # no namespace
+            elem_qname = name
             pair = (None, name)
         elif len(pair) == 3:
+            elem_qname = "%s:%s" % (pair[2], pair[1])
             pair = pair[0], pair[1]
         else:
             # default namespace
+            elem_qname = pair[1]
             pair = tuple(pair)
 
         newattrs = {}
@@ -372,7 +379,10 @@ def start_element_ns(self, name, attrs):
             newattrs[apair] = value
             qnames[apair] = qname
 
-        self._cont_handler.startElementNS(pair, None,
+        if not self._namespace_prefixes:
+            elem_qname = None
+
+        self._cont_handler.startElementNS(pair, elem_qname,
                                           AttributesNSImpl(newattrs, qnames))
 
     def end_element_ns(self, name):
diff --git a/Misc/NEWS.d/next/Library/2024-04-26-14-21-04.gh-issue-54873.vf2bfp.rst b/Misc/NEWS.d/next/Library/2024-04-26-14-21-04.gh-issue-54873.vf2bfp.rst
new file mode 100644
index 00000000000000..cabf2130ebc69b
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-04-26-14-21-04.gh-issue-54873.vf2bfp.rst
@@ -0,0 +1,2 @@
+Backported namespace prefixes support for ``xml.sax.expatreader`` from PyXML
+(0.8.4).