Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
6ecfc28
bpo-54873: Backported namespaces prefixes support for xml.sax.expatre…
ukarroum Apr 26, 2024
e9ac454
bpo-54873: Added News entry
ukarroum Apr 26, 2024
9c365c2
Merge branch 'main' into fix-issue-54873
ukarroum Jun 9, 2024
2fc666a
Merge branch 'main' into fix-issue-54873
ukarroum Apr 29, 2025
fe9d82f
Merge branch 'main' into fix-issue-54873
ukarroum Nov 2, 2025
be878cb
Moved test_namespace_prefix to test_sax.py
ukarroum Nov 2, 2025
0e3f870
test_sax: Replaced namespace tests with one test on qualified names
ukarroum Nov 2, 2025
c88e7ed
ExpatParser: Add namespacePrefixesHandling
ukarroum Nov 2, 2025
27cb273
namespace prefixes: add link to w3.org
ukarroum Nov 2, 2025
91112b9
Merge branch 'main' into fix-issue-54873
ukarroum Nov 24, 2025
d3cbe5f
Merge branch 'main' into fix-issue-54873
ukarroum Apr 17, 2026
1d343f1
Update imports in Lib/test/test_sax.py
ukarroum Apr 17, 2026
256a97b
Improved readability of unit test
ukarroum Apr 17, 2026
7c0da60
Update pyXML spelling in News.d
ukarroum Apr 17, 2026
e219450
Update Lib/xml/sax/expatreader.py
ukarroum Apr 17, 2026
34af031
Addressed PR comments
ukarroum Apr 17, 2026
8075f8b
Merge branch 'fix-issue-54873' of github.com:ukarroum/cpython into fi…
ukarroum Apr 17, 2026
2004846
Removed unused import
ukarroum Apr 17, 2026
c3e00e2
Update news entry
ukarroum Apr 17, 2026
d3b08ec
Update Doc/whatsnew/3.15.rst
ukarroum Apr 17, 2026
e8bcb30
Update news entry
ukarroum Apr 17, 2026
dfff704
Merge branch 'fix-issue-54873' of github.com:ukarroum/cpython into fi…
ukarroum Apr 17, 2026
7de4ce5
Add a check on namespace feature when enabling namespace prefixes
ukarroum Apr 17, 2026
08d659e
Add test for feature_namespace_prefixes enabled while feature_namespa…
ukarroum Apr 17, 2026
7cdc402
Update Lib/test/test_sax.py
ukarroum Apr 17, 2026
cd7de73
Update test
ukarroum Apr 17, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions Doc/whatsnew/3.15.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1383,6 +1383,13 @@ xml.parsers.expat
.. _billion laughs: https://en.wikipedia.org/wiki/Billion_laughs_attack


xml.sax.expatreader
-------------------

* Add support for `namespace prefixes <https://www.w3.org/TR/xml-names/#dt-prefix>`_.
(Contributed by Yassir Karroum in :gh:`118317`.)


zlib
----

Expand Down
52 changes: 50 additions & 2 deletions Lib/test/test_sax.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
# regression test for SAX 2.0

from xml.sax import make_parser, ContentHandler, \
SAXException, SAXReaderNotAvailable, SAXParseException
import unittest
Expand All @@ -13,7 +12,7 @@
XMLFilterBase, prepare_input_source
from xml.sax.expatreader import create_parser
from xml.sax.handler import (feature_namespaces, feature_external_ges,
LexicalHandler)
LexicalHandler, feature_namespace_prefixes)
from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl
from xml import sax
from io import BytesIO, StringIO
Expand Down Expand Up @@ -1307,6 +1306,55 @@ def test_expat_locator_withinfo_nonascii(self):
self.assertEqual(parser.getSystemId(), fname)
self.assertEqual(parser.getPublicId(), None)

def test_qualified_names(self):
    """Check the qname reported to startElementNS with prefixes enabled.

    Each document is parsed with both feature_namespaces and
    feature_namespace_prefixes switched on, and the qualified name
    received by the content handler is compared with the expected
    "prefix:local" (or bare local) form.
    """

    class Handler(ContentHandler):
        # Record the qualified name of the most recently started element.
        def startElementNS(self, name, qname, attrs):
            self.qname = qname

    for xml_s, expected_qname in (
        # Prefixed element: qname keeps the prefix.
        ("<Q:E xmlns:Q='http://example.org/testuri'/>", "Q:E"),
        # Default namespace: no prefix to report.
        ("<E xmlns='http://example.org/testuri'/>", "E"),
        # No namespace at all.
        ("<E />", "E"),
    ):
        # One sub-test per document so that a failure names the input
        # and does not hide the remaining cases.
        with self.subTest(xml=xml_s):
            parser = create_parser()
            # feature_namespaces has to be enabled first: enabling
            # prefixes without it is rejected by setFeature().
            parser.setFeature(feature_namespaces, 1)
            parser.setFeature(feature_namespace_prefixes, 1)

            h = Handler()

            parser.setContentHandler(h)
            parser.parse(StringIO(xml_s))
            self.assertEqual(h.qname, expected_qname)

def test_qualified_names_when_feature_not_set(self):
    """Check that qname is None when namespace prefixes are not enabled.

    Only feature_namespaces is switched on here, so the content handler
    is expected to receive None as the qualified name for every document.
    """

    class Handler(ContentHandler):
        # Record the qualified name of the most recently started element.
        def startElementNS(self, name, qname, attrs):
            self.qname = qname

    for xml_s in (
        "<Q:E xmlns:Q='http://example.org/testuri'/>",
        "<E xmlns='http://example.org/testuri'/>",
        "<E />",
    ):
        # One sub-test per document so that every input is checked and a
        # failure names the document that triggered it.
        with self.subTest(xml=xml_s):
            parser = create_parser()
            parser.setFeature(feature_namespaces, 1)

            h = Handler()

            parser.setContentHandler(h)
            parser.parse(StringIO(xml_s))

            self.assertIsNone(h.qname)

def test_namespace_prefixed_enabled_when_namespace_is_not(self):
    """Enabling namespace prefixes without namespaces must be rejected.

    A freshly created parser has namespace handling off, so turning on
    feature_namespace_prefixes has to raise SAXException and leave the
    feature disabled.
    """
    parser = create_parser()
    with self.assertRaises(SAXException) as cm:
        parser.setFeature(feature_namespace_prefixes, 1)

    # Checking the exception type alone is not enough: the previous
    # "not supported" error belongs to the same exception family.  The
    # dependency error names the feature that has to be enabled first.
    self.assertIn(feature_namespaces, str(cm.exception))
    # The rejected call must not have switched the feature on.
    self.assertFalse(parser.getFeature(feature_namespace_prefixes))

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't leave 3 blank lines, only 2 is sufficient.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed



# ===========================================================================
#
Expand Down
20 changes: 14 additions & 6 deletions Lib/xml/sax/expatreader.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ def __init__(self, namespaceHandling=0, bufsize=2**16-20):
self._entity_stack = []
self._external_ges = 0
self._interning = None
self._namespace_prefixes = 0

# XMLReader methods

Expand Down Expand Up @@ -124,8 +125,9 @@ def getFeature(self, name):
return self._namespaces
elif name == feature_string_interning:
return self._interning is not None
elif name in (feature_validation, feature_external_pes,
feature_namespace_prefixes):
elif name == feature_namespace_prefixes:
return self._namespace_prefixes
elif name in (feature_validation, feature_external_pes):
return 0
elif name == feature_external_ges:
return self._external_ges
Expand Down Expand Up @@ -154,9 +156,9 @@ def setFeature(self, name, state):
raise SAXNotSupportedException(
"expat does not read external parameter entities")
elif name == feature_namespace_prefixes:
if state:
raise SAXNotSupportedException(
"expat does not report namespace prefixes")
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was it a limitation of the C extension module? Or was it an Expat version limitation?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did a quick git blame, and it looks like this specific check was added in commit 18476a3 (from 2002).
At that commit the Expat version used was 1.95 (which should support namespace prefixes), so I would say the limitation was probably just in the Python wrapper.

if state and not self._namespaces:
raise SAXException(f"{feature_namespace_prefixes} requires {feature_namespaces} to be enabled")
self._namespace_prefixes = state
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ukarroum I wonder what the expected behavior is here when feature_namespaces is currently disabled. Or more generally speaking: do we know anything about the relation between these two features? Potential options seem to be:

  • Block enabling/disabling when their dependency would be violated.
  • Auto-enable/disable to respect dependency at all times
  • Keep the two fully separate but check for both rather than just self._namespace_prefixes where checking for being active?

Just thinking aloud.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good point, I think it will make sense to just enable feature_namespaces when feature_namespace_prefixes is also enabled (and also disable feature_namespace_prefixes if feature_namespaces is disabled).
I will update both set/get feature

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

actually one drawback I see for this is :

enable feature_namespace_prefixes
disable feature_namespaces
enable feature_namespaces

this will result in feature_namespace_prefixes being disabled which feels odd.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I went with 1) instead.
I pushed a small change to check if namespace is enabled when we enable namespace prefixes.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ukarroum I like it. I think one small dedicated test for that would be great, where namespaces are disabled and the exception is raised with the expected message. (Arguably, the success case is already covered by existing tests.)

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added test

else:
raise SAXNotRecognizedException(
"Feature '%s' not recognized" % name)
Expand Down Expand Up @@ -345,11 +347,14 @@ def start_element_ns(self, name, attrs):
pair = name.split()
if len(pair) == 1:
# no namespace
elem_qname = name
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have no idea whether this is correct or not here. Is there some specs that we could follow for the implementation?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you're referring specifically to elem_qname, here is the spec: https://www.w3.org/TR/xml-names/#ns-qualnames.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I also added another test to test the "else" branch.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks!

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@picnixz these lines are 1:1 from PyXML 0.8.4:

meld

pair = (None, name)
elif len(pair) == 3:
elem_qname = "%s:%s" % (pair[2], pair[1])
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note to self: This is the key value provider in this pull request. pair[2] was previously not exposed.

pair = pair[0], pair[1]
else:
# default namespace
elem_qname = pair[1]
pair = tuple(pair)

newattrs = {}
Expand All @@ -372,7 +377,10 @@ def start_element_ns(self, name, attrs):
newattrs[apair] = value
qnames[apair] = qname

self._cont_handler.startElementNS(pair, None,
if not self._namespace_prefixes:
elem_qname = None

self._cont_handler.startElementNS(pair, elem_qname,
Comment thread
ukarroum marked this conversation as resolved.
AttributesNSImpl(newattrs, qnames))

def end_element_ns(self, name):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Backported namespace prefixes support for xml.sax.expatreader from PyXML
(0.8.4).
Loading