diff --git a/Doc/library/xml.rst b/Doc/library/xml.rst index a47d31465b19f1..f9ffaa9a94aacc 100644 --- a/Doc/library/xml.rst +++ b/Doc/library/xml.rst @@ -41,6 +41,21 @@ The XML handling submodules are: * :mod:`xml.sax`: SAX2 base classes and convenience functions * :mod:`xml.parsers.expat`: the Expat parser binding +This module also defines utility functions. + +.. function:: is_valid_name(name) + + Return ``True`` if the string is a valid element or attribute name, + ``False`` otherwise. + + Almost all characters are permitted in names, except control characters and + those which either are or reasonably could be used as delimiters. + Characters like ":", "-", ".", "_", and "·" are permitted, but "<", "/", + "!", "?", and "=" are forbidden. + The name cannot start with a digit or a character like "-", ".", and "·". + + .. versionadded:: next + .. _xml-security: .. _xml-vulnerabilities: diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index b63e7a4790e9af..91b53570f5420f 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -1530,6 +1530,14 @@ wave (Contributed by Lionel Koenig and Michiel W. Beijen in :gh:`60729`.) +xml +--- + +* Add the :func:`xml.is_valid_name` function, which allows checking + whether a string can be used as an element or attribute name in XML. + (Contributed by Serhiy Storchaka in :gh:`139489`.) 
+ + xml.parsers.expat ----------------- diff --git a/Lib/test/test_xml.py b/Lib/test/test_xml.py new file mode 100644 index 00000000000000..fd3633e43982d7 --- /dev/null +++ b/Lib/test/test_xml.py @@ -0,0 +1,27 @@ +import xml +import unittest + + +class TestUtils(unittest.TestCase): + + def test_is_valid_name(self): + is_valid_name = xml.is_valid_name + self.assertFalse(is_valid_name('')) + self.assertTrue(is_valid_name('name')) + self.assertTrue(is_valid_name('NAME')) + self.assertTrue(is_valid_name('name0:-._·')) + self.assertTrue(is_valid_name('_')) + self.assertTrue(is_valid_name(':')) + self.assertTrue(is_valid_name('Ñàḿĕ')) + self.assertTrue(is_valid_name('\U000EFFFF')) + self.assertFalse(is_valid_name('0')) + self.assertFalse(is_valid_name('-')) + self.assertFalse(is_valid_name('.')) + self.assertFalse(is_valid_name('·')) + self.assertFalse(is_valid_name('na me')) + for c in '<>/!?=\x00\x01\x7f\ud800\udfff\ufffe\uffff\U000F0000': + self.assertFalse(is_valid_name('name' + c)) + + +if __name__ == '__main__': + unittest.main() diff --git a/Lib/xml/__init__.py b/Lib/xml/__init__.py index bf6d8ddfd04c93..002d6d3e0e8267 100644 --- a/Lib/xml/__init__.py +++ b/Lib/xml/__init__.py @@ -16,5 +16,6 @@ """ +from .utils import * -__all__ = ["dom", "parsers", "sax", "etree"] +__all__ = ["dom", "parsers", "sax", "etree", "is_valid_name"] diff --git a/Lib/xml/utils.py b/Lib/xml/utils.py new file mode 100644 index 00000000000000..7825fe917bce33 --- /dev/null +++ b/Lib/xml/utils.py @@ -0,0 +1,25 @@ +lazy import re + + +def is_valid_name(name): + """Test whether a string is a valid element or attribute name.""" + # https://www.w3.org/TR/xml/#NT-Name + return re.fullmatch( + # NameStartChar + '[' + ':A-Z_a-z' + '\xC0-\xD6\xD8-\xF6\xF8-\u02FF\u0370-\u037D\u037F-\u1FFF' + '\u200C\u200D' + '\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF' + '\uF900-\uFDCF\uFDF0-\uFFFD\U00010000-\U000EFFFF' + ']' + # NameChar + '[' + r'\-.0-9:A-Z_a-z' + '\xB7' + '\xC0-\xD6\xD8-\xF6\xF8-\u037D\u037F-\u1FFF' + 
'\u200C\u200D\u203F\u2040' + '\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF' + '\uF900-\uFDCF\uFDF0-\uFFFD\U00010000-\U000EFFFF' + ']*+', + name) is not None diff --git a/Misc/NEWS.d/next/Library/2025-10-08-15-36-00.gh-issue-139489.W46tvn.rst b/Misc/NEWS.d/next/Library/2025-10-08-15-36-00.gh-issue-139489.W46tvn.rst new file mode 100644 index 00000000000000..05edefda053a08 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-10-08-15-36-00.gh-issue-139489.W46tvn.rst @@ -0,0 +1,2 @@ +Add the :func:`xml.is_valid_name` function, which allows checking +whether a string can be used as an element or attribute name in XML.