diff --git a/Doc/library/hashlib.rst b/Doc/library/hashlib.rst index ed0b0b2735b5c3..76edbcb0f865ae 100644 --- a/Doc/library/hashlib.rst +++ b/Doc/library/hashlib.rst @@ -371,6 +371,64 @@ include a `salt `_. .. versionadded:: 3.6 +.. _hashlib-saslprep: + +String preparation +------------------ + +.. function:: saslprep(data, *, allow_unassigned_code_points) + + Prepare a Unicode string according to :rfc:`4013` (SASLprep), which is a + profile of the :rfc:`3454` *stringprep* algorithm. SASLprep is used to + normalise usernames and passwords before they are transmitted in + authentication protocols such as SASL (e.g. SMTP, IMAP, LDAP). + + *data* may be a :class:`str` or :class:`bytes`. Byte strings are returned + unchanged. Unicode strings are processed in four steps: + + 1. **Map** — non-ASCII space characters (table C.1.2) are replaced with + ``U+0020``; characters commonly mapped to nothing (table B.1) are + removed. + 2. **Normalise** — the string is normalised using Unicode NFKC. + 3. **Prohibit** — a :exc:`ValueError` is raised if the string contains + any character from the RFC 4013 prohibited-output tables (control + characters, private-use characters, non-characters, and others). + 4. **Bidi check** — a :exc:`ValueError` is raised if the string mixes + right-to-left and left-to-right text in a way that violates + :rfc:`3454` section 6. + + *allow_unassigned_code_points* must be supplied as a keyword argument. + Pass ``False`` for *stored strings* such as passwords stored in a + database (unassigned code points are prohibited, per :rfc:`3454` + section 7). Pass ``True`` for *queries* such as a password typed at a + prompt (unassigned code points are permitted). Always pass this + explicitly; there is no default. + + Returns the prepared :class:`str`, or the original *data* unchanged if + it is a :class:`bytes` object. 
+ + >>> from hashlib import saslprep + >>> saslprep("I\u00ADX", allow_unassigned_code_points=False) # soft hyphen removed + 'IX' + >>> saslprep("\u2168", allow_unassigned_code_points=False) # Roman numeral IX + 'IX' + >>> saslprep(b"user", allow_unassigned_code_points=False) # bytes unchanged + b'user' + + .. versionadded:: 3.15 + + .. seealso:: + + :rfc:`4013` + SASLprep: Stringprep Profile for User Names and Passwords. + + :rfc:`3454` + Preparation of Internationalized Strings ("stringprep"). + + :mod:`stringprep` + The underlying Unicode character tables used by this function. + + .. _hashlib-blake2: BLAKE2 diff --git a/Lib/_saslprep.py b/Lib/_saslprep.py new file mode 100644 index 00000000000000..727612fbbf324b --- /dev/null +++ b/Lib/_saslprep.py @@ -0,0 +1,101 @@ +# Copyright 2016-present MongoDB, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Implementation of RFC 4013 SASLprep.""" + +import stringprep +import unicodedata +from collections.abc import Callable + +# RFC 4013 section 2.3 prohibited output. +# A strict reading of RFC 4013 requires table C.1.2 here, but characters +# from it are mapped to SPACE in the Map step, so they won't appear here. +_PROHIBITED: tuple[Callable[[str], bool], ...] 
= ( + stringprep.in_table_c12, + stringprep.in_table_c21_c22, + stringprep.in_table_c3, + stringprep.in_table_c4, + stringprep.in_table_c5, + stringprep.in_table_c6, + stringprep.in_table_c7, + stringprep.in_table_c8, + stringprep.in_table_c9, +) + + +def saslprep(data: bytes | str, *, allow_unassigned_code_points: bool) -> bytes | str: + """Prepare a string per RFC 4013 SASLprep. + + :Parameters: + - `data`: The string to SASLprep. Unicode strings (:class:`str`) are + normalized; byte strings (:class:`bytes`) are returned unchanged. + - `allow_unassigned_code_points`: Per RFC 3454 and RFC 4013, pass + ``False`` for stored strings (unassigned code points are prohibited) + and ``True`` for queries (unassigned code points are permitted). + Always pass this explicitly; there is no default. + + :Returns: + The SASLprep'd version of `data`. + + :Raises ValueError: + If `data` contains a prohibited character or fails the bidirectional + string check. + """ + if not isinstance(data, str): + return data + + prohibited: tuple[Callable[[str], bool], ...] + if allow_unassigned_code_points: + prohibited = _PROHIBITED + else: + prohibited = _PROHIBITED + (stringprep.in_table_a1,) + + # RFC 3454 section 2, step 1 - Map + # RFC 4013 section 2.1: map non-ASCII space characters to SPACE (U+0020), + # and map commonly-mapped-to-nothing characters to nothing. + in_table_c12 = stringprep.in_table_c12 + in_table_b1 = stringprep.in_table_b1 + data = "".join( + "\u0020" if in_table_c12(c) else c + for c in data + if not in_table_b1(c) + ) + + # RFC 3454 section 2, step 2 - Normalize + # RFC 4013 section 2.2: use Unicode NFKC normalization. + data = unicodedata.ucd_3_2_0.normalize("NFKC", data) + + if not data: + return data + + in_table_d1 = stringprep.in_table_d1 + if in_table_d1(data[0]): + if not in_table_d1(data[-1]): + # RFC 3454 section 6, rule 3: if a string contains any RandALCat + # character, the first and last characters MUST be RandALCat. 
+ raise ValueError("SASLprep: failed bidirectional check") + # RFC 3454 section 6, rule 2: if a string contains any RandALCat + # character, it MUST NOT contain any LCat character. + prohibited = prohibited + (stringprep.in_table_d2,) + else: + # Rule 3 (converse): if the first character is not RandALCat, + # no other character may be RandALCat. + prohibited = prohibited + (in_table_d1,) + + # RFC 3454 section 2, steps 3 and 4 - Prohibit and check bidi. + for char in data: + if any(in_table(char) for in_table in prohibited): + raise ValueError("SASLprep: failed prohibited character check") + + return data diff --git a/Lib/hashlib.py b/Lib/hashlib.py index 6c73eb9f31f8e4..2c14a9c0158ba3 100644 --- a/Lib/hashlib.py +++ b/Lib/hashlib.py @@ -65,7 +65,8 @@ algorithms_available = set(__always_supported) __all__ = __always_supported + ('new', 'algorithms_guaranteed', - 'algorithms_available', 'file_digest') + 'algorithms_available', 'file_digest', + 'saslprep') __builtin_constructor_cache = {} @@ -197,6 +198,8 @@ def __hash_new(name, *args, **kwargs): new = __py_new __get_hash = __get_builtin_constructor +from _saslprep import saslprep + try: # OpenSSL's PKCS5_PBKDF2_HMAC requires OpenSSL 1.0+ with HMAC and SHA from _hashlib import pbkdf2_hmac diff --git a/Lib/smtplib.py b/Lib/smtplib.py index 4cfc2338d99c67..e34a77d4e49852 100644 --- a/Lib/smtplib.py +++ b/Lib/smtplib.py @@ -51,6 +51,7 @@ import datetime import sys from email.base64mime import body_encode as encode_base64 +from hashlib import saslprep __all__ = ["SMTPException", "SMTPNotSupportedError", "SMTPServerDisconnected", "SMTPResponseException", "SMTPSenderRefused", "SMTPRecipientsRefused", "SMTPDataError", @@ -645,7 +646,7 @@ def auth(self, mechanism, authobject, *, initial_response_ok=True): mechanism = mechanism.upper() initial_response = (authobject() if initial_response_ok else None) if initial_response is not None: - response = encode_base64(initial_response.encode('ascii'), eol='') + response = 
encode_base64(initial_response.encode('utf-8'), eol='') (code, resp) = self.docmd("AUTH", mechanism + " " + response) self._auth_challenge_count = 1 else: @@ -656,7 +657,7 @@ def auth(self, mechanism, authobject, *, initial_response_ok=True): self._auth_challenge_count += 1 challenge = base64.decodebytes(resp) response = encode_base64( - authobject(challenge).encode('ascii'), eol='') + authobject(challenge).encode('utf-8'), eol='') (code, resp) = self.docmd(response) # If server keeps sending challenges, something is wrong. if self._auth_challenge_count > _MAXCHALLENGE: @@ -676,22 +677,25 @@ def auth_cram_md5(self, challenge=None): return None if not _have_cram_md5_support: raise SMTPException("CRAM-MD5 is not supported") - password = self.password.encode('ascii') + password = saslprep(self.password, allow_unassigned_code_points=False).encode('utf-8') authcode = hmac.HMAC(password, challenge, 'md5') - return f"{self.user} {authcode.hexdigest()}" + return f"{saslprep(self.user, allow_unassigned_code_points=False)} {authcode.hexdigest()}" def auth_plain(self, challenge=None): """ Authobject to use with PLAIN authentication. Requires self.user and self.password to be set.""" - return "\0%s\0%s" % (self.user, self.password) + return "\0%s\0%s" % ( + saslprep(self.user, allow_unassigned_code_points=False), + saslprep(self.password, allow_unassigned_code_points=False), + ) def auth_login(self, challenge=None): """ Authobject to use with LOGIN authentication. Requires self.user and self.password to be set.""" if challenge is None or self._auth_challenge_count < 2: - return self.user + return saslprep(self.user, allow_unassigned_code_points=False) else: - return self.password + return saslprep(self.password, allow_unassigned_code_points=False) def login(self, user, password, *, initial_response_ok=True): """Log in on an SMTP server that requires authentication. 
# Single legacy credential pair, kept for tests that still reference it.
sim_auth = ('Mr.A@somewhere.com', 'somepassword')

# user name -> password as the simulated server stores it (before SASLprep).
sim_auths = {
    'Mr.A@somewhere.com': 'somepassword',
    # Unicode username and password (Devanagari).
    '\u092D\u093E\u0930\u0924@\u092D\u093E\u0930\u0924': '\u092D\u093E\u0930\u0924@',
    # Password that SASLprep normalizes: ROMAN NUMERAL NINE (U+2168) is
    # NFKC-folded to the two ASCII letters 'IX'.  The raw code point must be
    # stored here, otherwise the normalization step is never exercised.
    'Mr.C@somewhere.com': '\u2168',
}
sim_cram_md5_challenge = ('PENCeUxFREJoU0NnbmhNWitOMjNGNn'
                          'dAZWx3b29kLmlubm9zb2Z0LmNvbT4=')
self._decode_base64(sim_cram_md5_challenge).encode('ascii') + stored = sim_auths.get(user, '') + pwd = saslprep(stored, allow_unassigned_code_points=False).encode('utf-8') + msg = self._decode_base64(sim_cram_md5_challenge).encode('utf-8') try: valid_hashed_pass = hmac.HMAC(pwd, msg, 'md5').hexdigest() except ValueError: @@ -1122,21 +1139,36 @@ def testEXPN(self): self.assertEqual(smtp.expn(u), expected_unknown) smtp.quit() + def helpAUTH_x(self, feature): + """Helper: test all sim_auths credentials against the given AUTH feature.""" + self.serv.add_feature(feature) + for username, password in sim_auths.items(): + with self.subTest(username=username): + smtp = smtplib.SMTP(HOST, self.port, local_hostname='localhost', + timeout=support.LOOPBACK_TIMEOUT) + resp = smtp.login(username, password) + self.assertEqual(resp, (235, b'Authentication Succeeded')) + smtp.close() + with self.subTest(username=username, wrong_password=True): + smtp = smtplib.SMTP(HOST, self.port, local_hostname='localhost', + timeout=support.LOOPBACK_TIMEOUT) + with self.assertRaises(smtplib.SMTPAuthenticationError): + smtp.login(username, "No" + password) + smtp.close() + with self.subTest(username=username, saslprep_equivalent=True): + # A soft-hyphen (U+00AD) SASLprep-normalizes to nothing, + # so the password with a trailing soft-hyphen should succeed. 
+ smtp = smtplib.SMTP(HOST, self.port, local_hostname='localhost', + timeout=support.LOOPBACK_TIMEOUT) + resp = smtp.login(username, password + "\u00AD") + self.assertEqual(resp, (235, b'Authentication Succeeded')) + smtp.close() + def testAUTH_PLAIN(self): - self.serv.add_feature("AUTH PLAIN") - smtp = smtplib.SMTP(HOST, self.port, local_hostname='localhost', - timeout=support.LOOPBACK_TIMEOUT) - resp = smtp.login(sim_auth[0], sim_auth[1]) - self.assertEqual(resp, (235, b'Authentication Succeeded')) - smtp.close() + self.helpAUTH_x("AUTH PLAIN") def testAUTH_LOGIN(self): - self.serv.add_feature("AUTH LOGIN") - smtp = smtplib.SMTP(HOST, self.port, local_hostname='localhost', - timeout=support.LOOPBACK_TIMEOUT) - resp = smtp.login(sim_auth[0], sim_auth[1]) - self.assertEqual(resp, (235, b'Authentication Succeeded')) - smtp.close() + self.helpAUTH_x("AUTH LOGIN") def testAUTH_LOGIN_initial_response_ok(self): self.serv.add_feature("AUTH LOGIN") @@ -1178,12 +1210,7 @@ def auth_buggy(challenge=None): @hashlib_helper.requires_hashdigest('md5', openssl=True) def testAUTH_CRAM_MD5(self): - self.serv.add_feature("AUTH CRAM-MD5") - smtp = smtplib.SMTP(HOST, self.port, local_hostname='localhost', - timeout=support.LOOPBACK_TIMEOUT) - resp = smtp.login(sim_auth[0], sim_auth[1]) - self.assertEqual(resp, (235, b'Authentication Succeeded')) - smtp.close() + self.helpAUTH_x("AUTH CRAM-MD5") @hashlib_helper.block_algorithm('md5') @mock.patch("smtplib._have_cram_md5_support", False) @@ -1221,12 +1248,7 @@ def testAUTH_CRAM_MD5_blocked_and_fallback(self): @hashlib_helper.requires_hashdigest('md5', openssl=True) def testAUTH_multiple(self): # Test that multiple authentication methods are tried. 
- self.serv.add_feature("AUTH BOGUS PLAIN LOGIN CRAM-MD5") - smtp = smtplib.SMTP(HOST, self.port, local_hostname='localhost', - timeout=support.LOOPBACK_TIMEOUT) - resp = smtp.login(sim_auth[0], sim_auth[1]) - self.assertEqual(resp, (235, b'Authentication Succeeded')) - smtp.close() + self.helpAUTH_x("AUTH BOGUS PLAIN LOGIN CRAM-MD5") def test_auth_function(self): supported = {'PLAIN', 'LOGIN'} diff --git a/Misc/NEWS.d/next/Library/2026-04-07-00-00-00.gh-issue-73936.wrxC5G.rst b/Misc/NEWS.d/next/Library/2026-04-07-00-00-00.gh-issue-73936.wrxC5G.rst new file mode 100644 index 00000000000000..4393036d5a94d7 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-04-07-00-00-00.gh-issue-73936.wrxC5G.rst @@ -0,0 +1,4 @@ +Add :func:`hashlib.saslprep`, an implementation of the RFC 4013 SASLprep +string-preparation algorithm, and use it in :mod:`smtplib` to support Unicode +usernames and passwords in SMTP authentication. SASLprep implementation +contributed by MongoDB, Inc. diff --git a/Python/stdlib_module_names.h b/Python/stdlib_module_names.h index 8937e666bbbdd5..9dd41879d9991c 100644 --- a/Python/stdlib_module_names.h +++ b/Python/stdlib_module_names.h @@ -73,6 +73,7 @@ static const char* _Py_stdlib_module_names[] = { "_queue", "_random", "_remote_debugging", +"_saslprep", "_scproxy", "_sha1", "_sha2",