Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 58 additions & 0 deletions Doc/library/hashlib.rst
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,64 @@ include a `salt <https://en.wikipedia.org/wiki/Salt_%28cryptography%29>`_.
.. versionadded:: 3.6


.. _hashlib-saslprep:

String preparation
------------------

.. function:: saslprep(data, *, allow_unassigned_code_points)

Prepare a Unicode string according to :rfc:`4013` (SASLprep), which is a
profile of the :rfc:`3454` *stringprep* algorithm. SASLprep is used to
normalize user names and passwords before they are used in SASL
authentication exchanges, for example by SMTP, IMAP, or LDAP clients.

*data* may be a :class:`str` or :class:`bytes`. Byte strings are returned
unchanged. Unicode strings are processed in four steps:

1. **Map** — non-ASCII space characters (table C.1.2) are replaced with
``U+0020``; characters commonly mapped to nothing (table B.1) are
removed.
2. **Normalize** — the string is normalized to NFKC using the Unicode 3.2
database, as :rfc:`3454` requires.
3. **Prohibit** — a :exc:`ValueError` is raised if the string contains
any character from the RFC 4013 prohibited-output tables (control
characters, private-use characters, non-characters, and others).
4. **Bidi check** — a :exc:`ValueError` is raised if the string mixes
right-to-left and left-to-right text in a way that violates
:rfc:`3454` section 6.

*allow_unassigned_code_points* must be supplied as a keyword argument.
Pass ``False`` for *stored strings* such as passwords stored in a
database (unassigned code points are prohibited, per :rfc:`3454`
section 7). Pass ``True`` for *queries* such as a password typed at a
prompt (unassigned code points are permitted). Always pass this
explicitly; there is no default.

Returns the prepared :class:`str`, or the original *data* unchanged if
it is a :class:`bytes` object.

>>> from hashlib import saslprep
>>> saslprep("I\u00ADX", allow_unassigned_code_points=False) # soft hyphen removed
'IX'
>>> saslprep("\u2168", allow_unassigned_code_points=False) # Roman numeral IX
'IX'
>>> saslprep(b"user", allow_unassigned_code_points=False) # bytes unchanged
b'user'

.. versionadded:: 3.15

.. seealso::

:rfc:`4013`
SASLprep: Stringprep Profile for User Names and Passwords.

:rfc:`3454`
Preparation of Internationalized Strings ("stringprep").

:mod:`stringprep`
The underlying Unicode character tables used by this function.


.. _hashlib-blake2:

BLAKE2
Expand Down
101 changes: 101 additions & 0 deletions Lib/_saslprep.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# Copyright 2016-present MongoDB, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Implementation of RFC 4013 SASLprep."""

import stringprep
import unicodedata
from collections.abc import Callable

# RFC 4013 section 2.3 prohibited output.
# A strict reading of RFC 4013 requires table C.1.2 here, but characters
# from it are mapped to SPACE in the Map step, so they won't appear here.
_PROHIBITED: tuple[Callable[[str], bool], ...] = (
stringprep.in_table_c12,
stringprep.in_table_c21_c22,
stringprep.in_table_c3,
stringprep.in_table_c4,
stringprep.in_table_c5,
stringprep.in_table_c6,
stringprep.in_table_c7,
stringprep.in_table_c8,
stringprep.in_table_c9,
)


def saslprep(data: bytes | str, *, allow_unassigned_code_points: bool) -> bytes | str:
"""Prepare a string per RFC 4013 SASLprep.

:Parameters:
- `data`: The string to SASLprep. Unicode strings (:class:`str`) are
normalized; byte strings (:class:`bytes`) are returned unchanged.
- `allow_unassigned_code_points`: Per RFC 3454 and RFC 4013, pass
``False`` for stored strings (unassigned code points are prohibited)
and ``True`` for queries (unassigned code points are permitted).
Always pass this explicitly; there is no default.

:Returns:
The SASLprep'd version of `data`.

:Raises ValueError:
If `data` contains a prohibited character or fails the bidirectional
string check.
"""
if not isinstance(data, str):
return data

prohibited: tuple[Callable[[str], bool], ...]
if allow_unassigned_code_points:
prohibited = _PROHIBITED
else:
prohibited = _PROHIBITED + (stringprep.in_table_a1,)

# RFC 3454 section 2, step 1 - Map
# RFC 4013 section 2.1: map non-ASCII space characters to SPACE (U+0020),
# and map commonly-mapped-to-nothing characters to nothing.
in_table_c12 = stringprep.in_table_c12
in_table_b1 = stringprep.in_table_b1
data = "".join(
"\u0020" if in_table_c12(c) else c
for c in data
if not in_table_b1(c)
)

# RFC 3454 section 2, step 2 - Normalize
# RFC 4013 section 2.2: use Unicode NFKC normalization.
data = unicodedata.ucd_3_2_0.normalize("NFKC", data)

if not data:
return data

in_table_d1 = stringprep.in_table_d1
if in_table_d1(data[0]):
if not in_table_d1(data[-1]):
# RFC 3454 section 6, rule 3: if a string contains any RandALCat
# character, the first and last characters MUST be RandALCat.
raise ValueError("SASLprep: failed bidirectional check")
# RFC 3454 section 6, rule 2: if a string contains any RandALCat
# character, it MUST NOT contain any LCat character.
prohibited = prohibited + (stringprep.in_table_d2,)
else:
# Rule 3 (converse): if the first character is not RandALCat,
# no other character may be RandALCat.
prohibited = prohibited + (in_table_d1,)

# RFC 3454 section 2, steps 3 and 4 - Prohibit and check bidi.
for char in data:
if any(in_table(char) for in_table in prohibited):
raise ValueError("SASLprep: failed prohibited character check")

return data
5 changes: 4 additions & 1 deletion Lib/hashlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@
algorithms_available = set(__always_supported)

__all__ = __always_supported + ('new', 'algorithms_guaranteed',
'algorithms_available', 'file_digest')
'algorithms_available', 'file_digest',
'saslprep')


__builtin_constructor_cache = {}
Expand Down Expand Up @@ -197,6 +198,8 @@ def __hash_new(name, *args, **kwargs):
new = __py_new
__get_hash = __get_builtin_constructor

from _saslprep import saslprep

try:
# OpenSSL's PKCS5_PBKDF2_HMAC requires OpenSSL 1.0+ with HMAC and SHA
from _hashlib import pbkdf2_hmac
Expand Down
18 changes: 11 additions & 7 deletions Lib/smtplib.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
import datetime
import sys
from email.base64mime import body_encode as encode_base64
from hashlib import saslprep

__all__ = ["SMTPException", "SMTPNotSupportedError", "SMTPServerDisconnected", "SMTPResponseException",
"SMTPSenderRefused", "SMTPRecipientsRefused", "SMTPDataError",
Expand Down Expand Up @@ -645,7 +646,7 @@ def auth(self, mechanism, authobject, *, initial_response_ok=True):
mechanism = mechanism.upper()
initial_response = (authobject() if initial_response_ok else None)
if initial_response is not None:
response = encode_base64(initial_response.encode('ascii'), eol='')
response = encode_base64(initial_response.encode('utf-8'), eol='')
(code, resp) = self.docmd("AUTH", mechanism + " " + response)
self._auth_challenge_count = 1
else:
Expand All @@ -656,7 +657,7 @@ def auth(self, mechanism, authobject, *, initial_response_ok=True):
self._auth_challenge_count += 1
challenge = base64.decodebytes(resp)
response = encode_base64(
authobject(challenge).encode('ascii'), eol='')
authobject(challenge).encode('utf-8'), eol='')
(code, resp) = self.docmd(response)
# If server keeps sending challenges, something is wrong.
if self._auth_challenge_count > _MAXCHALLENGE:
Expand All @@ -676,22 +677,25 @@ def auth_cram_md5(self, challenge=None):
return None
if not _have_cram_md5_support:
raise SMTPException("CRAM-MD5 is not supported")
password = self.password.encode('ascii')
password = saslprep(self.password, allow_unassigned_code_points=False).encode('utf-8')
authcode = hmac.HMAC(password, challenge, 'md5')
return f"{self.user} {authcode.hexdigest()}"
return f"{saslprep(self.user, allow_unassigned_code_points=False)} {authcode.hexdigest()}"

def auth_plain(self, challenge=None):
""" Authobject to use with PLAIN authentication. Requires self.user and
self.password to be set."""
return "\0%s\0%s" % (self.user, self.password)
return "\0%s\0%s" % (
saslprep(self.user, allow_unassigned_code_points=False),
saslprep(self.password, allow_unassigned_code_points=False),
)

def auth_login(self, challenge=None):
""" Authobject to use with LOGIN authentication. Requires self.user and
self.password to be set."""
if challenge is None or self._auth_challenge_count < 2:
return self.user
return saslprep(self.user, allow_unassigned_code_points=False)
else:
return self.password
return saslprep(self.password, allow_unassigned_code_points=False)

def login(self, user, password, *, initial_response_ok=True):
"""Log in on an SMTP server that requires authentication.
Expand Down
78 changes: 50 additions & 28 deletions Lib/test/test_smtplib.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from email.message import EmailMessage
from email.base64mime import body_encode as encode_base64
import email.utils
from hashlib import saslprep
import hashlib
import hmac
import socket
Expand Down Expand Up @@ -808,6 +809,13 @@ def testLineTooLong(self):
}

sim_auth = ('Mr.A@somewhere.com', 'somepassword')
# User name -> stored password for the simulated server's AUTH handlers.
sim_auths = {
    'Mr.A@somewhere.com': 'somepassword',
    # Unicode username and password (Devanagari).
    '\u092D\u093E\u0930\u0924@\u092D\u093E\u0930\u0924': '\u092D\u093E\u0930\u0924@',
    # Password that SASLprep normalizes: Roman numeral IX (U+2168) -> 'IX'.
    # The un-normalized code point is stored so that the NFKC step is
    # actually exercised on both the client and the server side.
    'Mr.C@somewhere.com': '\u2168',
}
sim_cram_md5_challenge = ('PENCeUxFREJoU0NnbmhNWitOMjNGNn'
'dAZWx3b29kLmlubm9zb2Z0LmNvbT4=')
sim_lists = {'list-1':['Mr.A@somewhere.com','Mrs.C@somewhereesle.com'],
Expand Down Expand Up @@ -897,7 +905,11 @@ def _auth_plain(self, arg=None):
self.push('535 Splitting response {!r} into user and password'
' failed: {}'.format(logpass, e))
return
self._authenticated(user, password == sim_auth[1])
stored = sim_auths.get(user, '')
self._authenticated(
user,
password == saslprep(stored, allow_unassigned_code_points=False),
)

def _auth_login(self, arg=None):
if arg is None:
Expand All @@ -909,7 +921,11 @@ def _auth_login(self, arg=None):
self.push('334 UGFzc3dvcmQ6')
else:
password = self._decode_base64(arg)
self._authenticated(self._auth_login_user, password == sim_auth[1])
stored = sim_auths.get(self._auth_login_user, '')
self._authenticated(
self._auth_login_user,
password == saslprep(stored, allow_unassigned_code_points=False),
)
del self._auth_login_user

def _auth_buggy(self, arg=None):
Expand All @@ -928,8 +944,9 @@ def _auth_cram_md5(self, arg=None):
self.push('535 Splitting response {!r} into user and password '
'failed: {}'.format(logpass, e))
return
pwd = sim_auth[1].encode('ascii')
msg = self._decode_base64(sim_cram_md5_challenge).encode('ascii')
stored = sim_auths.get(user, '')
pwd = saslprep(stored, allow_unassigned_code_points=False).encode('utf-8')
msg = self._decode_base64(sim_cram_md5_challenge).encode('utf-8')
try:
valid_hashed_pass = hmac.HMAC(pwd, msg, 'md5').hexdigest()
except ValueError:
Expand Down Expand Up @@ -1122,21 +1139,36 @@ def testEXPN(self):
self.assertEqual(smtp.expn(u), expected_unknown)
smtp.quit()

def helpAUTH_x(self, feature):
    """Helper: test all sim_auths credentials against the given AUTH feature.

    For every user name / password pair in ``sim_auths`` three logins are
    attempted, each on a fresh connection and in its own subTest: the
    exact password (must succeed), the password prefixed with "No" (must
    raise SMTPAuthenticationError), and the password followed by a soft
    hyphen, which SASLprep removes (must succeed).
    """
    self.serv.add_feature(feature)

    def connect():
        return smtplib.SMTP(HOST, self.port, local_hostname='localhost',
                            timeout=support.LOOPBACK_TIMEOUT)

    for username, password in sim_auths.items():
        with self.subTest(username=username):
            smtp = connect()
            # Close in ``finally``: a failure inside subTest() is recorded
            # and the loop continues, so the socket would otherwise leak.
            try:
                resp = smtp.login(username, password)
                self.assertEqual(resp, (235, b'Authentication Succeeded'))
            finally:
                smtp.close()
        with self.subTest(username=username, wrong_password=True):
            smtp = connect()
            try:
                with self.assertRaises(smtplib.SMTPAuthenticationError):
                    smtp.login(username, "No" + password)
            finally:
                smtp.close()
        with self.subTest(username=username, saslprep_equivalent=True):
            # A soft-hyphen (U+00AD) SASLprep-normalizes to nothing,
            # so the password with a trailing soft-hyphen should succeed.
            smtp = connect()
            try:
                resp = smtp.login(username, password + "\u00AD")
                self.assertEqual(resp, (235, b'Authentication Succeeded'))
            finally:
                smtp.close()

def testAUTH_PLAIN(self):
self.serv.add_feature("AUTH PLAIN")
smtp = smtplib.SMTP(HOST, self.port, local_hostname='localhost',
timeout=support.LOOPBACK_TIMEOUT)
resp = smtp.login(sim_auth[0], sim_auth[1])
self.assertEqual(resp, (235, b'Authentication Succeeded'))
smtp.close()
self.helpAUTH_x("AUTH PLAIN")

def testAUTH_LOGIN(self):
self.serv.add_feature("AUTH LOGIN")
smtp = smtplib.SMTP(HOST, self.port, local_hostname='localhost',
timeout=support.LOOPBACK_TIMEOUT)
resp = smtp.login(sim_auth[0], sim_auth[1])
self.assertEqual(resp, (235, b'Authentication Succeeded'))
smtp.close()
self.helpAUTH_x("AUTH LOGIN")

def testAUTH_LOGIN_initial_response_ok(self):
self.serv.add_feature("AUTH LOGIN")
Expand Down Expand Up @@ -1178,12 +1210,7 @@ def auth_buggy(challenge=None):

@hashlib_helper.requires_hashdigest('md5', openssl=True)
def testAUTH_CRAM_MD5(self):
self.serv.add_feature("AUTH CRAM-MD5")
smtp = smtplib.SMTP(HOST, self.port, local_hostname='localhost',
timeout=support.LOOPBACK_TIMEOUT)
resp = smtp.login(sim_auth[0], sim_auth[1])
self.assertEqual(resp, (235, b'Authentication Succeeded'))
smtp.close()
self.helpAUTH_x("AUTH CRAM-MD5")

@hashlib_helper.block_algorithm('md5')
@mock.patch("smtplib._have_cram_md5_support", False)
Expand Down Expand Up @@ -1221,12 +1248,7 @@ def testAUTH_CRAM_MD5_blocked_and_fallback(self):
@hashlib_helper.requires_hashdigest('md5', openssl=True)
def testAUTH_multiple(self):
# Test that multiple authentication methods are tried.
self.serv.add_feature("AUTH BOGUS PLAIN LOGIN CRAM-MD5")
smtp = smtplib.SMTP(HOST, self.port, local_hostname='localhost',
timeout=support.LOOPBACK_TIMEOUT)
resp = smtp.login(sim_auth[0], sim_auth[1])
self.assertEqual(resp, (235, b'Authentication Succeeded'))
smtp.close()
self.helpAUTH_x("AUTH BOGUS PLAIN LOGIN CRAM-MD5")

def test_auth_function(self):
supported = {'PLAIN', 'LOGIN'}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Add :func:`hashlib.saslprep`, an implementation of the RFC 4013 SASLprep
string-preparation algorithm, and use it in :mod:`smtplib` to support Unicode
usernames and passwords in SMTP authentication. SASLprep implementation
contributed by MongoDB, Inc.
1 change: 1 addition & 0 deletions Python/stdlib_module_names.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading