From 00ae3e3145090fa28407dfa5346b6a24e5500020 Mon Sep 17 00:00:00 2001 From: elenril Date: Thu, 30 Apr 2026 18:24:23 +0100 Subject: [PATCH] bpo-39100: _header_value_parser: do not treat a Group as invalid-mailbox (GH-24872) When an address in an address-list has garbage at the end, the code will currently: 1. change the mailbox in the last parsed address into invalid-mailbox by overriding its token_type; 2. wrap the trailing garbage into another invalid-mailbox and append it to the last parsed address. However, that does not take into account that an address may also contain a Group instead of a single mailbox. In that case, overwriting token_type leads to undesirable results, e.g. parsing an email with the following 'To' header: unlisted-recipients:; (no To-header on input) raises an AttributeError from trying to treat the Group as a Mailbox. Moreover, it is questionable whether the previously parsed mailbox should be treated as invalid in addition to the trailing garbage. Address both of the above by wrapping the trailing garbage in a new Address with a single invalid-mailbox, and appending it to the AddressList directly. This changes the results of the test_get_address_list_mailboxes_invalid_addresses test, where the address list is now parsed into 4 mailboxes instead of 3 (all but the first one are invalid). 
(cherry picked from commit b413bc7a1f0946f734d9660239b4e2e8ddc48522) Co-authored-by: elenril --- Lib/email/_header_value_parser.py | 8 +++----- Lib/test/test_email/test__header_value_parser.py | 16 +++++++++++++--- ...2023-09-08-13-10-32.gh-issue-83281.2Plpcj.rst | 2 ++ 3 files changed, 18 insertions(+), 8 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-09-08-13-10-32.gh-issue-83281.2Plpcj.rst diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index 03fedd99539ed3..35e71e04c49358 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -2059,12 +2059,10 @@ def get_address_list(value): address_list.defects.append(errors.InvalidHeaderDefect( "invalid address in address-list")) if value and value[0] != ',': - # Crap after address; treat it as an invalid mailbox. - # The mailbox info will still be available. - mailbox = address_list[-1][0] - mailbox.token_type = 'invalid-mailbox' + # Crap after address: add it to the address list + # as an invalid mailbox token, value = get_invalid_mailbox(value, ',') - mailbox.extend(token) + address_list.append(Address([token])) address_list.defects.append(errors.InvalidHeaderDefect( "invalid address in address-list")) if value: # Must be a , at this point. 
diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py index 7535497af792b3..f72e32b063483e 100644 --- a/Lib/test/test_email/test__header_value_parser.py +++ b/Lib/test/test_email/test__header_value_parser.py @@ -2617,7 +2617,7 @@ def test_get_address_list_mailboxes_invalid_addresses(self): '') self.assertEqual(address_list.token_type, 'address-list') self.assertEqual(len(address_list.mailboxes), 1) - self.assertEqual(len(address_list.all_mailboxes), 3) + self.assertEqual(len(address_list.all_mailboxes), 4) self.assertEqual([str(x) for x in address_list.all_mailboxes], [str(x) for x in address_list.addresses]) self.assertEqual(address_list.mailboxes[0].domain, 'example.com') @@ -2626,11 +2626,13 @@ def test_get_address_list_mailboxes_invalid_addresses(self): self.assertEqual(address_list.addresses[1].token_type, 'address') self.assertEqual(len(address_list.addresses[0].mailboxes), 1) self.assertEqual(len(address_list.addresses[1].mailboxes), 0) - self.assertEqual(len(address_list.addresses[1].mailboxes), 0) + self.assertEqual(len(address_list.addresses[2].mailboxes), 0) + self.assertEqual(len(address_list.addresses[3].mailboxes), 0) self.assertEqual( address_list.addresses[1].all_mailboxes[0].local_part, 'Foo x') + self.assertEqual(address_list.addresses[2].all_mailboxes[0].value, '[]') self.assertEqual( - address_list.addresses[2].all_mailboxes[0].display_name, + address_list.addresses[3].all_mailboxes[0].display_name, "Nobody Is. 
Special") def test_get_address_list_group_empty(self): @@ -2695,6 +2697,14 @@ def test_get_address_list_group_and_mailboxes(self): self.assertEqual(str(address_list.addresses[1]), str(address_list.mailboxes[2])) + def test_get_address_list_trailing_garbage(self): + address_list = self._test_get_x(parser.get_address_list, + 'unlisted-recipients:; (no To-header on input)', + 'unlisted-recipients:; (no To-header on input)', + 'unlisted-recipients:; ', + [errors.InvalidHeaderDefect]*2 + [errors.ObsoleteHeaderDefect], + '') + def test_invalid_content_disposition(self): content_disp = self._test_parse_x( parser.parse_content_disposition_header, diff --git a/Misc/NEWS.d/next/Library/2023-09-08-13-10-32.gh-issue-83281.2Plpcj.rst b/Misc/NEWS.d/next/Library/2023-09-08-13-10-32.gh-issue-83281.2Plpcj.rst new file mode 100644 index 00000000000000..cf2ae770bd1940 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-09-08-13-10-32.gh-issue-83281.2Plpcj.rst @@ -0,0 +1,2 @@ +:mod:`email`: improve handling trailing garbage in address lists to avoid throwing +AttributeError in certain edge cases