From 1aa8bbe62f27b564cf15e2aad591c62744354a4e Mon Sep 17 00:00:00 2001 From: Ivan Savin Date: Wed, 17 Apr 2024 14:14:22 +0400 Subject: [PATCH] bpo-40944: Fix IndexError when parse emails with truncated Message-ID, address, routes, etc (GH-20790) Co-authored-by: Serhiy Storchaka --- Lib/email/_header_value_parser.py | 15 ++++--- .../test_email/test__header_value_parser.py | 40 +++++++++++++++++++ .../2020-06-10-19-24-17.bpo-40943.vjiiN_.rst | 1 + 3 files changed, 51 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2020-06-10-19-24-17.bpo-40943.vjiiN_.rst diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index 7755e629500a8f..d1b4c7df4f445f 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -1213,7 +1213,7 @@ def get_bare_quoted_string(value): value is the text between the quote marks, with whitespace preserved and quoted pairs decoded. """ - if value[0] != '"': + if not value or value[0] != '"': raise errors.HeaderParseError( "expected '\"' but found '{}'".format(value)) bare_quoted_string = BareQuotedString() @@ -1454,7 +1454,7 @@ def get_local_part(value): """ local_part = LocalPart() leader = None - if value[0] in CFWS_LEADER: + if value and value[0] in CFWS_LEADER: leader, value = get_cfws(value) if not value: raise errors.HeaderParseError( @@ -1613,7 +1613,7 @@ def get_domain(value): """ domain = Domain() leader = None - if value[0] in CFWS_LEADER: + if value and value[0] in CFWS_LEADER: leader, value = get_cfws(value) if not value: raise errors.HeaderParseError( @@ -1689,6 +1689,8 @@ def get_obs_route(value): if value[0] in CFWS_LEADER: token, value = get_cfws(value) obs_route.append(token) + if not value: + break if value[0] == '@': obs_route.append(RouteComponentMarker) token, value = get_domain(value[1:]) @@ -1707,7 +1709,7 @@ def get_angle_addr(value): """ angle_addr = AngleAddr() - if value[0] in CFWS_LEADER: + if value and value[0] in CFWS_LEADER: token, value = get_cfws(value) angle_addr.append(token) if not value or value[0] != '<': @@ -1717,7 +1719,7 @@ def get_angle_addr(value): value = value[1:] # Although it is not legal per RFC5322, SMTP uses '<>' in certain # circumstances. - if value[0] == '>': + if value and value[0] == '>': angle_addr.append(ValueTerminal('>', 'angle-addr-end')) angle_addr.defects.append(errors.InvalidHeaderDefect( "null addr-spec in angle-addr")) @@ -1769,6 +1771,9 @@ def get_name_addr(value): name_addr = NameAddr() # Both the optional display name and the angle-addr can start with cfws. leader = None + if not value: + raise errors.HeaderParseError( + "expected name-addr but found '{}'".format(value)) if value[0] in CFWS_LEADER: leader, value = get_cfws(value) if not value: diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py index 39ed4ee2a38159..56a1e3a3de5aa2 100644 --- a/Lib/test/test_email/test__header_value_parser.py +++ b/Lib/test/test_email/test__header_value_parser.py @@ -801,6 +801,10 @@ def test_get_quoted_string_header_ends_in_qcontent(self): self.assertEqual(qs.content, 'bob') self.assertEqual(qs.quoted_value, ' "bob"') + def test_get_quoted_string_cfws_only_raises(self): + with self.assertRaises(errors.HeaderParseError): + parser.get_quoted_string(' (foo) ') + def test_get_quoted_string_no_quoted_string(self): with self.assertRaises(errors.HeaderParseError): parser.get_quoted_string(' (ab) xyz') @@ -1135,6 +1139,10 @@ def test_get_local_part_complex_obsolete_invalid(self): '@python.org') self.assertEqual(local_part.local_part, 'Fred.A.Johnson and dogs') + def test_get_local_part_empty_raises(self): + with self.assertRaises(errors.HeaderParseError): + parser.get_local_part('') + def test_get_local_part_no_part_raises(self): with self.assertRaises(errors.HeaderParseError): parser.get_local_part(' (foo) ') @@ -1387,6 +1395,10 @@ def test_get_domain_obsolete(self): '') self.assertEqual(domain.domain, 'example.com') + def test_get_domain_empty_raises(self): + with self.assertRaises(errors.HeaderParseError): + parser.get_domain("") + def test_get_domain_no_non_cfws_raises(self): with self.assertRaises(errors.HeaderParseError): parser.get_domain(" (foo)\t") @@ -1512,6 +1524,10 @@ def test_get_obs_route_no_route_before_end_raises(self): with self.assertRaises(errors.HeaderParseError): parser.get_obs_route('(foo) @example.com,') + def test_get_obs_route_no_route_before_end_raises2(self): + with self.assertRaises(errors.HeaderParseError): + parser.get_obs_route('(foo) @example.com, (foo) ') + def test_get_obs_route_no_route_before_special_raises(self): with self.assertRaises(errors.HeaderParseError): parser.get_obs_route('(foo) [abc],') @@ -1520,6 +1536,14 @@ def test_get_obs_route_no_route_before_special_raises2(self): with self.assertRaises(errors.HeaderParseError): parser.get_obs_route('(foo) @example.com [abc],') + def test_get_obs_route_no_domain_after_at_raises(self): + with self.assertRaises(errors.HeaderParseError): + parser.get_obs_route('@') + + def test_get_obs_route_no_domain_after_at_raises2(self): + with self.assertRaises(errors.HeaderParseError): + parser.get_obs_route('@example.com, @') + # get_angle_addr def test_get_angle_addr_simple(self): @@ -1646,6 +1670,14 @@ def test_get_angle_addr_ends_at_special(self): self.assertIsNone(angle_addr.route) self.assertEqual(angle_addr.addr_spec, 'dinsdale@example.com') + def test_get_angle_addr_empty_raise(self): + with self.assertRaises(errors.HeaderParseError): + parser.get_angle_addr('') + + def test_get_angle_addr_left_angle_only_raise(self): + with self.assertRaises(errors.HeaderParseError): + parser.get_angle_addr('<') + def test_get_angle_addr_no_angle_raise(self): with self.assertRaises(errors.HeaderParseError): parser.get_angle_addr('(foo) ') @@ -1857,6 +1889,10 @@ def test_get_name_addr_ends_at_special(self): self.assertIsNone(name_addr.route) self.assertEqual(name_addr.addr_spec, 'dinsdale@example.com') + def test_get_name_addr_empty_raises(self): + with self.assertRaises(errors.HeaderParseError): + parser.get_name_addr('') + def test_get_name_addr_no_content_raises(self): with self.assertRaises(errors.HeaderParseError): parser.get_name_addr(' (foo) ') @@ -2732,6 +2768,10 @@ def test_get_msg_id_empty_id_right(self): with self.assertRaises(errors.HeaderParseError): parser.get_msg_id("") + def test_get_msg_id_no_id_right(self): + with self.assertRaises(errors.HeaderParseError): + parser.get_msg_id("