Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions Lib/test/test_source_encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,23 @@ def test_issue7820(self):
# two bytes in common with the UTF-8 BOM
self.assertRaises(SyntaxError, eval, b'\xef\xbb\x20')

def test_truncated_utf8_at_eof(self):
# Regression test for https://issues.oss-fuzz.com/issues/451112368
# Truncated multi-byte UTF-8 sequences at end of input caused an
# out-of-bounds read in Parser/tokenizer/helpers.c:valid_utf8().
truncated = [
b'\xc2', # 2-byte lead, missing 1 continuation
b'\xdf', # 2-byte lead, missing 1 continuation
b'\xe0', # 3-byte lead, missing 2 continuations
b'\xe0\xa0', # 3-byte lead, missing 1 continuation
b'\xf0\x90', # 4-byte lead, missing 2 continuations
b'\xf0\x90\x80', # 4-byte lead, missing 1 continuation
b'\xf3', # 4-byte lead, missing 3 (the oss-fuzz reproducer)
]
for seq in truncated:
with self.subTest(seq=seq):
self.assertRaises(SyntaxError, compile, seq, '<test>', 'exec')

@support.requires_subprocess()
def test_20731(self):
sub = subprocess.Popen([sys.executable,
Expand Down
4 changes: 2 additions & 2 deletions Parser/tokenizer/helpers.c
Original file line number Diff line number Diff line change
Expand Up @@ -494,8 +494,8 @@ valid_utf8(const unsigned char* s)
return 0;
}
length = expected + 1;
for (; expected; expected--)
if (s[expected] < 0x80 || s[expected] >= 0xC0)
for (int i = 1; i <= expected; i++)
if (s[i] < 0x80 || s[i] >= 0xC0)
return 0;
return length;
}
Expand Down
Loading