From 7d1cbcaee0b17b9e709ee100d76fe86d135e8c33 Mon Sep 17 00:00:00 2001 From: Samuel Sloniker Date: Tue, 22 Nov 2022 11:44:13 -0800 Subject: [PATCH] Make sure text is lowercase --- gptc/tokenizer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/gptc/tokenizer.py b/gptc/tokenizer.py index fe09223..bf687a2 100644 --- a/gptc/tokenizer.py +++ b/gptc/tokenizer.py @@ -17,6 +17,7 @@ def tokenize( converted_text: Union[str, List[str]] = text.lower() if has_emoji and use_emoji: + text = text.lower() parts = [] highest_end = 0 for emoji_part in emoji.emoji_list(text):