Closes #14
This commit is contained in:
Samuel Sloniker 2023-04-16 14:49:03 -07:00
parent f38f4ca801
commit 99ad07a876
Signed by: kj7rrv
GPG Key ID: 1BB4029E66285A62

View File

@@ -7,7 +7,7 @@ import unicodedata
def tokenize(text: str, max_ngram_length: int = 1) -> List[str]:
text = unicodedata.normalize("NFKD", text).lower()
text = unicodedata.normalize("NFKD", text).casefold()
parts = []
highest_end = 0
for emoji_part in emoji.emoji_list(text):