parent
f38f4ca801
commit
99ad07a876
|
@ -7,7 +7,7 @@ import unicodedata
|
|||
|
||||
|
||||
def tokenize(text: str, max_ngram_length: int = 1) -> List[str]:
|
||||
text = unicodedata.normalize("NFKD", text).lower()
|
||||
text = unicodedata.normalize("NFKD", text).casefold()
|
||||
parts = []
|
||||
highest_end = 0
|
||||
for emoji_part in emoji.emoji_list(text):
|
||||
|
|
Loading…
Reference in New Issue
Block a user