parent
f38f4ca801
commit
99ad07a876
|
@ -7,7 +7,7 @@ import unicodedata
|
||||||
|
|
||||||
|
|
||||||
def tokenize(text: str, max_ngram_length: int = 1) -> List[str]:
|
def tokenize(text: str, max_ngram_length: int = 1) -> List[str]:
|
||||||
text = unicodedata.normalize("NFKD", text).lower()
|
text = unicodedata.normalize("NFKD", text).casefold()
|
||||||
parts = []
|
parts = []
|
||||||
highest_end = 0
|
highest_end = 0
|
||||||
for emoji_part in emoji.emoji_list(text):
|
for emoji_part in emoji.emoji_list(text):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user