Split hash function
This commit is contained in:
parent
08437a2696
commit
b3a43150d8
|
@ -39,13 +39,14 @@ def tokenize(text: str, max_ngram_length: int = 1) -> List[str]:
|
|||
return ngrams
|
||||
|
||||
|
||||
def hash_single(token: str) -> int:
    """Hash a single token to a 48-bit unsigned integer.

    The token is encoded as UTF-8 and digested with SHA-256; the first
    six bytes of the digest are interpreted as a big-endian integer.

    Args:
        token: The token to hash.

    Returns:
        An integer in the range ``[0, 2**48)``.
    """
    digest = hashlib.sha256(token.encode("utf-8")).digest()
    # Only the leading 6 bytes (48 bits) of the 32-byte digest are kept.
    return int.from_bytes(digest[:6], "big")
|
||||
|
||||
|
||||
def hash(tokens: List[str]) -> List[int]:
    """Hash every token in ``tokens`` with ``hash_single``.

    Args:
        tokens: The tokens to hash.

    Returns:
        A list with one integer per input token, in the same order.
        An empty input yields an empty list.
    """
    # NOTE: this function's name shadows the builtin ``hash`` inside this
    # module; the name is kept so existing callers keep working.
    return [hash_single(token) for token in tokens]
|
||||
|
||||
|
||||
def normalize(text: str) -> str:
|
||||
|
|
Loading…
Reference in New Issue
Block a user