Lightweight NLP library in pure Python
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 

14 lines
344 B

# SPDX-License-Identifier: GPL-3.0-or-later
from typing import Iterable, Iterator
import hashlib
def hash_single(token: str) -> int:
    """Return a 48-bit integer hash of *token*.

    The token is encoded as UTF-8 and digested with SHA-256; the first six
    bytes of that digest are interpreted as a big-endian unsigned integer.
    The result therefore depends only on the token's text.

    Args:
        token: The string to hash.

    Returns:
        An integer in the range ``0 <= value < 2**48``.
    """
    digest = hashlib.sha256(token.encode("utf-8")).digest()
    # Only the leading 6 bytes (48 bits) of the 32-byte digest are kept.
    return int.from_bytes(digest[:6], "big")
def hash_list(tokens: Iterable[str]) -> Iterator[int]:
    """Lazily hash every token in *tokens* with ``hash_single``.

    Args:
        tokens: An iterable of strings to hash.

    Returns:
        An iterator yielding one integer hash per token, in input order.
        Hashes are computed on demand as the iterator is consumed, so the
        result can be traversed only once.
    """
    return (hash_single(token) for token in tokens)