Add normalize()

This commit is contained in:
Samuel Sloniker 2022-11-26 17:17:28 -08:00
parent fc4665bb9e
commit 08437a2696
Signed by: kj7rrv
GPG Key ID: 1BB4029E66285A62
2 changed files with 5 additions and 0 deletions

View File

@ -6,6 +6,7 @@ from gptc.compiler import compile as compile
from gptc.classifier import Classifier as Classifier from gptc.classifier import Classifier as Classifier
from gptc.pack import pack as pack from gptc.pack import pack as pack
from gptc.model import Model as Model, deserialize as deserialize from gptc.model import Model as Model, deserialize as deserialize
from gptc.tokenizer import normalize as normalize
from gptc.exceptions import ( from gptc.exceptions import (
GPTCError as GPTCError, GPTCError as GPTCError,
ModelError as ModelError, ModelError as ModelError,

View File

@ -46,3 +46,7 @@ def hash(tokens: List[str]) -> List[int]:
) )
for token in tokens for token in tokens
] ]
def normalize(text: str) -> str:
    """Return *text* re-assembled from its tokens, separated by single spaces.

    The text is passed through ``tokenize`` with a second argument of ``1``
    and the resulting tokens are joined with ``" "``.
    """
    # NOTE(review): the ``1`` is presumably the maximum n-gram length, i.e.
    # single-word tokens only -- confirm against tokenize()'s signature.
    tokens = tokenize(text, 1)
    return " ".join(tokens)