Non-working type checks

This commit is contained in:
Samuel Sloniker 2022-07-17 16:51:19 -07:00
parent a207e281e7
commit 48639f5d8d

View File

@ -1,9 +1,12 @@
# SPDX-License-Identifier: LGPL-3.0-or-later # SPDX-License-Identifier: LGPL-3.0-or-later
import gptc.tokenizer import gptc.tokenizer
from typing import Iterable, Mapping, List, Dict, Union
def compile(raw_model, max_ngram_length=1): def compile(
raw_model: Iterable[Mapping[str, str]], max_ngram_length: int = 1
) -> Dict[str, Union[int, List[Union[str, int]]]]:
"""Compile a raw model. """Compile a raw model.
Parameters Parameters
@ -21,7 +24,7 @@ def compile(raw_model, max_ngram_length=1):
""" """
categories = {} categories: Dict[str, str] = {}
for portion in raw_model: for portion in raw_model:
text = gptc.tokenizer.tokenize(portion["text"], max_ngram_length) text = gptc.tokenizer.tokenize(portion["text"], max_ngram_length)
@ -31,7 +34,7 @@ def compile(raw_model, max_ngram_length=1):
except KeyError: except KeyError:
categories[category] = text categories[category] = text
categories_by_count = {} categories_by_count: Dict[str, Dict[str, float]] = {}
names = [] names = []
@ -49,7 +52,7 @@ def compile(raw_model, max_ngram_length=1):
categories_by_count[category][word] = 1 / len( categories_by_count[category][word] = 1 / len(
categories[category] categories[category]
) )
word_weights = {} word_weights: Dict[str, Dict[str, float]] = {}
for category, words in categories_by_count.items(): for category, words in categories_by_count.items():
for word, value in words.items(): for word, value in words.items():
try: try:
@ -57,7 +60,7 @@ def compile(raw_model, max_ngram_length=1):
except KeyError: except KeyError:
word_weights[word] = {category: value} word_weights[word] = {category: value}
model = {} model: Dict[str, Union[int, List[Union[str, int]]]] = {}
for word, weights in word_weights.items(): for word, weights in word_weights.items():
total = sum(weights.values()) total = sum(weights.values())
model[word] = [] model[word] = []