Non-working type checks
This commit is contained in:
parent
a207e281e7
commit
48639f5d8d
|
@ -1,9 +1,12 @@
|
||||||
# SPDX-License-Identifier: LGPL-3.0-or-later
|
# SPDX-License-Identifier: LGPL-3.0-or-later
|
||||||
|
|
||||||
import gptc.tokenizer
|
import gptc.tokenizer
|
||||||
|
from typing import Iterable, Mapping, List, Dict, Union
|
||||||
|
|
||||||
|
|
||||||
def compile(raw_model, max_ngram_length=1):
|
def compile(
|
||||||
|
raw_model: Iterable[Mapping[str, str]], max_ngram_length: int = 1
|
||||||
|
) -> Dict[str, Union[int, List[Union[str, int]]]]:
|
||||||
"""Compile a raw model.
|
"""Compile a raw model.
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
|
@ -21,7 +24,7 @@ def compile(raw_model, max_ngram_length=1):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
categories = {}
|
categories: Dict[str, str] = {}
|
||||||
|
|
||||||
for portion in raw_model:
|
for portion in raw_model:
|
||||||
text = gptc.tokenizer.tokenize(portion["text"], max_ngram_length)
|
text = gptc.tokenizer.tokenize(portion["text"], max_ngram_length)
|
||||||
|
@ -31,7 +34,7 @@ def compile(raw_model, max_ngram_length=1):
|
||||||
except KeyError:
|
except KeyError:
|
||||||
categories[category] = text
|
categories[category] = text
|
||||||
|
|
||||||
categories_by_count = {}
|
categories_by_count: Dict[str, Dict[str, float]] = {}
|
||||||
|
|
||||||
names = []
|
names = []
|
||||||
|
|
||||||
|
@ -49,7 +52,7 @@ def compile(raw_model, max_ngram_length=1):
|
||||||
categories_by_count[category][word] = 1 / len(
|
categories_by_count[category][word] = 1 / len(
|
||||||
categories[category]
|
categories[category]
|
||||||
)
|
)
|
||||||
word_weights = {}
|
word_weights: Dict[str, Dict[str, float]] = {}
|
||||||
for category, words in categories_by_count.items():
|
for category, words in categories_by_count.items():
|
||||||
for word, value in words.items():
|
for word, value in words.items():
|
||||||
try:
|
try:
|
||||||
|
@ -57,7 +60,7 @@ def compile(raw_model, max_ngram_length=1):
|
||||||
except KeyError:
|
except KeyError:
|
||||||
word_weights[word] = {category: value}
|
word_weights[word] = {category: value}
|
||||||
|
|
||||||
model = {}
|
model: Dict[str, Union[int, List[Union[str, int]]]] = {}
|
||||||
for word, weights in word_weights.items():
|
for word, weights in word_weights.items():
|
||||||
total = sum(weights.values())
|
total = sum(weights.values())
|
||||||
model[word] = []
|
model[word] = []
|
||||||
|
|
Loading…
Reference in New Issue
Block a user