Compare commits
3 Commits
e4eb322aa7
...
12f97ae765
Author | SHA1 | Date | |
---|---|---|---|
12f97ae765 | |||
c754293d69 | |||
8d42a92848 |
|
@ -41,39 +41,30 @@ def compile(
|
|||
|
||||
word_counts: Dict[int, Dict[str, int]] = {}
|
||||
|
||||
names = []
|
||||
names = tuple(categories.keys())
|
||||
|
||||
for category, text in categories.items():
|
||||
if not category in names:
|
||||
names.append(category)
|
||||
|
||||
for word in text:
|
||||
try:
|
||||
counts_for_word = word_counts[word]
|
||||
except KeyError:
|
||||
counts_for_word = {}
|
||||
word_counts[word] = counts_for_word
|
||||
if word in word_counts:
|
||||
try:
|
||||
word_counts[word][category] += 1
|
||||
except KeyError:
|
||||
word_counts[word][category] = 1
|
||||
else:
|
||||
word_counts[word] = {category: 1}
|
||||
|
||||
try:
|
||||
word_counts[word][category] += 1
|
||||
except KeyError:
|
||||
word_counts[word][category] = 1
|
||||
|
||||
word_counts = {
|
||||
word: counts
|
||||
for word, counts in word_counts.items()
|
||||
if sum(counts.values()) >= min_count
|
||||
category_lengths = {
|
||||
category: len(text) for category, text in categories.items()
|
||||
}
|
||||
|
||||
word_weights: Dict[int, Dict[str, float]] = {}
|
||||
for word, values in word_counts.items():
|
||||
for category, value in values.items():
|
||||
try:
|
||||
word_weights[word][category] = value / len(categories[category])
|
||||
except KeyError:
|
||||
word_weights[word] = {
|
||||
category: value / len(categories[category])
|
||||
}
|
||||
word_weights: Dict[int, Dict[str, float]] = {
|
||||
word: {
|
||||
category: value / category_lengths[category]
|
||||
for category, value in values.items()
|
||||
}
|
||||
for word, values in word_counts.items()
|
||||
if sum(values.values()) >= min_count
|
||||
}
|
||||
|
||||
model: Dict[int, List[int]] = {}
|
||||
for word, weights in word_weights.items():
|
||||
|
|
|
@ -62,7 +62,7 @@ class Model:
|
|||
}
|
||||
return probs
|
||||
|
||||
def get(self, token):
|
||||
def get(self, token: str) -> Dict[str, float]:
|
||||
try:
|
||||
weights = self.weights[
|
||||
gptc.tokenizer.hash_single(gptc.tokenizer.normalize(token))
|
||||
|
|
|
@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|||
|
||||
[project]
|
||||
name = "gptc"
|
||||
version = "3.1.0"
|
||||
version = "3.1.1"
|
||||
description = "General-purpose text classifier"
|
||||
readme = "README.md"
|
||||
authors = [{ name = "Samuel Sloniker", email = "sam@kj7rrv.com"}]
|
||||
|
|
Loading…
Reference in New Issue
Block a user