New model version

This commit is contained in:
Samuel Sloniker 2021-11-07 11:53:40 -08:00
parent 3baafa46fc
commit 3bf623aace
2 changed files with 15 additions and 7 deletions

View File

@@ -22,7 +22,7 @@ class Classifier:
except: except:
model_version = 1 model_version = 1
if model_version == 1: if model_version == 2:
self.model = model self.model = model
else: else:
# The model is an unsupported version # The model is an unsupported version
@@ -56,16 +56,15 @@ class Classifier:
probs = {} probs = {}
for word in text: for word in text:
try: try:
total = sum(model[word].values()) for category, value in enumerate(model[word]):
for category, value in model[word].items():
try: try:
probs[category] += value / total probs[category] += value
except KeyError: except KeyError:
probs[category] = value / total probs[category] = value
except KeyError: except KeyError:
pass pass
total = sum(probs.values()) total = sum(probs.values())
probs = {category: value/total for category, value in probs.items()} probs = {model['__names__'][category]: value/total for category, value in probs.items()}
return probs return probs
def classify(self, text): def classify(self, text):

View File

@@ -27,7 +27,12 @@ def compile(raw_model):
categories_by_count = {} categories_by_count = {}
names = []
for category, text in categories.items(): for category, text in categories.items():
if not category in names:
names.append(category)
categories_by_count[category] = {} categories_by_count[category] = {}
for word in text: for word in text:
try: try:
@@ -45,7 +50,11 @@ def compile(raw_model):
model = {} model = {}
for word, weights in word_weights.items(): for word, weights in word_weights.items():
total = sum(weights.values()) total = sum(weights.values())
model[word] = {category: weight/total for category, weight in weights.items()} model[word] = []
for category in names:
model[word].append(weights.get(category, 0)/total)
model['__names__'] = names
model['__version__'] = 2 model['__version__'] = 2
model['__raw__'] = raw_model model['__raw__'] = raw_model