Compiler performance improvements

This commit is contained in:
Samuel Sloniker 2022-11-27 14:32:44 -08:00
parent 8d42a92848
commit c754293d69
Signed by: kj7rrv
GPG Key ID: 1BB4029E66285A62

View File

@@ -41,39 +41,30 @@ def compile(
word_counts: Dict[int, Dict[str, int]] = {} word_counts: Dict[int, Dict[str, int]] = {}
names = [] names = tuple(categories.keys())
for category, text in categories.items(): for category, text in categories.items():
if not category in names:
names.append(category)
for word in text: for word in text:
try: if word in word_counts:
counts_for_word = word_counts[word] try:
except KeyError: word_counts[word][category] += 1
counts_for_word = {} except KeyError:
word_counts[word] = counts_for_word word_counts[word][category] = 1
else:
word_counts[word] = {category: 1}
try: category_lengths = {
word_counts[word][category] += 1 category: len(text) for category, text in categories.items()
except KeyError:
word_counts[word][category] = 1
word_counts = {
word: counts
for word, counts in word_counts.items()
if sum(counts.values()) >= min_count
} }
word_weights: Dict[int, Dict[str, float]] = {} word_weights: Dict[int, Dict[str, float]] = {
for word, values in word_counts.items(): word: {
for category, value in values.items(): category: value / category_lengths[category]
try: for category, value in values.items()
word_weights[word][category] = value / len(categories[category]) }
except KeyError: for word, values in word_counts.items()
word_weights[word] = { if sum(values.values()) >= min_count
category: value / len(categories[category]) }
}
model: Dict[int, List[int]] = {} model: Dict[int, List[int]] = {}
for word, weights in word_weights.items(): for word, weights in word_weights.items():