Compiler performance improvements
This commit is contained in:
parent
8d42a92848
commit
c754293d69
|
@@ -41,38 +41,29 @@ def compile(
|
||||||
|
|
||||||
word_counts: Dict[int, Dict[str, int]] = {}
|
word_counts: Dict[int, Dict[str, int]] = {}
|
||||||
|
|
||||||
names = []
|
names = tuple(categories.keys())
|
||||||
|
|
||||||
for category, text in categories.items():
|
for category, text in categories.items():
|
||||||
if not category in names:
|
|
||||||
names.append(category)
|
|
||||||
|
|
||||||
for word in text:
|
for word in text:
|
||||||
try:
|
if word in word_counts:
|
||||||
counts_for_word = word_counts[word]
|
|
||||||
except KeyError:
|
|
||||||
counts_for_word = {}
|
|
||||||
word_counts[word] = counts_for_word
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
word_counts[word][category] += 1
|
word_counts[word][category] += 1
|
||||||
except KeyError:
|
except KeyError:
|
||||||
word_counts[word][category] = 1
|
word_counts[word][category] = 1
|
||||||
|
else:
|
||||||
|
word_counts[word] = {category: 1}
|
||||||
|
|
||||||
word_counts = {
|
category_lengths = {
|
||||||
word: counts
|
category: len(text) for category, text in categories.items()
|
||||||
for word, counts in word_counts.items()
|
|
||||||
if sum(counts.values()) >= min_count
|
|
||||||
}
|
}
|
||||||
|
|
||||||
word_weights: Dict[int, Dict[str, float]] = {}
|
word_weights: Dict[int, Dict[str, float]] = {
|
||||||
for word, values in word_counts.items():
|
word: {
|
||||||
for category, value in values.items():
|
category: value / category_lengths[category]
|
||||||
try:
|
for category, value in values.items()
|
||||||
word_weights[word][category] = value / len(categories[category])
|
}
|
||||||
except KeyError:
|
for word, values in word_counts.items()
|
||||||
word_weights[word] = {
|
if sum(values.values()) >= min_count
|
||||||
category: value / len(categories[category])
|
|
||||||
}
|
}
|
||||||
|
|
||||||
model: Dict[int, List[int]] = {}
|
model: Dict[int, List[int]] = {}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user