Lightweight NLP library in pure Python - currently implements a text classifier
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

56 lines
1.2 KiB

# SPDX-License-Identifier: GPL-3.0-or-later
import timeit
import gptc
import json
import sys
# Benchmark script: reports the mean wall-clock time of compiling a raw model
# and of classifying one sample text, both measured with ``timeit``.

# Benchmark parameters. ``max_ngram_length`` is passed to both the compile
# call and the classifier; the iteration counts control how many times each
# timed statement is executed.
max_ngram_length = 10
compile_iterations = 100
classify_iterations = 10000

# Read the inputs with an explicit encoding so that the benchmarked text (and
# therefore the measured work) does not depend on the platform's locale.
with open("models/raw.json", encoding="utf-8") as f:
    raw_model = json.load(f)

with open("models/benchmark_text.txt", encoding="utf-8") as f:
    text = f.read()

print("Benchmarking GPTC on Python", sys.version)
print("Maximum ngram length:", max_ngram_length)

# ``timeit.timeit`` returns the total number of seconds taken by ``number``
# executions, so scaling by 1,000,000 and dividing by the iteration count
# gives the mean microseconds per execution. ``globals=globals()`` lets the
# timed statement see ``gptc``, ``raw_model`` and ``max_ngram_length``.
#
# The timed statement uses the same ``gptc.compile`` entry point that builds
# the classifier's model below, so the reported figure describes the code
# path this script actually relies on.
print(
    "Average compilation time over",
    compile_iterations,
    "iterations:",
    round(
        1000000
        * timeit.timeit(
            "gptc.compile(raw_model, max_ngram_length)",
            number=compile_iterations,
            globals=globals(),
        )
        / compile_iterations
    ),
    "microseconds",
)

# Build the classifier once, outside the timed statement, so that only
# ``classify`` is measured. The name must stay ``classifier`` because the
# timed statement below looks it up through ``globals()``.
classifier = gptc.Classifier(
    gptc.compile(raw_model, max_ngram_length), max_ngram_length
)

print(
    "Average classification time over",
    classify_iterations,
    "iterations:",
    round(
        1000000
        * timeit.timeit(
            "classifier.classify(text)",
            number=classify_iterations,
            globals=globals(),
        )
        / classify_iterations
    ),
    "microseconds",
)

print("--- benchmark complete ---")