diff --git a/.gitignore b/.gitignore index d1f3a59..a81c8ee 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,138 @@ -__pycache__ -*.swp -venv +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ diff --git a/MANIFEST b/MANIFEST deleted file mode 100644 index af34275..0000000 --- a/MANIFEST +++ /dev/null @@ -1,5 +0,0 @@ -# file GENERATED by distutils, do NOT edit -README -setup.py -gptc/__init__.py -gptc/__main__.py diff --git a/build/lib/gptc/__init__.py b/build/lib/gptc/__init__.py deleted file mode 100644 index 061c263..0000000 --- a/build/lib/gptc/__init__.py +++ /dev/null @@ -1,76 +0,0 @@ -#!/usr/bin/env python3 -import sys -import spacy - -nlp = spacy.load('en_core_web_sm') - -def listify(text): - return [string.lemma_.lower() for string in nlp(text) if string.lemma_[0] in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'] - - -def compile(raw_model): - model = {} - - for portion in raw_model: - text = listify(portion['text']) - category = portion['category'] - for word in text: - try: - model[category].append(word) - except: - model[category] = [word] - model[category].sort() - all_models = [ { 'text': model, 'stopword': i/10} for i in range(0, 21) ] - for test_model in all_models: - correct = 0 - classifier = Classifier(test_model) - for text in raw_model: - if classifier.check(text['text']) == text['category']: - correct += 1 - test_model['correct'] = correct - print('tested a model') - best = all_models[0] - for test_model in all_models: - if test_model['correct'] > best['correct']: - best = test_model - del best['correct'] - return best - return {'text': model} - - -class Classifier: - def __init__(self, model, supress_uncompiled_model_warning=False): - if type(model['text']) == dict: - self.model = model - else: - self.model = compile(model) - if not supress_uncompiled_model_warning: - print('WARNING: model was not compiled', file=sys.stderr) - print('In development, this is OK, but precompiling the model is preferred for production use.', file=sys.stderr) - self.warn = supress_uncompiled_model_warning - - def check(self, text): - model = self.model - stopword_value = 0.5 - try: - stopword_value = model['stopword'] - except: - pass - stopwords = spacy.lang.en.stop_words.STOP_WORDS - model = model['text'] - text = listify(text) - probs = {} - for word in text: - for category in model.keys(): - for catword in model[category]: - if word == catword: - weight = ( stopword_value if word in stopwords else 1 ) / len(model[category]) - try: - probs[category] += weight - except: - probs[category] = weight - most_likely = ['unknown', 0] - for category in probs.keys(): - if probs[category] > most_likely[1]: - most_likely = [category, probs[category]] - return most_likely[0] diff --git a/build/lib/gptc/__main__.py b/build/lib/gptc/__main__.py deleted file mode 100644 index ac04aa7..0000000 --- a/build/lib/gptc/__main__.py +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env python3 -import argparse -import json - -parser = argparse.ArgumentParser(description="General Purpose Text Classifier") -parser.add_argument('model', help='model to use') -parser.add_argument('-c', '--compile', help='compile raw model model to outfile', metavar='outfile') -args = parser.parse_args() - -import gptc # PEP 8 violation, but don't fix it -# Way better for performance of argparse checking - -with open(args.model, 'r') as f: - raw_model = json.load(f) -if args.compile: - with open(args.compile, 'w+') as f: - json.dump(gptc.compile(raw_model), f) -else: - classifier = gptc.Classifier(raw_model) - if sys.stdin.isatty(): - text = input('Text to analyse: ') - else: - text = sys.stdin.read() - print(classifier.check(text)) diff --git a/dist/gptc-0.0.0.tar.gz b/dist/gptc-0.0.0.tar.gz deleted file mode 100644 index db39204..0000000 Binary files a/dist/gptc-0.0.0.tar.gz and /dev/null differ diff --git a/dist/gptc-0.0.1.tar.gz b/dist/gptc-0.0.1.tar.gz deleted file mode 100644 index 926e051..0000000 Binary files a/dist/gptc-0.0.1.tar.gz and /dev/null differ