Use good gitignore
This commit is contained in:
parent
15bc52e9e6
commit
d418da9b86
141
.gitignore
vendored
141
.gitignore
vendored
|
@ -1,3 +1,138 @@
|
||||||
__pycache__
|
# Byte-compiled / optimized / DLL files
|
||||||
*.swp
|
__pycache__/
|
||||||
venv
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
|
|
||||||
|
# C extensions
|
||||||
|
*.so
|
||||||
|
|
||||||
|
# Distribution / packaging
|
||||||
|
.Python
|
||||||
|
build/
|
||||||
|
develop-eggs/
|
||||||
|
dist/
|
||||||
|
downloads/
|
||||||
|
eggs/
|
||||||
|
.eggs/
|
||||||
|
lib/
|
||||||
|
lib64/
|
||||||
|
parts/
|
||||||
|
sdist/
|
||||||
|
var/
|
||||||
|
wheels/
|
||||||
|
share/python-wheels/
|
||||||
|
*.egg-info/
|
||||||
|
.installed.cfg
|
||||||
|
*.egg
|
||||||
|
MANIFEST
|
||||||
|
|
||||||
|
# PyInstaller
|
||||||
|
# Usually these files are written by a python script from a template
|
||||||
|
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||||
|
*.manifest
|
||||||
|
*.spec
|
||||||
|
|
||||||
|
# Installer logs
|
||||||
|
pip-log.txt
|
||||||
|
pip-delete-this-directory.txt
|
||||||
|
|
||||||
|
# Unit test / coverage reports
|
||||||
|
htmlcov/
|
||||||
|
.tox/
|
||||||
|
.nox/
|
||||||
|
.coverage
|
||||||
|
.coverage.*
|
||||||
|
.cache
|
||||||
|
nosetests.xml
|
||||||
|
coverage.xml
|
||||||
|
*.cover
|
||||||
|
*.py,cover
|
||||||
|
.hypothesis/
|
||||||
|
.pytest_cache/
|
||||||
|
cover/
|
||||||
|
|
||||||
|
# Translations
|
||||||
|
*.mo
|
||||||
|
*.pot
|
||||||
|
|
||||||
|
# Django stuff:
|
||||||
|
*.log
|
||||||
|
local_settings.py
|
||||||
|
db.sqlite3
|
||||||
|
db.sqlite3-journal
|
||||||
|
|
||||||
|
# Flask stuff:
|
||||||
|
instance/
|
||||||
|
.webassets-cache
|
||||||
|
|
||||||
|
# Scrapy stuff:
|
||||||
|
.scrapy
|
||||||
|
|
||||||
|
# Sphinx documentation
|
||||||
|
docs/_build/
|
||||||
|
|
||||||
|
# PyBuilder
|
||||||
|
.pybuilder/
|
||||||
|
target/
|
||||||
|
|
||||||
|
# Jupyter Notebook
|
||||||
|
.ipynb_checkpoints
|
||||||
|
|
||||||
|
# IPython
|
||||||
|
profile_default/
|
||||||
|
ipython_config.py
|
||||||
|
|
||||||
|
# pyenv
|
||||||
|
# For a library or package, you might want to ignore these files since the code is
|
||||||
|
# intended to run in multiple environments; otherwise, check them in:
|
||||||
|
# .python-version
|
||||||
|
|
||||||
|
# pipenv
|
||||||
|
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||||
|
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||||
|
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||||
|
# install all needed dependencies.
|
||||||
|
#Pipfile.lock
|
||||||
|
|
||||||
|
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
||||||
|
__pypackages__/
|
||||||
|
|
||||||
|
# Celery stuff
|
||||||
|
celerybeat-schedule
|
||||||
|
celerybeat.pid
|
||||||
|
|
||||||
|
# SageMath parsed files
|
||||||
|
*.sage.py
|
||||||
|
|
||||||
|
# Environments
|
||||||
|
.env
|
||||||
|
.venv
|
||||||
|
env/
|
||||||
|
venv/
|
||||||
|
ENV/
|
||||||
|
env.bak/
|
||||||
|
venv.bak/
|
||||||
|
|
||||||
|
# Spyder project settings
|
||||||
|
.spyderproject
|
||||||
|
.spyproject
|
||||||
|
|
||||||
|
# Rope project settings
|
||||||
|
.ropeproject
|
||||||
|
|
||||||
|
# mkdocs documentation
|
||||||
|
/site
|
||||||
|
|
||||||
|
# mypy
|
||||||
|
.mypy_cache/
|
||||||
|
.dmypy.json
|
||||||
|
dmypy.json
|
||||||
|
|
||||||
|
# Pyre type checker
|
||||||
|
.pyre/
|
||||||
|
|
||||||
|
# pytype static type analyzer
|
||||||
|
.pytype/
|
||||||
|
|
||||||
|
# Cython debug symbols
|
||||||
|
cython_debug/
|
||||||
|
|
5
MANIFEST
5
MANIFEST
|
@ -1,5 +0,0 @@
|
||||||
# file GENERATED by distutils, do NOT edit
|
|
||||||
README
|
|
||||||
setup.py
|
|
||||||
gptc/__init__.py
|
|
||||||
gptc/__main__.py
|
|
|
@ -1,76 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
import sys
|
|
||||||
import spacy
|
|
||||||
|
|
||||||
# Load the small English spaCy pipeline once, at import time; listify()
# calls this shared object to tokenize and lemmatize text.
nlp = spacy.load('en_core_web_sm')
|
|
||||||
|
|
||||||
def listify(text):
    """Split *text* into lowercase lemmas, keeping only word-like tokens.

    The text is run through the module-level spaCy pipeline ``nlp``. A token
    is kept when its lemma starts with an ASCII letter; tokens whose lemma
    starts with anything else (digits, punctuation, whitespace, ...) are
    dropped.

    Args:
        text: The string to tokenize.

    Returns:
        A list of lowercased lemma strings, in document order.
    """
    letters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    lemmas = (token.lemma_ for token in nlp(text))
    # Skip empty lemmas explicitly: indexing [0] on '' would raise IndexError.
    return [lemma.lower() for lemma in lemmas if lemma and lemma[0] in letters]
|
|
||||||
|
|
||||||
|
|
||||||
def compile(raw_model):
    """Compile a raw training set into a model usable by ``Classifier``.

    Every sample's text is lemmatized with ``listify`` and the resulting
    words are grouped, sorted, under the sample's category. The stopword
    weight is then tuned by brute force: each value from 0.0 to 2.0 in steps
    of 0.1 is tried, and the value that classifies the most training samples
    correctly is kept (the lowest such value wins ties).

    Args:
        raw_model: A re-iterable collection of mappings, each with a
            ``'text'`` string and a ``'category'`` label.

    Returns:
        A dict ``{'text': {category: [word, ...]}, 'stopword': weight}``.
    """
    model = {}
    for portion in raw_model:
        words = listify(portion['text'])
        # Only create a category once it actually has words, so samples
        # that lemmatize to nothing do not add empty categories.
        if words:
            model.setdefault(portion['category'], []).extend(words)

    # Sort each word list once, after all samples have been collected.
    for words in model.values():
        words.sort()

    # All candidates share the same word lists; only the weight differs.
    candidates = [{'text': model, 'stopword': i / 10} for i in range(21)]
    for candidate in candidates:
        classifier = Classifier(candidate)
        candidate['correct'] = sum(
            1
            for sample in raw_model
            if classifier.check(sample['text']) == sample['category']
        )
        print('tested a model')

    # max() returns the first maximal item, i.e. the lowest winning weight.
    best = max(candidates, key=lambda candidate: candidate['correct'])
    # The score is bookkeeping only; it is not part of the compiled model.
    del best['correct']
    return best
|
|
||||||
|
|
||||||
|
|
||||||
class Classifier:
    """Classify text by matching its lemmas against per-category word lists."""

    def __init__(self, model, supress_uncompiled_model_warning=False):
        """Store *model*, compiling it first if it is still a raw model.

        Args:
            model: Either a compiled model (a dict whose ``'text'`` entry is
                a dict mapping category to word list) or a raw model in the
                format accepted by ``compile``.
            supress_uncompiled_model_warning: When true, do not print the
                warning on stderr that is otherwise emitted when a raw model
                has to be compiled on the fly.
        """
        # A raw model is a list of samples, so it must be recognised without
        # indexing it by 'text' (that would raise TypeError on a list).
        if isinstance(model, dict) and isinstance(model.get('text'), dict):
            self.model = model
        else:
            self.model = compile(model)
            if not supress_uncompiled_model_warning:
                print('WARNING: model was not compiled', file=sys.stderr)
                print('In development, this is OK, but precompiling the model is preferred for production use.', file=sys.stderr)
        self.warn = supress_uncompiled_model_warning

    def check(self, text):
        """Return the category that best matches *text*.

        Each lemma of *text* adds, for every occurrence of that lemma in a
        category's word list, ``weight / len(word list)`` to that category's
        score. ``weight`` is 1 for ordinary words and the model's
        ``'stopword'`` value (0.5 if the model has none) for spaCy English
        stop words.

        Args:
            text: The string to classify.

        Returns:
            The highest-scoring category, or ``'unknown'`` when no category
            reaches a score above zero.
        """
        stopword_value = self.model.get('stopword', 0.5)
        stopwords = spacy.lang.en.stop_words.STOP_WORDS
        categories = self.model['text']

        scores = {}
        for word in listify(text):
            base = stopword_value if word in stopwords else 1
            for category, catwords in categories.items():
                if not catwords:
                    # Nothing to match, and avoids dividing by zero below.
                    continue
                weight = base / len(catwords)
                # One addition per matching occurrence, so words repeated in
                # the training data count proportionally more.
                for catword in catwords:
                    if word == catword:
                        scores[category] = scores.get(category, 0) + weight

        best_category, best_score = 'unknown', 0
        for category, score in scores.items():
            # Strict comparison: zero scores never beat 'unknown', and the
            # first category seen wins ties.
            if score > best_score:
                best_category, best_score = category, score
        return best_category
|
|
|
@ -1,24 +0,0 @@
|
||||||
#!/usr/bin/env python3
"""Command-line entry point for gptc.

Loads a model from a JSON file and either compiles it to an output file
(``-c outfile``) or classifies text read from standard input.
"""
import argparse
import json
import sys  # needed for sys.stdin below

parser = argparse.ArgumentParser(description="General Purpose Text Classifier")
parser.add_argument('model', help='model to use')
parser.add_argument('-c', '--compile', help='compile raw model model to outfile', metavar='outfile')
args = parser.parse_args()

import gptc # PEP 8 violation, but don't fix it
# Way better for performance of argparse checking

with open(args.model, 'r') as f:
    raw_model = json.load(f)

if args.compile:
    # Compile mode: write the compiled model as JSON to the given outfile.
    with open(args.compile, 'w+') as f:
        json.dump(gptc.compile(raw_model), f)
else:
    # Classify mode: prompt when interactive, otherwise read all of stdin.
    classifier = gptc.Classifier(raw_model)
    if sys.stdin.isatty():
        text = input('Text to analyse: ')
    else:
        text = sys.stdin.read()
    print(classifier.check(text))
|
|
BIN
dist/gptc-0.0.0.tar.gz
vendored
BIN
dist/gptc-0.0.0.tar.gz
vendored
Binary file not shown.
BIN
dist/gptc-0.0.1.tar.gz
vendored
BIN
dist/gptc-0.0.1.tar.gz
vendored
Binary file not shown.
Loading…
Reference in New Issue
Block a user