Use good gitignore
This commit is contained in:
parent
15bc52e9e6
commit
d418da9b86
141
.gitignore
vendored
141
.gitignore
vendored
|
@ -1,3 +1,138 @@
|
|||
__pycache__
|
||||
*.swp
|
||||
venv
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
share/python-wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.py,cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
cover/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
db.sqlite3
|
||||
db.sqlite3-journal
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
.pybuilder/
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# IPython
|
||||
profile_default/
|
||||
ipython_config.py
|
||||
|
||||
# pyenv
|
||||
# For a library or package, you might want to ignore these files since the code is
|
||||
# intended to run in multiple environments; otherwise, check them in:
|
||||
# .python-version
|
||||
|
||||
# pipenv
|
||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||
# install all needed dependencies.
|
||||
#Pipfile.lock
|
||||
|
||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
||||
__pypackages__/
|
||||
|
||||
# Celery stuff
|
||||
celerybeat-schedule
|
||||
celerybeat.pid
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
# pytype static type analyzer
|
||||
.pytype/
|
||||
|
||||
# Cython debug symbols
|
||||
cython_debug/
|
||||
|
|
5
MANIFEST
5
MANIFEST
|
@ -1,5 +0,0 @@
|
|||
# file GENERATED by distutils, do NOT edit
|
||||
README
|
||||
setup.py
|
||||
gptc/__init__.py
|
||||
gptc/__main__.py
|
|
@ -1,76 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
import sys
|
||||
import spacy
|
||||
|
||||
# Load the spaCy 'en_core_web_sm' pipeline once, when this module is imported;
# listify() below runs every piece of text through it.
nlp = spacy.load('en_core_web_sm')
|
||||
|
||||
def listify(text):
    """Return the lowercased lemmas of *text* that start with an ASCII letter.

    The text is processed with the module-level spaCy pipeline ``nlp``. A
    token is kept only when the first character of its lemma is one of
    ``a-z`` / ``A-Z``; every other token is dropped.

    Args:
        text: The string to tokenize.

    Returns:
        A list of lowercased lemma strings, in token order.
    """
    letters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    lemmas = (token.lemma_ for token in nlp(text))
    # An empty lemma has no first character: skip it instead of letting
    # ``lemma[0]`` raise IndexError.
    return [lemma.lower() for lemma in lemmas if lemma and lemma[0] in letters]
|
||||
|
||||
|
||||
def compile(raw_model):
    """Compile a raw model into the dict form that ``Classifier`` accepts.

    Each portion's text is split with ``listify`` and its words are collected
    under the portion's category. Twenty-one candidate models are then built,
    sharing that word table and differing only in their ``'stopword'`` weight
    (0.0, 0.1, ..., 2.0). Every candidate is scored by how many of the raw
    portions it classifies back into their own category, and the first
    candidate with the highest score is returned.

    Args:
        raw_model: An iterable of mappings, each with a ``'text'`` string and
            a ``'category'`` key. It is iterated more than once.

    Returns:
        A dict with keys ``'text'`` (category -> sorted list of words) and
        ``'stopword'`` (the selected stop-word weight).
    """
    model = {}
    for portion in raw_model:
        category = portion['category']
        # A category is only created once it has at least one word.
        for word in listify(portion['text']):
            model.setdefault(category, []).append(word)
    # Sort each category once, after all of its words have been collected.
    for words in model.values():
        words.sort()

    all_models = [{'text': model, 'stopword': i / 10} for i in range(21)]
    for test_model in all_models:
        classifier = Classifier(test_model)
        test_model['correct'] = sum(
            1
            for sample in raw_model
            if classifier.check(sample['text']) == sample['category']
        )
        print('tested a model')

    # max() returns the first maximal item, i.e. the lowest stop-word weight
    # among equally accurate candidates.
    best = max(all_models, key=lambda candidate: candidate['correct'])
    # The score is bookkeeping only and is not part of the compiled model.
    del best['correct']
    return best
|
||||
|
||||
|
||||
class Classifier:
    """Classify text against a model of per-category word lists.

    Attributes:
        model: The compiled model dict (``'text'`` maps each category to a
            list of words; ``'stopword'`` is optional).
        warn: The value passed as ``supress_uncompiled_model_warning``.
    """

    def __init__(self, model, supress_uncompiled_model_warning=False):
        """Store *model*, compiling it first when it is not compiled yet.

        Args:
            model: Either a compiled model (a dict whose ``'text'`` entry is
                a dict) or a raw model, which is passed to ``compile``.
            supress_uncompiled_model_warning: When true, no warning is
                written to stderr if the model has to be compiled here.
        """
        # A raw model is not a dict with a dict under 'text' (``compile``
        # iterates it as a sequence of portions), so check the container
        # type before indexing into it.
        if isinstance(model, dict) and isinstance(model.get('text'), dict):
            self.model = model
        else:
            self.model = compile(model)
            if not supress_uncompiled_model_warning:
                print('WARNING: model was not compiled', file=sys.stderr)
                print('In development, this is OK, but precompiling the model is preferred for production use.', file=sys.stderr)
        self.warn = supress_uncompiled_model_warning

    def check(self, text):
        """Return the category that best matches *text*.

        Every occurrence of a word of *text* in a category's word list adds
        ``1 / len(word list)`` to that category's score; for spaCy English
        stop words the numerator is the model's ``'stopword'`` value (0.5
        when the model has none) instead of 1.

        Args:
            text: The string to classify.

        Returns:
            The category with the highest score (the first one reached on a
            tie), or ``'unknown'`` when no category scores above zero.
        """
        stopword_value = self.model.get('stopword', 0.5)
        stopwords = spacy.lang.en.stop_words.STOP_WORDS
        categories = self.model['text']

        probs = {}
        for word in listify(text):
            base = stopword_value if word in stopwords else 1
            for category, catwords in categories.items():
                for catword in catwords:
                    if word == catword:
                        # Normalise by list length so that large categories
                        # are not favoured merely for having more words.
                        weight = base / len(catwords)
                        probs[category] = probs.get(category, 0) + weight

        most_likely = ['unknown', 0]
        for category, score in probs.items():
            if score > most_likely[1]:
                most_likely = [category, score]
        return most_likely[0]
|
|
@ -1,24 +0,0 @@
|
|||
#!/usr/bin/env python3
"""Command-line entry point: compile a model file or classify text with it.

The model is read as JSON from the path given on the command line. With
``-c/--compile`` the compiled model is written as JSON to the given output
file; otherwise the text to classify is read from stdin (prompting when stdin
is a terminal) and the resulting category is printed.
"""
import argparse
import json
import sys  # needed for sys.stdin below

parser = argparse.ArgumentParser(description="General Purpose Text Classifier")
parser.add_argument('model', help='model to use')
parser.add_argument('-c', '--compile', help='compile raw model model to outfile', metavar='outfile')
args = parser.parse_args()

import gptc # PEP 8 violation, but don't fix it
# Way better for performance of argparse checking

with open(args.model, 'r') as f:
    raw_model = json.load(f)

if args.compile:
    # The output file is only written, never read back, so plain 'w' suffices.
    with open(args.compile, 'w') as f:
        json.dump(gptc.compile(raw_model), f)
else:
    classifier = gptc.Classifier(raw_model)
    if sys.stdin.isatty():
        text = input('Text to analyse: ')
    else:
        text = sys.stdin.read()
    print(classifier.check(text))
|
BIN
dist/gptc-0.0.0.tar.gz
vendored
BIN
dist/gptc-0.0.0.tar.gz
vendored
Binary file not shown.
BIN
dist/gptc-0.0.1.tar.gz
vendored
BIN
dist/gptc-0.0.1.tar.gz
vendored
Binary file not shown.
Loading…
Reference in New Issue
Block a user