Browse Source

Use good gitignore

master
scoopgracie 4 years ago
parent
commit
d418da9b86
  1. 141
      .gitignore
  2. 5
      MANIFEST
  3. 76
      build/lib/gptc/__init__.py
  4. 24
      build/lib/gptc/__main__.py
  5. BIN
      dist/gptc-0.0.0.tar.gz
  6. BIN
      dist/gptc-0.0.1.tar.gz

141
.gitignore vendored

@ -1,3 +1,138 @@
__pycache__
*.swp
venv
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/

5
MANIFEST

@ -1,5 +0,0 @@
# file GENERATED by distutils, do NOT edit
README
setup.py
gptc/__init__.py
gptc/__main__.py

76
build/lib/gptc/__init__.py

@ -1,76 +0,0 @@
#!/usr/bin/env python3
import sys
import spacy
# Load the 'en_core_web_sm' spaCy pipeline once, at import time; listify()
# calls it to tokenize text and read each token's lemma.
nlp = spacy.load('en_core_web_sm')
def listify(text):
    """Return the lowercased lemmas of *text* that start with an ASCII letter.

    The text is run through the module-level spaCy pipeline ``nlp``. A
    token is kept only when the first character of its lemma is one of
    a-z or A-Z; every other token is dropped. Tokens whose lemma is an
    empty string are skipped rather than raising ``IndexError``.
    """
    letters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    lemmas = (token.lemma_ for token in nlp(text))
    # `lemma and ...` guards the [0] lookup against empty lemmas.
    return [lemma.lower() for lemma in lemmas if lemma and lemma[0] in letters]
def compile(raw_model):
    """Compile a raw model into the dict form that ``Classifier`` consumes.

    ``raw_model`` is a re-iterable collection (it is walked twice) of
    mappings, each with a ``'text'`` entry holding training text and a
    ``'category'`` entry holding its label.

    The words of every portion are gathered per category via ``listify``
    and sorted. Twenty-one candidate models are then built, one for each
    stop-word weight 0.0, 0.1, ..., 2.0, and each is scored by how many
    training portions it classifies correctly. The first candidate with the
    highest score is returned.

    Returns a dict with ``'text'`` (category -> sorted word list) and
    ``'stopword'`` (the chosen stop-word weight).
    """
    model = {}
    for portion in raw_model:
        words = listify(portion['text'])
        # Only create the category once it has at least one word, so that
        # portions yielding no words do not add empty categories.
        if words:
            model.setdefault(portion['category'], []).extend(words)

    # Sorting once per category gives the same result as re-sorting after
    # every insertion.
    for words in model.values():
        words.sort()

    # All candidates intentionally share the same word table; only the
    # stop-word weight differs.
    candidates = [{'text': model, 'stopword': i / 10} for i in range(21)]
    for candidate in candidates:
        classifier = Classifier(candidate)
        candidate['correct'] = sum(
            classifier.check(portion['text']) == portion['category']
            for portion in raw_model
        )
        print('tested a model')

    # max() returns the first maximal element, so ties go to the lowest
    # stop-word weight.
    best = max(candidates, key=lambda candidate: candidate['correct'])
    # The score is bookkeeping only and is not part of the compiled model.
    del best['correct']
    return best
class Classifier:
    """Assign a category to text using a word-list model.

    A compiled model is a dict whose ``'text'`` entry maps each category to
    a list of words and whose optional ``'stopword'`` entry is the weight
    given to stop words (0.5 when absent). Anything else is treated as a
    raw model and passed through ``compile`` first.
    """

    def __init__(self, model, supress_uncompiled_model_warning=False):
        """Store *model*, compiling it first if it is not already compiled.

        Parameters:
            model: a compiled model dict, or a raw model accepted by
                ``compile``.
            supress_uncompiled_model_warning: when true, do not print the
                stderr warning that is otherwise emitted for a raw model.
        """
        # A raw model is a list, so it must be recognised *before* indexing
        # it with 'text'; indexing a list with a string raises TypeError.
        is_compiled = (
            isinstance(model, dict) and isinstance(model.get('text'), dict)
        )
        if is_compiled:
            self.model = model
        else:
            self.model = compile(model)
            if not supress_uncompiled_model_warning:
                print('WARNING: model was not compiled', file=sys.stderr)
                print('In development, this is OK, but precompiling the model is preferred for production use.', file=sys.stderr)
        self.warn = supress_uncompiled_model_warning

    def check(self, text):
        """Return the best-scoring category for *text*, or ``'unknown'``.

        Every word of ``listify(text)`` is compared against every word of
        every category. Each match adds a weight to that category's score:
        1 for ordinary words, or the model's stop-word weight for spaCy
        English stop words, divided by the category's word count. The
        first category with the strictly highest positive score wins;
        ``'unknown'`` is returned when no category scores above zero.
        """
        stopword_value = self.model.get('stopword', 0.5)
        stopwords = spacy.lang.en.stop_words.STOP_WORDS
        categories = self.model['text']

        probs = {}
        for word in listify(text):
            base = stopword_value if word in stopwords else 1
            for category, words in categories.items():
                if not words:
                    # Nothing to match, and avoids dividing by zero below.
                    continue
                weight = base / len(words)
                # Repeated words in a category count once per occurrence.
                for catword in words:
                    if catword == word:
                        probs[category] = probs.get(category, 0) + weight

        best_category, best_score = 'unknown', 0
        for category, score in probs.items():
            if score > best_score:
                best_category, best_score = category, score
        return best_category

24
build/lib/gptc/__main__.py

@ -1,24 +0,0 @@
#!/usr/bin/env python3
"""Command-line entry point: classify text with a model, or compile one.

With ``-c/--compile OUTFILE`` the JSON model file is compiled and written
to OUTFILE. Otherwise the model is used to classify text read from an
interactive prompt (when stdin is a terminal) or from all of stdin.
"""
import argparse
import json
import sys  # needed for sys.stdin below; previously missing (NameError)

parser = argparse.ArgumentParser(description="General Purpose Text Classifier")
parser.add_argument('model', help='model to use')
parser.add_argument('-c', '--compile', help='compile raw model model to outfile', metavar='outfile')
args = parser.parse_args()

import gptc # PEP 8 violation, but don't fix it
# Way better for performance of argparse checking: gptc is imported only
# after the arguments have parsed, so usage errors and --help return
# without paying for the import.

with open(args.model, 'r') as f:
    raw_model = json.load(f)

if args.compile:
    with open(args.compile, 'w+') as f:
        json.dump(gptc.compile(raw_model), f)
else:
    classifier = gptc.Classifier(raw_model)
    # Prompt when run interactively; otherwise consume piped input whole.
    if sys.stdin.isatty():
        text = input('Text to analyse: ')
    else:
        text = sys.stdin.read()
    print(classifier.check(text))

BIN
dist/gptc-0.0.0.tar.gz vendored

Binary file not shown.

BIN
dist/gptc-0.0.1.tar.gz vendored

Binary file not shown.
Loading…
Cancel
Save