Bugfix release

2020-03-30 09:45:26 -07:00 · 2020-03-30 09:45:26 -07:00 · 11b3785f84
commit 11b3785f84
parent 1ca354c7f5
8 changed files with 127 additions and 22 deletions
--- a/2
+++ b/2
@@ -1,3 +1,5 @@
 # file GENERATED by distutils, do NOT edit
+README
 setup.py
 gptc/__init__.py
+gptc/__main__.py
--- a/README.md
+++ b/README.md
--- a/build/lib/gptc/__init__.py
+++ b/build/lib/gptc/__init__.py
@@ -0,0 +1,76 @@
+#!/usr/bin/env python3
+import sys
+import spacy
+
+nlp = spacy.load('en_core_web_sm')
+
+def listify(text):
+    """Turn *text* into a list of lowercased lemmas.
+
+    The text is run through the module-level spaCy pipeline ``nlp``. A token
+    is kept only when its lemma starts with an ASCII letter; the kept lemmas
+    are lowercased and returned in token order.
+    """
+    letters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
+    lemmas = (token.lemma_ for token in nlp(text))
+    # Check for an empty lemma explicitly: indexing '' raises IndexError, and
+    # '' is "in" every string, so slicing would wrongly accept it.
+    return [lemma.lower() for lemma in lemmas if lemma and lemma[0] in letters]
+
+
+def compile(raw_model):
+    """Build a compiled model from a raw model and pick its stopword weight.
+
+    ``raw_model`` is iterated several times and each item is indexed with
+    ``'text'`` and ``'category'``. The words produced by ``listify`` for each
+    item are grouped per category and sorted. Then 21 candidate models are
+    tried, with ``'stopword'`` set to 0.0, 0.1, ..., 2.0; each is scored by
+    how many of the raw samples a ``Classifier`` built from it labels with
+    the sample's own category. The first candidate with the highest score is
+    returned.
+
+    Returns a dict ``{'text': {category: [word, ...]}, 'stopword': float}``.
+
+    Note: the name shadows the ``compile`` builtin inside this module; it is
+    kept because it is the package's public entry point.
+    """
+    model = {}
+    for portion in raw_model:
+        words = listify(portion['text'])
+        # Only create a category once it has at least one word, so samples
+        # that yield no words do not add empty categories to the model.
+        if words:
+            model.setdefault(portion['category'], []).extend(words)
+    # Sort each category once, instead of after every appended word.
+    for words in model.values():
+        words.sort()
+
+    best = None
+    best_correct = -1
+    for i in range(0, 21):
+        candidate = {'text': model, 'stopword': i / 10}
+        classifier = Classifier(candidate)
+        correct = 0
+        for sample in raw_model:
+            if classifier.check(sample['text']) == sample['category']:
+                correct += 1
+        print('tested a model')
+        # Strict comparison keeps the earliest candidate on ties.
+        if correct > best_correct:
+            best = candidate
+            best_correct = correct
+    return best
+
+
+class Classifier:
+    """Classify text against a compiled model.
+
+    A compiled model is a dict whose ``'text'`` entry maps each category to
+    a list of words, optionally with a ``'stopword'`` weight. Anything else
+    is treated as a raw model and compiled on the fly with ``compile``.
+    """
+
+    def __init__(self, model, supress_uncompiled_model_warning=False):
+        """Store *model*, compiling it first if it is not compiled yet.
+
+        When the model has to be compiled, a warning is printed to stderr
+        unless ``supress_uncompiled_model_warning`` is true.
+        """
+        # A raw model is a list of samples, so it must be type-checked
+        # before it is indexed with 'text' (indexing a list by str raises).
+        if isinstance(model, dict) and isinstance(model.get('text'), dict):
+            self.model = model
+        else:
+            self.model = compile(model)
+            if not supress_uncompiled_model_warning:
+                print('WARNING: model was not compiled', file=sys.stderr)
+                print('In development, this is OK, but precompiling the model is preferred for production use.', file=sys.stderr)
+        self.warn = supress_uncompiled_model_warning
+
+    def check(self, text):
+        """Return the category that best matches *text*, or ``'unknown'``.
+
+        Every occurrence of a word of *text* in a category's word list adds
+        ``1 / len(word list)`` to that category's score. Words found in
+        spaCy's English stop-word set add the model's ``'stopword'`` weight
+        (0.5 when the model has none) instead of 1. The category with the
+        highest score above zero wins; the earliest-scored one wins ties.
+        """
+        stopword_value = self.model.get('stopword', 0.5)
+        stopwords = spacy.lang.en.stop_words.STOP_WORDS
+        categories = self.model['text']
+        probs = {}
+        for word in listify(text):
+            word_value = stopword_value if word in stopwords else 1
+            for category, catwords in categories.items():
+                # The weight depends only on the word and the category, so
+                # compute it once rather than once per matching entry.
+                weight = None
+                for catword in catwords:
+                    if word == catword:
+                        if weight is None:
+                            weight = word_value / len(catwords)
+                        probs[category] = probs.get(category, 0) + weight
+        most_likely = ['unknown', 0]
+        for category, score in probs.items():
+            if score > most_likely[1]:
+                most_likely = [category, score]
+        return most_likely[0]
--- a/build/lib/gptc/__main__.py
+++ b/build/lib/gptc/__main__.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python3
+# Command-line entry point: compile a raw model to a file, or classify text
+# read from stdin with the given model.
+import argparse
+import json
+import sys  # needed for sys.stdin below; it was used without being imported
+
+parser = argparse.ArgumentParser(description="General Purpose Text Classifier")
+parser.add_argument('model', help='model to use')
+parser.add_argument('-c', '--compile', help='compile raw model model to outfile', metavar='outfile')
+args = parser.parse_args()
+
+import gptc # PEP 8 violation, but don't fix it
+# Way better for performance of argparse checking
+# (the import is deferred until the arguments have been parsed, so usage
+# errors and --help exit before gptc is loaded)
+
+with open(args.model, 'r') as f:
+    raw_model = json.load(f)
+if args.compile:
+    # Compile mode: write the compiled model as JSON to the requested file.
+    with open(args.compile, 'w+') as f:
+        json.dump(gptc.compile(raw_model), f)
+else:
+    # Classify mode: prompt when run interactively, otherwise read all of
+    # stdin, then print the predicted category.
+    classifier = gptc.Classifier(raw_model)
+    if sys.stdin.isatty():
+        text = input('Text to analyse: ')
+    else:
+        text = sys.stdin.read()
+    print(classifier.check(text))
--- a/dist/gptc-0.0.1.tar.gz
+++ b/dist/gptc-0.0.1.tar.gz
--- a/gptc/__init__.py
+++ b/gptc/__init__.py
@@ -1,8 +1,6 @@
 #!/usr/bin/env python3
 import sys
-import json
 import spacy
-import argparse

 nlp = spacy.load('en_core_web_sm')

@@ -76,22 +74,3 @@ class Classifier:
            if probs[category] > most_likely[1]:
                most_likely = [category, probs[category]]
        return most_likely[0]
-
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description="General Purpose Text Classifier")
-    parser.add_argument('model', help='model to use')
-    parser.add_argument('-c', '--compile', help='compile raw model model to outfile', metavar='outfile')
-    args = parser.parse_args()
-    with open(args.model, 'r') as f:
-        raw_model = json.load(f)
-    if args.compile:
-        with open(args.compile, 'w+') as f:
-            json.dump(compile(raw_model), f)
-    else:
-        classifier = Classifier(raw_model)
-        if sys.stdin.isatty():
-            text = input('Text to analyse: ')
-        else:
-            text = sys.stdin.read()
-        print(classifier.check(text))
--- a/gptc/__main__.py
+++ b/gptc/__main__.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python3
+# Command-line entry point: compile a raw model to a file, or classify text
+# read from stdin with the given model.
+import argparse
+import json
+import sys  # needed for sys.stdin below; it was used without being imported
+
+parser = argparse.ArgumentParser(description="General Purpose Text Classifier")
+parser.add_argument('model', help='model to use')
+parser.add_argument('-c', '--compile', help='compile raw model model to outfile', metavar='outfile')
+args = parser.parse_args()
+
+import gptc # PEP 8 violation, but don't fix it
+# Way better for performance of argparse checking
+# (the import is deferred until the arguments have been parsed, so usage
+# errors and --help exit before gptc is loaded)
+
+with open(args.model, 'r') as f:
+    raw_model = json.load(f)
+if args.compile:
+    # Compile mode: write the compiled model as JSON to the requested file.
+    with open(args.compile, 'w+') as f:
+        json.dump(gptc.compile(raw_model), f)
+else:
+    # Classify mode: prompt when run interactively, otherwise read all of
+    # stdin, then print the predicted category.
+    classifier = gptc.Classifier(raw_model)
+    if sys.stdin.isatty():
+        text = input('Text to analyse: ')
+    else:
+        text = sys.stdin.read()
+    print(classifier.check(text))
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@ from distutils.core import setup
 setup(
  name = 'gptc',         # How you named your package folder (MyLib)
  packages = ['gptc'],   # Chose the same as "name"
-  version = '0.0.0',      # Start with a small number and increase it with every change you make
+  version = '0.0.1',      # Start with a small number and increase it with every change you make
  license='MIT',        # Chose a license from here: https://help.github.com/articles/licensing-a-repository
  description = 'General-purpose English text classifier',   # Give a short description about your library
  author = 'ScoopGracie',                   # Type in your name