From d418da9b863b021ba5c041bccf13415071f06c4e Mon Sep 17 00:00:00 2001 From: scoopgracie Date: Fri, 14 Aug 2020 16:21:22 -0700 Subject: [PATCH] Use good gitignore --- .gitignore | 141 ++++++++++++++++++++++++++++++++++++- MANIFEST | 5 -- build/lib/gptc/__init__.py | 76 -------------------- build/lib/gptc/__main__.py | 24 ------- dist/gptc-0.0.0.tar.gz | Bin 2263 -> 0 bytes dist/gptc-0.0.1.tar.gz | Bin 3478 -> 0 bytes 6 files changed, 138 insertions(+), 108 deletions(-) delete mode 100644 MANIFEST delete mode 100644 build/lib/gptc/__init__.py delete mode 100644 build/lib/gptc/__main__.py delete mode 100644 dist/gptc-0.0.0.tar.gz delete mode 100644 dist/gptc-0.0.1.tar.gz diff --git a/.gitignore b/.gitignore index d1f3a59..a81c8ee 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,138 @@ -__pycache__ -*.swp -venv +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ diff --git a/MANIFEST b/MANIFEST deleted file mode 100644 index af34275..0000000 --- a/MANIFEST +++ /dev/null @@ -1,5 +0,0 @@ -# file GENERATED by distutils, do NOT edit -README -setup.py -gptc/__init__.py -gptc/__main__.py diff --git a/build/lib/gptc/__init__.py b/build/lib/gptc/__init__.py deleted file mode 100644 index 061c263..0000000 --- a/build/lib/gptc/__init__.py +++ /dev/null @@ -1,76 +0,0 @@ -#!/usr/bin/env python3 -import sys -import spacy - -nlp = spacy.load('en_core_web_sm') - -def listify(text): - return [string.lemma_.lower() for string in nlp(text) if string.lemma_[0] in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'] - - -def compile(raw_model): - model = {} - - for portion in raw_model: - text = listify(portion['text']) - category = portion['category'] - for word in text: - try: - model[category].append(word) - except: - model[category] = [word] - model[category].sort() - all_models = [ { 'text': model, 'stopword': i/10} for i in range(0, 21) ] - for test_model in all_models: - correct = 0 - classifier = Classifier(test_model) - for text in raw_model: - if classifier.check(text['text']) == text['category']: - correct += 1 - test_model['correct'] = correct - print('tested a model') - best = all_models[0] - for test_model in all_models: - if test_model['correct'] > best['correct']: - best = test_model - del best['correct'] - return best - return {'text': model} - - -class Classifier: - def __init__(self, model, supress_uncompiled_model_warning=False): - if type(model['text']) == dict: - self.model = model - else: - self.model = compile(model) - if not supress_uncompiled_model_warning: - print('WARNING: model was not compiled', file=sys.stderr) - print('In development, this is OK, but precompiling the model is preferred for production use.', file=sys.stderr) - self.warn = supress_uncompiled_model_warning - - def check(self, text): - model = self.model - stopword_value = 0.5 - try: - stopword_value = model['stopword'] - except: - pass - stopwords = spacy.lang.en.stop_words.STOP_WORDS - model = model['text'] - text = listify(text) - probs = {} - for word in text: - for category in model.keys(): - for catword in model[category]: - if word == catword: - weight = ( stopword_value if word in stopwords else 1 ) / len(model[category]) - try: - probs[category] += weight - except: - probs[category] = weight - most_likely = ['unknown', 0] - for category in probs.keys(): - if probs[category] > most_likely[1]: - most_likely = [category, probs[category]] - return most_likely[0] diff --git a/build/lib/gptc/__main__.py b/build/lib/gptc/__main__.py deleted file mode 100644 index ac04aa7..0000000 --- a/build/lib/gptc/__main__.py +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env python3 -import argparse -import json - -parser = argparse.ArgumentParser(description="General Purpose Text Classifier") -parser.add_argument('model', help='model to use') -parser.add_argument('-c', '--compile', help='compile raw model model to outfile', metavar='outfile') -args = parser.parse_args() - -import gptc # PEP 8 violation, but don't fix it -# Way better for performance of argparse checking - -with open(args.model, 'r') as f: - raw_model = json.load(f) -if args.compile: - with open(args.compile, 'w+') as f: - json.dump(gptc.compile(raw_model), f) -else: - classifier = gptc.Classifier(raw_model) - if sys.stdin.isatty(): - text = input('Text to analyse: ') - else: - text = sys.stdin.read() - print(classifier.check(text)) diff --git a/dist/gptc-0.0.0.tar.gz b/dist/gptc-0.0.0.tar.gz deleted file mode 100644 index db39204a28e61c9e09c1159296e875f1e46d13c9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2263 zcmV;|2q^a-iwFojfmL1t|72-%bT4OcbYm?rE-)@IE_7jX0PR_AbK5o&_GkVIob!Rw zRH7uyl3UHFb8eG3ZW7xUr_J4YJRFFGB-A9x0-zOLrvLqR0q{lg`^+_W*D;bYNnrQc z#qMHvVJ~tWb^8JQ>+SqeqYvX?G{WcLVAO9u%ds;Uj`v2xet-X9f2Thf?TyDfWVG`Y zMxiv7gzTs&%kqV!5ofp3J+xkKqj_i{yzGDa4;T?_Vx~T`g?u&+9CaC|Nr6R#b0`blD!!hJtnS6zShf33_YIanbbtB zRr&g>%0xAt(`a3OP`SvdRLt{4l9Q=j43dn-EuV=n$|MU{Y!<52Z+l+M<|N@t^ZB}^ z*=OCJcm#%Ix{!igC@r~I1PM!18Uo0Q$yS@pGfC_U;evn|%S!m1Y%pB(FOk5fvk2rY zmi*VNBo$fyn^d~EUVUEw>+pvkU%q;M^yc`dx9{GcoSyyd!}-UbfBE$HfBe(G^sIJK zmgYQRElF2ln#C+}Dw=oL-$TC(WMgkO{oieCY zkxQmjSO|BPW2;TLqEbM%n7*coV(rcSu5;FM`)&9z=F!%vKuJNF3+hdpLjKmjXrK!#W}nNenO5t-yzC>GqOfiV*95R1nH z7Vs+9+e2sl-Y6u^OkG>QmgUk-cukXnVTuSw-kmA*2DJ;2Drwbu@U{e+L%`mUtm+a8 z2`*qFz?IM}1?L}6&%#fqA6}l{*vv+3-c@3ZAsQJXn<-nd9#=zfwJJtHVPpnZY^_@D zo3ZDpOSr8naFgBzx}k@o2hIv`LR7OtiMtY2PX4 z*21}YGQf~{%`0|vUawiYK2@DZvywRwc?nq1x;BPqWAqUOeq&(=v$$tg~Z}q zwv)l3Tojmt&df?HW-5|A$BZ)FJz|1Mnvk`DU{0asM(^>+NaRL4bLfJ2PO!OB$7WDP=z z-@c~~JKkD^WMHE9RxfpHOV@6DJAdNQxc@6Sx%q$c-ru|$!zKT&`~R@NH@3h3**h4F zhr=CM?e|B|_y4ak<}yp6%Z2-~<_V?&3FXz@@-ZGf^Ropf1#>r?As-_Kl3F=lkT=oP+DM&196Q6s|Hv6zL2kQi0t;1jiguxub!;7K&uF#EYfTIHgw% z1SC9SLb0j;{`h0Pvr;Eot`lRcC6kbCmRjd(((7Rr2o?Zd%rJ%aKp!3@O!X`ebgW(4 zl?;j$*H9-t>|LumhC6bVted+^_DU?EnOoY7RVO6hTIq;iqe5zl{%Io78PpbQ`-IOV zHKJ&tfe@vfWAGlC;BB~$G@SSm?myT+7zng{#UC;#)L2{m#u4~y;~h6%`(y*?Rrfvc z0#qRlLTR>!#Tr1pDzsjZGnrlUm=Ok);7<$;xN=-!$~VXsf#7x4&8*lAjEs6i_fvM- z7rsbxEZwGWhG%I~=v=^VW%Ru*IrNgEVdq@1%}92)R4yQ0z>Qh5-wF;lmFdN%8IQ?= znYv`}O#+<-7bX!m%8_r{f4}3VF7c)IG(egK|7Cp*!$8-CB9jT(CtdOb^b@tgZIn7Y z?G9o6Fv*v6w?lSI;$5`U?g+lls%u#fFlPzd#iNGSjS9HCfhgFYW+a<$Y~N~hEHo1_ zobkgV=1fEk6>@5V;EONgQpOG`xN5u3^9b`pS$CE9(VMAXK#UWaHw!$x_zMA>GtX(>>MSJ2XAVgfu!9H`g_N$b{U}g?O%3n8M+`tox%0`T6AS z$?2yPudMVF{4-JqmF;Y{1m0Of^*obl&C~eJGr7MiZ;(DvE;nc&R|7X#9+A?YN*|1# l%rSm4$H9|1zI`&sch42cQF`4DhqRJ?ND`-CF5yY;E>7*L&L=TPwZ(V12u{LI*2fU}aLV zl+qRH$8j=EnJ@UgaG%?Eg1FB=$(k1S$FFPuANJo`Umw)=|0C=_8i`1#(a1}dU&{W2 zet#pk|N3BarMKSO*w|d5z1sf2`&#{KEtBcmSVU_)x}nKZ&EjZ-hFQl%oA zdLf_B*$9vu?^$$uFr?+0EkP z^547P?Y-K6eenI^4{zQc9iN>3a?gqj?SLZUgifyRYQ!T~3)1-@wF^X6qchx3`@(>PrMY!2g|0b33X2`=I^ zKx&AhAf$<<%j$!i=L?7H(ql=&qo9RY+7)2_(dUW!ECS&00uNu3$EZjUqGj;2Ff>|` zc$9vkM);vQ)1ghN;si0mBeB-+{i>xDMxoJ^w|X7A-fz>(T;+;OWl$nefxqA#JTB!v zYQ9%I3z?K+B6tdq_wpaD0$aPHXKlLst%?gAufXW}Gwxq&m&|QTcXp_GP#E&Ab{(f* z@6dk9YaB~(*Z?$9@%DQTmw@A|RujQ_K$+2K(}*#Khj66O4n)Q?Gzj9|&&5lPx%8we zlOGzhcgKgv2Ucfv!KB9Lurxb#0-rmOvOTE+o~G@GFdasq*c%?k$(%>3L)A=33ja>t zbm%x!0LBegOvY-)Ek%HUoe5wDU(}f~jf2eB$uX1MdyE6EB$N<~$7eJ^a?_ijGj=Z( zl5(alv|rG&>1K4p!i-~z@CM|;6nY2RI!K2vo z&QDH9?@!*nI=drG#Wx=+F0@Xe){^BRjfUcyhu9fhG_&YBiWd=tQm@(+luf3w-fJ>Y+HI0A%QPmR=QgT>A)Cd^h=JXedj0v<-T80MMD*Lwe_NZI56yr5&E9%G|7~t<==rbT zujju%e*RTnmYMZkS4&Z3?a5 z#Lm=ib^%7wIA$iFw29C-rDugV(6><9JwUqnx`Gv##ujArq=iCycPD1(_S&Rqhbz;U zK2jRNic`dvDcdB9$PW%55gI6dDZ zHc59Ao1}F(l|mZ2&Qo0)2w7dFdv6cvc^rr2a6*@HMn8ZOVIYlkoCYzK@tji$ITEt2 zL*nbyNZ}t&l#os!_XRN2M38?nq&VXQ@hZUU0e1b6MbKvB+yjHCO@J@H>UQb-8OIAb zp#+1m2zObjq>y<0f%`Dn;&7&?AfO2CvYsbgy`Wd0y+SxoGk)cfZucefa;1~~3k`x5 z;^mb`4?%1ZhS(|36QvV7v)7d!it<_Mp>0+jAo^ehaM>_rMVxk(Bg;D{=>XOg!TCuJkGGbL)k`5Ae zAt3Gg|H;cKec`lJ)_``0w_=>Kbm_doIRc_Z=R)m3;DEsuctIF0bAyBV^+^w~tQr=| zRl`(_E3yZjxm>2oS>xE)bx3yK-DE~_W!HN55N;Ef4^tsl-#j@w|q zN%eXIJA@X~@KD}FZd@oIfNd8BbHo`rf_81z30XzrqJsrUGlF6nq=PeEO;I4EpGir8 z$lRkp112DkcmkUo0zLmDiCf4gVw$B|F>`F*fV*?Nypr5pL-CIww6OXq`aQ#V$La{+ zXYoQ~m$NtvAxf=-MFHUfD8ujz$lo~>ph6&P@aU^pbT9N13@>yo>FH=3^AJ$$8`znH zADRoTi@aC)R2jh=sOH9yHeR?3>9TQOHab;kw_#3HAag)-7j#L;ZU_O8O|8q#K){g8 zTq0Qx5J_9ecCIwpMpx0I=^b**nr$?vz9@uj8sHsGY8`rxGGK(|ddHD6#P-)`F4fxt z(8U6vb3O)u2~5zta|vAoN`{2g2M(JW?<)YRta)^uZeu*!o=Bqbda%O{fN@Zf8QyaPV1hZmuICIiv!DD5B#f zIb!KGJzp>sRGS8yk7A#`2lEKv@NCAexlA}jA*5}Xl45DIfORV5S?GJuYY1<4!||mD zoyHM2=@CUrvT-PUP#yYX$N2)D0)nV5sg@yH7W4`1PbOezEM+sMOaP(BaV7=$vf_Xq zkvce^qn}^v_m}8P(>M-Dz~?dK^HldfI$4#|k}><0Lh(nkkZ=Sb9?oA^e{;=J$-T`n za`%`y;*tNg|Cdl(lHc;)=ZXFQMsIy9zyBHZ2iX5_Z*SH8|Cd-3=v;NEW=e#3C4i~T zzVSzYAo~9n6qJa~@%2a(J*L^o3(_iPThk2yK!D#;XuSPvHpK!MLJFm=qb2l+IE>p3 z{ifJpy6k8qdoH4>L^@EL8i)^T8f0>b#T{sCM|H$k=^2>LBtdMu=NR!(HXj2;jBRLB zpsoW}nTt8S~Zv)-+l=5SZAo zZAn!H6B}dx*1ESP1W2(A@+n@&G|$j^547`^Oh-#bJYOcdXlWFv`TYEdL%PKKrDiDv z{$Bcyf)_TaAlmO9!C63+r5b}%TS;RjL~FTgt8>JozS^H;1ldhDxPfLVC(KY3`Z&dB@-|`?p##W zcu1#Q4?nJ`Gwe-cXeeB+Hos2%BhR&Uik` z$}GW@_+&cXz?%0-z+2A&-hK}7H_rk7Rs(}Ow(D1Uf5rRX(>Dj*!{gT{U%LP6Z*1K8 z``?YN^}7H266*-cL%Q%3L)vI3hs#m@0^_N@!1-zTG?Ep{! E0KW^_bN~PV