Remove Spacy dependency, remove unneeded files, improve tokenizer
This commit is contained in:
parent
9eb602de65
commit
edb8eb58e7
|
@ -1,20 +0,0 @@
|
||||||
# Minimal makefile for Sphinx documentation
|
|
||||||
#
|
|
||||||
|
|
||||||
# You can set these variables from the command line, and also
|
|
||||||
# from the environment for the first two.
|
|
||||||
SPHINXOPTS ?=
|
|
||||||
SPHINXBUILD ?= sphinx-build
|
|
||||||
SOURCEDIR = .
|
|
||||||
BUILDDIR = _build
|
|
||||||
|
|
||||||
# Put it first so that "make" without argument is like "make help".
|
|
||||||
help:
|
|
||||||
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
|
||||||
|
|
||||||
.PHONY: help Makefile
|
|
||||||
|
|
||||||
# Catch-all target: route all unknown targets to Sphinx using the new
|
|
||||||
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
|
|
||||||
%: Makefile
|
|
||||||
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
|
52
docs/conf.py
52
docs/conf.py
|
@ -1,52 +0,0 @@
|
||||||
# Configuration file for the Sphinx documentation builder.
|
|
||||||
#
|
|
||||||
# This file only contains a selection of the most common options. For a full
|
|
||||||
# list see the documentation:
|
|
||||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html
|
|
||||||
|
|
||||||
# -- Path setup --------------------------------------------------------------
|
|
||||||
|
|
||||||
# If extensions (or modules to document with autodoc) are in another directory,
|
|
||||||
# add these directories to sys.path here. If the directory is relative to the
|
|
||||||
# documentation root, use os.path.abspath to make it absolute, like shown here.
|
|
||||||
#
|
|
||||||
# import os
|
|
||||||
# import sys
|
|
||||||
# sys.path.insert(0, os.path.abspath('.'))
|
|
||||||
|
|
||||||
|
|
||||||
# -- Project information -----------------------------------------------------
|
|
||||||
|
|
||||||
project = 'GPTC'
|
|
||||||
copyright = '2020, ScoopGracie'
|
|
||||||
author = 'ScoopGracie'
|
|
||||||
|
|
||||||
|
|
||||||
# -- General configuration ---------------------------------------------------
|
|
||||||
|
|
||||||
# Add any Sphinx extension module names here, as strings. They can be
|
|
||||||
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
|
|
||||||
# ones.
|
|
||||||
extensions = [
|
|
||||||
]
|
|
||||||
|
|
||||||
# Add any paths that contain templates here, relative to this directory.
|
|
||||||
templates_path = ['_templates']
|
|
||||||
|
|
||||||
# List of patterns, relative to source directory, that match files and
|
|
||||||
# directories to ignore when looking for source files.
|
|
||||||
# This pattern also affects html_static_path and html_extra_path.
|
|
||||||
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
|
|
||||||
|
|
||||||
|
|
||||||
# -- Options for HTML output -------------------------------------------------
|
|
||||||
|
|
||||||
# The theme to use for HTML and HTML Help pages. See the documentation for
|
|
||||||
# a list of builtin themes.
|
|
||||||
#
|
|
||||||
html_theme = 'alabaster'
|
|
||||||
|
|
||||||
# Add any paths that contain custom static files (such as style sheets) here,
|
|
||||||
# relative to this directory. They are copied after the builtin static files,
|
|
||||||
# so a file named "default.css" will overwrite the builtin "default.css".
|
|
||||||
html_static_path = ['_static']
|
|
|
@ -1,20 +0,0 @@
|
||||||
.. GPTC documentation master file, created by
|
|
||||||
sphinx-quickstart on Fri Aug 14 17:38:37 2020.
|
|
||||||
You can adapt this file completely to your liking, but it should at least
|
|
||||||
contain the root `toctree` directive.
|
|
||||||
|
|
||||||
Welcome to GPTC's documentation!
|
|
||||||
================================
|
|
||||||
|
|
||||||
.. toctree::
|
|
||||||
:maxdepth: 2
|
|
||||||
:caption: Contents:
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Indices and tables
|
|
||||||
==================
|
|
||||||
|
|
||||||
* :ref:`genindex`
|
|
||||||
* :ref:`modindex`
|
|
||||||
* :ref:`search`
|
|
|
@ -1,35 +0,0 @@
|
||||||
@ECHO OFF
|
|
||||||
|
|
||||||
pushd %~dp0
|
|
||||||
|
|
||||||
REM Command file for Sphinx documentation
|
|
||||||
|
|
||||||
if "%SPHINXBUILD%" == "" (
|
|
||||||
set SPHINXBUILD=sphinx-build
|
|
||||||
)
|
|
||||||
set SOURCEDIR=.
|
|
||||||
set BUILDDIR=_build
|
|
||||||
|
|
||||||
if "%1" == "" goto help
|
|
||||||
|
|
||||||
%SPHINXBUILD% >NUL 2>NUL
|
|
||||||
if errorlevel 9009 (
|
|
||||||
echo.
|
|
||||||
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
|
|
||||||
echo.installed, then set the SPHINXBUILD environment variable to point
|
|
||||||
echo.to the full path of the 'sphinx-build' executable. Alternatively you
|
|
||||||
echo.may add the Sphinx directory to PATH.
|
|
||||||
echo.
|
|
||||||
echo.If you don't have Sphinx installed, grab it from
|
|
||||||
echo.http://sphinx-doc.org/
|
|
||||||
exit /b 1
|
|
||||||
)
|
|
||||||
|
|
||||||
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
|
|
||||||
goto end
|
|
||||||
|
|
||||||
:help
|
|
||||||
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
|
|
||||||
|
|
||||||
:end
|
|
||||||
popd
|
|
|
@ -2,15 +2,13 @@
|
||||||
import argparse
|
import argparse
|
||||||
import json
|
import json
|
||||||
import sys
|
import sys
|
||||||
|
import gptc
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(description="General Purpose Text Classifier")
|
parser = argparse.ArgumentParser(description="General Purpose Text Classifier")
|
||||||
parser.add_argument('model', help='model to use')
|
parser.add_argument('model', help='model to use')
|
||||||
parser.add_argument('-c', '--compile', help='compile raw model model to outfile', metavar='outfile')
|
parser.add_argument('-c', '--compile', help='compile raw model model to outfile', metavar='outfile')
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
import gptc # PEP 8 violation, but don't fix it
|
|
||||||
# Way better for performance of argparse checking
|
|
||||||
|
|
||||||
with open(args.model, 'r') as f:
|
with open(args.model, 'r') as f:
|
||||||
raw_model = json.load(f)
|
raw_model = json.load(f)
|
||||||
if args.compile:
|
if args.compile:
|
||||||
|
|
12
gptc/gptc.py
12
gptc/gptc.py
|
@ -1,14 +1,18 @@
|
||||||
'''Main module for GPTC.'''
|
'''Main module for GPTC.'''
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
import spacy
|
|
||||||
|
|
||||||
nlp = spacy.load('en_core_web_sm')
|
|
||||||
|
|
||||||
def _listify(text):
|
def _listify(text):
|
||||||
"""Convert a string to a list of lemmas."""
|
"""Convert a string to a list of lemmas."""
|
||||||
return [string.lemma_.lower() for string in nlp(text) if string.lemma_[0] in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ']
|
out = [""]
|
||||||
|
|
||||||
|
for char in text.lower():
|
||||||
|
if char.isalpha() or char == "'":
|
||||||
|
out[-1] += char
|
||||||
|
elif out[-1] != "":
|
||||||
|
out.append("")
|
||||||
|
|
||||||
|
return [string for string in out if string]
|
||||||
|
|
||||||
def compile(raw_model):
|
def compile(raw_model):
|
||||||
"""Compile a raw model.
|
"""Compile a raw model.
|
||||||
|
|
File diff suppressed because one or more lines are too long
|
@ -1 +0,0 @@
|
||||||
spacy
|
|
26
setup.py
26
setup.py
|
@ -1,26 +0,0 @@
|
||||||
from distutils.core import setup
|
|
||||||
setup(
|
|
||||||
name = 'gptc',
|
|
||||||
packages = ['gptc'],
|
|
||||||
version = '2.0.0a',
|
|
||||||
license='MIT',
|
|
||||||
description = 'General-purpose English text classifier',
|
|
||||||
author = 'ScoopGracie',
|
|
||||||
author_email = 'scoopgracie@scoopgracie.com',
|
|
||||||
url = 'https://github.com/scoopgracie/gptc',
|
|
||||||
keywords = ['nlp', 'text', 'classification'],
|
|
||||||
install_requires=[
|
|
||||||
'spacy',
|
|
||||||
],
|
|
||||||
classifiers=[
|
|
||||||
'Development Status :: 4 - Beta',
|
|
||||||
'Intended Audience :: Developers',
|
|
||||||
'License :: OSI Approved :: MIT License',
|
|
||||||
'Programming Language :: Python :: 3',
|
|
||||||
'Programming Language :: Python :: 3.5',
|
|
||||||
'Programming Language :: Python :: 3.6',
|
|
||||||
'Programming Language :: Python :: 3.7',
|
|
||||||
'Programming Language :: Python :: 3.8',
|
|
||||||
'Programming Language :: Python :: 3.9',
|
|
||||||
],
|
|
||||||
)
|
|
Loading…
Reference in New Issue
Block a user