Remove Spacy dependency, remove unneeded files, improve tokenizer

This commit is contained in:
Samuel Sloniker 2021-10-25 08:06:46 -07:00
parent 9eb602de65
commit edb8eb58e7
9 changed files with 10 additions and 162 deletions

View File

@ -1,20 +0,0 @@
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS    ?=
SPHINXBUILD   ?= sphinx-build
SOURCEDIR     = .
BUILDDIR      = _build

# Put it first so that "make" without argument is like "make help".
# NOTE: recipe lines must start with a literal tab character, not spaces.
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

View File

@ -1,52 +0,0 @@
# Sphinx configuration for the GPTC documentation build.
#
# Only a handful of the most common options are set here.  The complete list
# is described in the Sphinx documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html

# -- Path setup --------------------------------------------------------------

# Extensions, or modules documented through autodoc, that live in another
# directory have to be added to sys.path here.  A directory given relative to
# the documentation root should be made absolute with os.path.abspath, e.g.:
#
# import os
# import sys
# sys.path.insert(0, os.path.abspath('.'))

# -- Project information -----------------------------------------------------

project = "GPTC"
# Sphinx reads this option under the name `copyright`, so shadowing the
# builtin of the same name is intentional.
copyright = "2020, ScoopGracie"
author = "ScoopGracie"

# -- General configuration ---------------------------------------------------

# Names of the Sphinx extension modules to load, as strings: either bundled
# ones ('sphinx.ext.*') or custom ones.  None are enabled.
extensions = []

# Directories, relative to this one, that are searched for templates.
templates_path = ["_templates"]

# Patterns, relative to the source directory, for files and directories that
# are skipped when collecting sources.  The same patterns also apply to
# html_static_path and html_extra_path.
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]

# -- Options for HTML output -------------------------------------------------

# Theme for HTML and HTML Help pages; the Sphinx documentation lists the
# builtin themes.
html_theme = "alabaster"

# Directories, relative to this one, holding custom static files such as
# style sheets.  They are copied after the builtin static files, so a file
# named "default.css" here overwrites the builtin "default.css".
html_static_path = ["_static"]

View File

@ -1,20 +0,0 @@
.. GPTC documentation master file, created by
   sphinx-quickstart on Fri Aug 14 17:38:37 2020.
   You can adapt this file completely to your liking, but it should at least
   contain the root `toctree` directive.

Welcome to GPTC's documentation!
================================

.. toctree::
   :maxdepth: 2
   :caption: Contents:

Indices and tables
==================

* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`

View File

@ -1,35 +0,0 @@
@ECHO OFF
REM Switch to the directory that contains this script; undone by popd at :end.
pushd %~dp0
REM Command file for Sphinx documentation
REM Use "sphinx-build" unless SPHINXBUILD is already set in the environment.
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=.
set BUILDDIR=_build
REM Without a target argument, only print the Sphinx help text.
if "%1" == "" goto help
REM Probe whether the build command can be started; its output is discarded.
%SPHINXBUILD% >NUL 2>NUL
REM errorlevel 9009 is handled as "command not found": explain and give up.
if errorlevel 9009 (
echo.
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
echo.installed, then set the SPHINXBUILD environment variable to point
echo.to the full path of the 'sphinx-build' executable. Alternatively you
echo.may add the Sphinx directory to PATH.
echo.
echo.If you don't have Sphinx installed, grab it from
echo.http://sphinx-doc.org/
exit /b 1
)
REM Pass the requested target (%1) through to Sphinx via its -M option.
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end
:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
:end
popd

View File

@ -2,15 +2,13 @@
import argparse import argparse
import json import json
import sys import sys
import gptc
parser = argparse.ArgumentParser(description="General Purpose Text Classifier") parser = argparse.ArgumentParser(description="General Purpose Text Classifier")
parser.add_argument('model', help='model to use') parser.add_argument('model', help='model to use')
parser.add_argument('-c', '--compile', help='compile raw model model to outfile', metavar='outfile') parser.add_argument('-c', '--compile', help='compile raw model model to outfile', metavar='outfile')
args = parser.parse_args() args = parser.parse_args()
import gptc # PEP 8 violation, but don't fix it
# Way better for performance of argparse checking
with open(args.model, 'r') as f: with open(args.model, 'r') as f:
raw_model = json.load(f) raw_model = json.load(f)
if args.compile: if args.compile:

View File

@ -1,14 +1,18 @@
'''Main module for GPTC.''' '''Main module for GPTC.'''
import sys import sys
import spacy
nlp = spacy.load('en_core_web_sm')
def _listify(text): def _listify(text):
"""Convert a string to a list of lemmas.""" """Convert a string to a list of lemmas."""
return [string.lemma_.lower() for string in nlp(text) if string.lemma_[0] in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'] out = [""]
for char in text.lower():
if char.isalpha() or char == "'":
out[-1] += char
elif out[-1] != "":
out.append("")
return [string for string in out if string]
def compile(raw_model): def compile(raw_model):
"""Compile a raw model. """Compile a raw model.

File diff suppressed because one or more lines are too long

View File

@ -1 +0,0 @@
spacy

View File

@ -1,26 +0,0 @@
# Packaging script for the gptc distribution.
from distutils.core import setup

# NOTE(review): install_requires is a setuptools option; distutils.core.setup
# presumably only warns about it and ignores it -- confirm before relying on
# the dependency actually being installed.

# Trove classifiers advertised for the package.
_CLASSIFIERS = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: MIT License",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.5",
    "Programming Language :: Python :: 3.6",
    "Programming Language :: Python :: 3.7",
    "Programming Language :: Python :: 3.8",
    "Programming Language :: Python :: 3.9",
]

# Keyword arguments handed to setup(), collected in one place.
_METADATA = {
    "name": "gptc",
    "packages": ["gptc"],
    "version": "2.0.0a",
    "license": "MIT",
    "description": "General-purpose English text classifier",
    "author": "ScoopGracie",
    "author_email": "scoopgracie@scoopgracie.com",
    "url": "https://github.com/scoopgracie/gptc",
    "keywords": ["nlp", "text", "classification"],
    "install_requires": ["spacy"],
    "classifiers": _CLASSIFIERS,
}

setup(**_METADATA)