Remove Spacy dependency, remove unneeded files, improve tokenizer
This commit is contained in:
parent
9eb602de65
commit
edb8eb58e7
|
@ -1,20 +0,0 @@
|
|||
# Minimal makefile for Sphinx documentation
|
||||
#
|
||||
|
||||
# You can set these variables from the command line, and also
|
||||
# from the environment for the first two.
|
||||
SPHINXOPTS ?=
|
||||
SPHINXBUILD ?= sphinx-build
|
||||
SOURCEDIR = .
|
||||
BUILDDIR = _build
|
||||
|
||||
# Put it first so that "make" without argument is like "make help".
|
||||
help:
|
||||
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
||||
|
||||
.PHONY: help Makefile
|
||||
|
||||
# Catch-all target: route all unknown targets to Sphinx using the new
|
||||
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
|
||||
%: Makefile
|
||||
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
52
docs/conf.py
52
docs/conf.py
|
@ -1,52 +0,0 @@
|
|||
# Configuration file for the Sphinx documentation builder.
|
||||
#
|
||||
# This file only contains a selection of the most common options. For a full
|
||||
# list see the documentation:
|
||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html
|
||||
|
||||
# -- Path setup --------------------------------------------------------------
|
||||
|
||||
# If extensions (or modules to document with autodoc) are in another directory,
|
||||
# add these directories to sys.path here. If the directory is relative to the
|
||||
# documentation root, use os.path.abspath to make it absolute, like shown here.
|
||||
#
|
||||
# import os
|
||||
# import sys
|
||||
# sys.path.insert(0, os.path.abspath('.'))
|
||||
|
||||
|
||||
# -- Project information -----------------------------------------------------
|
||||
|
||||
project = 'GPTC'
|
||||
copyright = '2020, ScoopGracie'
|
||||
author = 'ScoopGracie'
|
||||
|
||||
|
||||
# -- General configuration ---------------------------------------------------
|
||||
|
||||
# Add any Sphinx extension module names here, as strings. They can be
|
||||
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
|
||||
# ones.
|
||||
extensions = [
|
||||
]
|
||||
|
||||
# Add any paths that contain templates here, relative to this directory.
|
||||
templates_path = ['_templates']
|
||||
|
||||
# List of patterns, relative to source directory, that match files and
|
||||
# directories to ignore when looking for source files.
|
||||
# This pattern also affects html_static_path and html_extra_path.
|
||||
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
|
||||
|
||||
|
||||
# -- Options for HTML output -------------------------------------------------
|
||||
|
||||
# The theme to use for HTML and HTML Help pages. See the documentation for
|
||||
# a list of builtin themes.
|
||||
#
|
||||
html_theme = 'alabaster'
|
||||
|
||||
# Add any paths that contain custom static files (such as style sheets) here,
|
||||
# relative to this directory. They are copied after the builtin static files,
|
||||
# so a file named "default.css" will overwrite the builtin "default.css".
|
||||
html_static_path = ['_static']
|
|
@ -1,20 +0,0 @@
|
|||
.. GPTC documentation master file, created by
|
||||
sphinx-quickstart on Fri Aug 14 17:38:37 2020.
|
||||
You can adapt this file completely to your liking, but it should at least
|
||||
contain the root `toctree` directive.
|
||||
|
||||
Welcome to GPTC's documentation!
|
||||
================================
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
:caption: Contents:
|
||||
|
||||
|
||||
|
||||
Indices and tables
|
||||
==================
|
||||
|
||||
* :ref:`genindex`
|
||||
* :ref:`modindex`
|
||||
* :ref:`search`
|
|
@ -1,35 +0,0 @@
|
|||
@ECHO OFF
|
||||
|
||||
pushd %~dp0
|
||||
|
||||
REM Command file for Sphinx documentation
|
||||
|
||||
if "%SPHINXBUILD%" == "" (
|
||||
set SPHINXBUILD=sphinx-build
|
||||
)
|
||||
set SOURCEDIR=.
|
||||
set BUILDDIR=_build
|
||||
|
||||
if "%1" == "" goto help
|
||||
|
||||
%SPHINXBUILD% >NUL 2>NUL
|
||||
if errorlevel 9009 (
|
||||
echo.
|
||||
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
|
||||
echo.installed, then set the SPHINXBUILD environment variable to point
|
||||
echo.to the full path of the 'sphinx-build' executable. Alternatively you
|
||||
echo.may add the Sphinx directory to PATH.
|
||||
echo.
|
||||
echo.If you don't have Sphinx installed, grab it from
|
||||
echo.http://sphinx-doc.org/
|
||||
exit /b 1
|
||||
)
|
||||
|
||||
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
|
||||
goto end
|
||||
|
||||
:help
|
||||
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
|
||||
|
||||
:end
|
||||
popd
|
|
@ -2,15 +2,13 @@
|
|||
import argparse
|
||||
import json
|
||||
import sys
|
||||
import gptc
|
||||
|
||||
parser = argparse.ArgumentParser(description="General Purpose Text Classifier")
|
||||
parser.add_argument('model', help='model to use')
|
||||
parser.add_argument('-c', '--compile', help='compile raw model model to outfile', metavar='outfile')
|
||||
args = parser.parse_args()
|
||||
|
||||
import gptc # PEP 8 violation, but don't fix it
|
||||
# Way better for performance of argparse checking
|
||||
|
||||
with open(args.model, 'r') as f:
|
||||
raw_model = json.load(f)
|
||||
if args.compile:
|
||||
|
|
12
gptc/gptc.py
12
gptc/gptc.py
|
@ -1,14 +1,18 @@
|
|||
'''Main module for GPTC.'''
|
||||
|
||||
import sys
|
||||
import spacy
|
||||
|
||||
nlp = spacy.load('en_core_web_sm')
|
||||
|
||||
def _listify(text):
|
||||
"""Convert a string to a list of lemmas."""
|
||||
return [string.lemma_.lower() for string in nlp(text) if string.lemma_[0] in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ']
|
||||
out = [""]
|
||||
|
||||
for char in text.lower():
|
||||
if char.isalpha() or char == "'":
|
||||
out[-1] += char
|
||||
elif out[-1] != "":
|
||||
out.append("")
|
||||
|
||||
return [string for string in out if string]
|
||||
|
||||
def compile(raw_model):
|
||||
"""Compile a raw model.
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -1 +0,0 @@
|
|||
spacy
|
26
setup.py
26
setup.py
|
@ -1,26 +0,0 @@
|
|||
from distutils.core import setup
|
||||
setup(
|
||||
name = 'gptc',
|
||||
packages = ['gptc'],
|
||||
version = '2.0.0a',
|
||||
license='MIT',
|
||||
description = 'General-purpose English text classifier',
|
||||
author = 'ScoopGracie',
|
||||
author_email = 'scoopgracie@scoopgracie.com',
|
||||
url = 'https://github.com/scoopgracie/gptc',
|
||||
keywords = ['nlp', 'text', 'classification'],
|
||||
install_requires=[
|
||||
'spacy',
|
||||
],
|
||||
classifiers=[
|
||||
'Development Status :: 4 - Beta',
|
||||
'Intended Audience :: Developers',
|
||||
'License :: OSI Approved :: MIT License',
|
||||
'Programming Language :: Python :: 3',
|
||||
'Programming Language :: Python :: 3.5',
|
||||
'Programming Language :: Python :: 3.6',
|
||||
'Programming Language :: Python :: 3.7',
|
||||
'Programming Language :: Python :: 3.8',
|
||||
'Programming Language :: Python :: 3.9',
|
||||
],
|
||||
)
|
Loading…
Reference in New Issue
Block a user