diff --git a/README.md b/README.md
index cd701d4..391c648 100644
--- a/README.md
+++ b/README.md
@@ -33,6 +33,13 @@ stdout (or "None" if it cannot determine anything).
This will print the compiled model in JSON to stdout.
+### Packing models
+
+ gptc pack
+
+This will print the raw model in JSON to stdout. See `models/unpacked/` for an
+example of the format. Any exceptions will be printed to stderr.
+
## Library
### `gptc.Classifier(model, max_ngram_length=1)`
@@ -52,12 +59,44 @@ category:probability, ...}`
Classify `text`. Returns the category into which the text is placed (as a
string), or `None` when it cannot classify the text.
+#### `Classifier.model`
+
+The classifier's model.
+
+#### `Classifier.has_emoji`
+
+Check whether emojis are supported by the `Classifier`. (See section "Emoji.")
+Equivalent to `gptc.has_emoji and gptc.model_has_emoji(model)`.
+
### `gptc.compile(raw_model, max_ngram_length=1)`
+
Compile a raw model (as a list, not JSON) and return the compiled model (as a
dict).
For information about `max_ngram_length`, see section "Ngrams."
+### `gptc.pack(directory, print_exceptions=False)`
+
+Pack the model in `directory` and return a tuple of the format:
+
+ (raw_model, [(exception,),(exception,)...])
+
+Note that the exceptions are contained in single-item tuples. This is to allow
+more information to be provided without breaking the API in future versions of
+GPTC.
+
+See `models/unpacked/` for an example of the format.
+
+### `gptc.has_emoji`
+
+`True` if the `emoji` package is installed (see section "Emoji"), `False`
+otherwise.
+
+### `gptc.model_has_emoji(compiled_model)`
+
+Returns `True` if `compiled_model` was compiled with emoji support, `False`
+otherwise.
+
## Ngrams
GPTC optionally supports using ngrams to improve classification accuracy. They
@@ -84,6 +123,10 @@ If the [`emoji`](https://pypi.org/project/emoji/) package is installed, GPTC
will automatically handle emojis the same way as words. If it is not installed,
GPTC will still work but will ignore emojis.
+`emoji` must be installed on both the system used to compile the model and the
+system used to classify text. If the package is missing on either system,
+emojis are ignored.
+
## Model format
This section explains the raw model format, which is how you should create and
diff --git a/gptc/__init__.py b/gptc/__init__.py
index 6ef26b3..ac1a794 100644
--- a/gptc/__init__.py
+++ b/gptc/__init__.py
@@ -5,6 +5,8 @@
from gptc.compiler import compile as compile
from gptc.classifier import Classifier as Classifier
from gptc.pack import pack as pack
+from gptc.tokenizer import has_emoji as has_emoji
+from gptc.model_info import model_has_emoji as model_has_emoji
from gptc.exceptions import (
GPTCError as GPTCError,
ModelError as ModelError,
diff --git a/gptc/classifier.py b/gptc/classifier.py
index d4cd1dc..22de86c 100755
--- a/gptc/classifier.py
+++ b/gptc/classifier.py
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
-import gptc.tokenizer, gptc.compiler, gptc.exceptions, gptc.weighting
+import gptc.tokenizer, gptc.compiler, gptc.exceptions, gptc.weighting, gptc.model_info
import warnings
from typing import Dict, Union, cast, List
@@ -33,6 +33,7 @@ class Classifier:
self.model = model
model_ngrams = cast(int, model.get("__ngrams__", 1))
self.max_ngram_length = min(max_ngram_length, model_ngrams)
+ self.has_emoji = gptc.tokenizer.has_emoji and gptc.model_info.model_has_emoji(model)
def confidence(self, text: str) -> Dict[str, float]:
"""Classify text with confidence.
diff --git a/gptc/model_info.py b/gptc/model_info.py
new file mode 100755
index 0000000..be9d3b1
--- /dev/null
+++ b/gptc/model_info.py
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+
+import gptc.compiler
+from typing import Dict, Union, cast, List
+
+
+def model_has_emoji(model: gptc.compiler.MODEL) -> bool:
+    """Return whether a compiled model was compiled with emoji support.
+
+    The flag is read from the model's "__emoji__" entry; a model without
+    that entry is treated as having no emoji support.
+    """
+    # The key must be exactly "__emoji__" (same dunder style as "__ngrams__");
+    # a stray "]" in the key made this lookup always fall back to 0.
+    return cast(int, model.get("__emoji__", 0)) == 1