| author | abc <98614666+xtekky@users.noreply.github.com> | 2023-10-20 20:04:13 +0200 |
|---|---|---|
| committer | abc <98614666+xtekky@users.noreply.github.com> | 2023-10-20 20:04:13 +0200 |
| commit | 8e7e694d81e674db63049145a35972df8ad2e3fa | |
| tree | d6000ee808385a20f85e03e3173e3aa16a605757 /g4f/api/_tokenizer.py | |
| parent | ~ | |
Diffstat (limited to '')

| -rw-r--r-- | g4f/api/_tokenizer.py | 9 |
|---|---|---|

1 file changed, 9 insertions, 0 deletions
```diff
diff --git a/g4f/api/_tokenizer.py b/g4f/api/_tokenizer.py
new file mode 100644
index 00000000..fd8f9d5a
--- /dev/null
+++ b/g4f/api/_tokenizer.py
@@ -0,0 +1,9 @@
+import tiktoken
+from typing import Union
+
+def tokenize(text: str, model: str = 'gpt-3.5-turbo') -> Union[int, str]:
+    encoding = tiktoken.encoding_for_model(model)
+    encoded = encoding.encode(text)
+    num_tokens = len(encoded)
+
+    return num_tokens, encoded
\ No newline at end of file
```
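
The new helper is a thin wrapper around tiktoken's `encoding_for_model` / `encode`. Note that, despite the `Union[int, str]` return annotation, `tokenize` actually returns a `(token_count, token_ids)` tuple. A minimal usage sketch, assuming `tiktoken` is installed and the module is importable as `g4f.api._tokenizer` (path taken from the diff above):

```python
# Minimal usage sketch, assuming tiktoken is installed and the module
# is importable as g4f.api._tokenizer (path taken from the diff above).
from g4f.api._tokenizer import tokenize

num_tokens, encoded = tokenize("Hello, world!", model='gpt-3.5-turbo')

# tokenize() returns a (count, token_ids) tuple, despite its
# Union[int, str] annotation in the committed code.
print(num_tokens)  # token count, e.g. 4 for this string under the gpt-3.5-turbo encoding
print(encoded)     # list of integer token ids produced by tiktoken's Encoding.encode
```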