summaryrefslogtreecommitdiffstats
path: root/g4f/api/_tokenizer.py
diff options
context:
space:
mode:
author: abc <98614666+xtekky@users.noreply.github.com> 2023-10-20 20:04:13 +0200
committer: abc <98614666+xtekky@users.noreply.github.com> 2023-10-20 20:04:13 +0200
commit: 8e7e694d81e674db63049145a35972df8ad2e3fa (patch)
tree: d6000ee808385a20f85e03e3173e3aa16a605757 /g4f/api/_tokenizer.py
parent~ (diff)
downloadgpt4free-8e7e694d81e674db63049145a35972df8ad2e3fa.tar
gpt4free-8e7e694d81e674db63049145a35972df8ad2e3fa.tar.gz
gpt4free-8e7e694d81e674db63049145a35972df8ad2e3fa.tar.bz2
gpt4free-8e7e694d81e674db63049145a35972df8ad2e3fa.tar.lz
gpt4free-8e7e694d81e674db63049145a35972df8ad2e3fa.tar.xz
gpt4free-8e7e694d81e674db63049145a35972df8ad2e3fa.tar.zst
gpt4free-8e7e694d81e674db63049145a35972df8ad2e3fa.zip
Diffstat (limited to 'g4f/api/_tokenizer.py')
-rw-r--r-- g4f/api/_tokenizer.py 9
1 files changed, 9 insertions, 0 deletions
diff --git a/g4f/api/_tokenizer.py b/g4f/api/_tokenizer.py
new file mode 100644
index 00000000..fd8f9d5a
--- /dev/null
+++ b/g4f/api/_tokenizer.py
@@ -0,0 +1,9 @@
+import tiktoken
+from typing import List, Tuple, Union
+
+def tokenize(text: str, model: str = 'gpt-3.5-turbo') -> Union[int, str]:
+ encoding = tiktoken.encoding_for_model(model)
+ encoded = encoding.encode(text)
+ num_tokens = len(encoded)
+
+ return num_tokens, encoded \ No newline at end of file