diff options
Diffstat (limited to '')
-rw-r--r-- | g4f/api/_tokenizer.py | 9 |
1 files changed, 9 insertions, 0 deletions
# g4f/api/_tokenizer.py
import tiktoken
from typing import List, Tuple, Union


def tokenize(text: str, model: str = 'gpt-3.5-turbo') -> Tuple[int, List[int]]:
    """Encode *text* with the tiktoken encoding selected for *model*.

    Args:
        text: The string to tokenize.
        model: Model name passed to ``tiktoken.encoding_for_model`` to pick
            the encoding. Defaults to ``'gpt-3.5-turbo'``.

    Returns:
        A 2-tuple ``(num_tokens, encoded)`` where ``encoded`` is the sequence
        of token ids produced by ``encoding.encode(text)`` and ``num_tokens``
        is its length.

    Raises:
        Any exception raised by ``tiktoken.encoding_for_model`` or
        ``Encoding.encode`` propagates unchanged.
        NOTE(review): tiktoken is expected to raise ``KeyError`` for a model
        name it cannot map to an encoding -- confirm against the installed
        tiktoken version before relying on it.
    """
    encoding = tiktoken.encoding_for_model(model)
    encoded = encoding.encode(text)

    # The count is returned first, then the token ids themselves.
    return len(encoded), encoded
\ No newline at end of file |