blob: de5877c47ad198c9db58c29c40389af1457c4f81 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
|
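# NOTE: this helper is disabled (commented out); it is kept for reference only.
# import tiktoken
# from typing import List, Tuple
#
#
# def tokenize(text: str, model: str = 'gpt-3.5-turbo') -> Tuple[int, List[int]]:
#     """Return the token count and the encoded tokens of `text` for `model`."""
#     encoding = tiktoken.encoding_for_model(model)
#     encoded = encoding.encode(text)
#     num_tokens = len(encoded)
#     return num_tokens, encoded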
|