|
import json |
|
import re |
|
from typing import Any |
|
|
|
import quickjs |
|
from curl_cffi import requests |
|
|
|
# Module-wide HTTP session reused for every request this script makes.
# NOTE(review): `impersonate="chrome107"` presumably makes curl_cffi present
# a Chrome 107 browser fingerprint -- confirm against the curl_cffi docs.
session = requests.Session(impersonate="chrome107")
|
|
|
|
|
def get_model_info() -> dict[str, Any]:
    """Scrape the model table out of the sdk.vercel.ai JavaScript chunks.

    Fetches the landing page, collects every ``static/chunks...js`` path it
    mentions, and downloads those chunks one at a time until one contains
    the object literal that follows the Human-prompt constant.  That
    literal is evaluated with QuickJS and converted to Python through JSON.

    Returns:
        The decoded model table, or an empty dict when no chunk matches.
    """
    url = "https://sdk.vercel.ai"
    html = session.get(url).text

    paths_regex = r"static\/chunks.+?\.js"
    separator_regex = r'"\]\)<\/script><script>self\.__next_f\.push\(\[.,"'
    models_regex = r'let .="\\n\\nHuman:\",r=(.+?),.='
    # Matches the single non-digit, non-"]" character that directly follows
    # "stopSequences:{value:[" -- presumably a minified variable that refers
    # to the prompt constant; TODO confirm against a live chunk.
    stop_sequences_regex = r"(?<=stopSequences:{value:\[)\D(?<!\])"
    # JavaScript source text (a string literal) substituted for that character.
    human_stop_sequence = '"\\n\\nHuman:"'

    # Remove any `__next_f.push` script-boundary markup that was captured in
    # the middle of a path match.
    paths = [
        re.sub(separator_regex, "", path)
        for path in re.findall(paths_regex, html)
    ]

    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # chunks are tried in a deterministic order.
    for path in dict.fromkeys(paths):
        # Fetch lazily: stop downloading as soon as one chunk matches.
        script = session.get(f"{url}/_next/{path}").text
        match = re.search(models_regex, script)
        if match is None:
            continue

        # A callable replacement is inserted verbatim; re.escape() is meant
        # for patterns and must not be used to build replacement strings.
        models_str = re.sub(
            stop_sequences_regex,
            lambda _match: human_stop_sequence,
            match.group(1),
        )

        # NOTE(security): this executes JavaScript text downloaded from the
        # remote server.  It runs inside the QuickJS interpreter rather than
        # Python's eval, but the input is still untrusted.
        context = quickjs.Context()
        json_str: str = context.eval(f"({models_str})").json()
        return json.loads(json_str)

    return {}
|
|
|
|
|
def convert_model_info(models: dict[str, Any]) -> dict[str, Any]:
    """Reduce each raw model entry to its id and default parameter values.

    Args:
        models: Mapping of model name to a raw entry.  Each entry is read
            for the keys ``"id"`` and ``"parameters"``.

    Returns:
        Mapping of model name to ``{"id": ..., "default_params": ...}``.

    Raises:
        KeyError: If an entry lacks ``"id"`` or ``"parameters"``.
    """
    return {
        model_name: {
            "id": params["id"],
            "default_params": params_to_default_params(params["parameters"]),
        }
        for model_name, params in models.items()
    }
|
|
|
|
|
def params_to_default_params(parameters: dict[str, Any]) -> dict[str, Any]:
    """Map each parameter name to the ``"value"`` field of its description.

    The key ``"maximumLength"`` is emitted as ``"maxTokens"``; every other
    key keeps its name.

    Args:
        parameters: Mapping of parameter name to a mapping that contains a
            ``"value"`` key.

    Returns:
        Mapping of (possibly renamed) parameter name to its value.

    Raises:
        KeyError: If a parameter description has no ``"value"`` key.
    """
    renamed_keys = {"maximumLength": "maxTokens"}
    return {
        renamed_keys.get(key, key): parameter["value"]
        for key, parameter in parameters.items()
    }
|
|
|
|
|
def get_model_names(model_info: dict[str, Any]) -> list[str]:
    """Return the model names in sorted order, minus two OpenAI entries.

    Args:
        model_info: Mapping keyed by model name; only the keys are used.

    Returns:
        A new sorted list of every key except ``"openai:gpt-4"`` and
        ``"openai:gpt-3.5-turbo"``.
    """
    excluded = {"openai:gpt-4", "openai:gpt-3.5-turbo"}
    return sorted(name for name in model_info if name not in excluded)
|
|
|
|
|
def print_providers(model_names: list[str]) -> None:
    """Print one ``Model(...)`` assignment statement per model name.

    Each name is split on ``:`` and ``/``.  The first piece becomes the
    ``base_provider`` argument; the last piece, with ``-`` turned into ``_``
    and ``.`` removed, becomes the variable being assigned.

    Args:
        model_names: Model names to emit statements for.
    """
    for name in model_names:
        pieces = re.split(r":|/", name)
        base_provider = pieces[0]
        variable_name = pieces[-1].replace("-", "_").replace(".", "")
        # The trailing "\n" plus print's own newline leaves a blank line
        # after every statement.
        print(
            f'{variable_name} = Model(name="{name}", '
            f'base_provider="{base_provider}", best_provider=Vercel,)\n'
        )
|
|
|
|
|
def print_convert(model_names: list[str]) -> None:
    """Print one ``"key": variable,`` mapping line per model name.

    The key is the last piece of the name after splitting on ``:`` and
    ``/``; the variable is that key with ``-`` turned into ``_`` and ``.``
    removed.

    Args:
        model_names: Model names to emit mapping lines for.
    """
    for name in model_names:
        key = re.split(r":|/", name)[-1]
        variable_name = key.replace("-", "_").replace(".", "")
        print(f' "{key}": {variable_name},')
|
|
|
|
|
def main() -> None:
    """Fetch the model table and print it in three forms.

    Prints the converted model info as indented JSON, then a divider, the
    ``Model(...)`` statements from ``print_providers``, another divider,
    and the mapping lines from ``print_convert``.
    """
    raw_models = get_model_info()
    model_info = convert_model_info(raw_models)
    print(json.dumps(model_info, indent=2))

    model_names = get_model_names(model_info)
    divider = "-------" * 40

    print(divider)
    print_providers(model_names)
    print(divider)
    print_convert(model_names)
|
|
|
|
|
# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
|