Thomas G. Lopes committed
Commit e8b5344 · Parent: 25c63d0

wip session as store

src/lib/components/InferencePlayground/InferencePlayground.svelte CHANGED
@@ -1,10 +1,8 @@
 <script lang="ts">
-  import type { Conversation, ConversationMessage, ModelEntryWithTokenizer, Session } from "./types";
+  import type { Conversation, ConversationMessage, ModelEntryWithTokenizer } from "./types";
 
   import { page } from "$app/stores";
-  import { defaultGenerationConfig } from "./generationConfigSettings";
   import {
-    FEATURED_MODELS_IDS,
     handleNonStreamingResponse,
     handleStreamingResponse,
     isSystemPromptSupported,
@@ -12,6 +10,7 @@
 
   import { goto } from "$app/navigation";
   import { models } from "$lib/stores/models";
+  import { session } from "$lib/stores/session";
   import { token } from "$lib/stores/token";
   import { isMac } from "$lib/utils/platform";
   import { HfInference } from "@huggingface/inference";
@@ -23,44 +22,12 @@
   import IconThrashcan from "../Icons/IconThrashcan.svelte";
   import PlaygroundConversation from "./InferencePlaygroundConversation.svelte";
   import PlaygroundConversationHeader from "./InferencePlaygroundConversationHeader.svelte";
-  import GenerationConfig, { defaultSystemMessage } from "./InferencePlaygroundGenerationConfig.svelte";
+  import GenerationConfig from "./InferencePlaygroundGenerationConfig.svelte";
   import HFTokenModal from "./InferencePlaygroundHFTokenModal.svelte";
   import ModelSelector from "./InferencePlaygroundModelSelector.svelte";
   import ModelSelectorModal from "./InferencePlaygroundModelSelectorModal.svelte";
 
   const startMessageUser: ConversationMessage = { role: "user", content: "" };
-  const modelIdsFromQueryParam = $page.url.searchParams.get("modelId")?.split(",");
-  const modelsFromQueryParam = modelIdsFromQueryParam?.map(id => $models.find(model => model.id === id));
-  const systemMessage: ConversationMessage = {
-    role: "system",
-    content: modelIdsFromQueryParam ? (defaultSystemMessage?.[modelIdsFromQueryParam[0]] ?? "") : "",
-  };
-
-  let session: Session = {
-    conversations: [
-      {
-        model: $models.find(m => FEATURED_MODELS_IDS.includes(m.id)) ?? $models[0],
-        config: { ...defaultGenerationConfig },
-        messages: [{ ...startMessageUser }],
-        systemMessage,
-        streaming: true,
-      },
-    ],
-  };
-
-  if (modelsFromQueryParam?.length) {
-    const conversations = modelsFromQueryParam.map(model => {
-      return {
-        model,
-        config: { ...defaultGenerationConfig },
-        messages: [{ ...startMessageUser }],
-        systemMessage,
-        streaming: true,
-      };
-    }) as [Conversation] | [Conversation, Conversation];
-    session.conversations = conversations;
-    session = session;
-  }
 
   let viewCode = false;
   let viewSettings = false;
@@ -73,36 +40,38 @@
     latency: number;
     generatedTokensCount: number;
   }
-  let generationStats = session.conversations.map(_ => ({ latency: 0, generatedTokensCount: 0 })) as
+  let generationStats = $session.conversations.map(_ => ({ latency: 0, generatedTokensCount: 0 })) as
     | [GenerationStatistics]
    | [GenerationStatistics, GenerationStatistics];
 
-  $: systemPromptSupported = session.conversations.some(conversation => isSystemPromptSupported(conversation.model));
-  $: compareActive = session.conversations.length === 2;
+  $: systemPromptSupported = $session.conversations.some(conversation => isSystemPromptSupported(conversation.model));
+  $: compareActive = $session.conversations.length === 2;
 
   function addMessage(conversationIdx: number) {
-    const conversation = session.conversations[conversationIdx];
+    const conversation = $session.conversations[conversationIdx];
+    if (!conversation) return;
+    const msgs = conversation.messages.slice();
     conversation.messages = [
-      ...conversation.messages,
+      ...msgs,
       {
-        role: conversation.messages.at(-1)?.role === "user" ? "assistant" : "user",
+        role: msgs.at(-1)?.role === "user" ? "assistant" : "user",
         content: "",
       },
     ];
-    session = session;
+    $session = $session;
   }
 
   function deleteMessage(conversationIdx: number, idx: number) {
-    session.conversations[conversationIdx].messages.splice(idx, 1)[0];
-    session = session;
+    $session.conversations[conversationIdx]?.messages.splice(idx, 1)[0];
+    $session = $session;
   }
 
   function reset() {
-    session.conversations.map(conversation => {
+    $session.conversations.map(conversation => {
       conversation.systemMessage.content = "";
       conversation.messages = [{ ...startMessageUser }];
     });
-    session = session;
+    // session = session;
   }
 
   function abort() {
@@ -136,8 +105,9 @@
           conversation.messages = [...conversation.messages, streamingMessage];
           addStreamingMessage = false;
         }
-        session = session;
-        generationStats[conversationIdx].generatedTokensCount += 1;
+        $session = $session;
+        const c = generationStats[conversationIdx];
+        if (c) c.generatedTokensCount += 1;
       }
     },
     abortController
@@ -151,12 +121,14 @@
     // check if the user did not abort the request
     if (waitForNonStreaming) {
       conversation.messages = [...conversation.messages, newMessage];
-      generationStats[conversationIdx].generatedTokensCount += newTokensCount;
+      const c = generationStats[conversationIdx];
+      if (c) c.generatedTokensCount += newTokensCount;
     }
   }
 
   const endTime = performance.now();
-  generationStats[conversationIdx].latency = Math.round(endTime - startTime);
+  const c = generationStats[conversationIdx];
+  if (c) c.latency = Math.round(endTime - startTime);
 }
 
 async function submit() {
@@ -165,10 +137,10 @@
     return;
   }
 
-  for (const [idx, conversation] of session.conversations.entries()) {
+  for (const [idx, conversation] of $session.conversations.entries()) {
     if (conversation.messages.at(-1)?.role === "assistant") {
       let prefix = "";
-      if (session.conversations.length === 2) {
+      if ($session.conversations.length === 2) {
         prefix = `Error on ${idx === 0 ? "left" : "right"} conversation. `;
       }
       return alert(`${prefix}Messages must alternate between user/assistant roles.`);
@@ -179,15 +151,15 @@
   loading = true;
 
   try {
-    const promises = session.conversations.map((conversation, idx) => runInference(conversation, idx));
+    const promises = $session.conversations.map((conversation, idx) => runInference(conversation, idx));
     await Promise.all(promises);
   } catch (error) {
-    for (const conversation of session.conversations) {
+    for (const conversation of $session.conversations) {
       if (conversation.messages.at(-1)?.role === "assistant" && !conversation.messages.at(-1)?.content?.trim()) {
         conversation.messages.pop();
         conversation.messages = [...conversation.messages];
       }
-      session = session;
+      $session = $session;
     }
     if (error instanceof Error) {
       if (error.message.includes("token seems invalid")) {
@@ -226,16 +198,16 @@
 
   function addCompareModel(modelId: ModelEntryWithTokenizer["id"]) {
     const model = $models.find(m => m.id === modelId);
-    if (!model || session.conversations.length === 2) {
+    if (!model || $session.conversations.length === 2) {
       return;
     }
-    const newConversation = { ...JSON.parse(JSON.stringify(session.conversations[0])), model };
-    session.conversations = [...session.conversations, newConversation];
+    const newConversation = { ...JSON.parse(JSON.stringify($session.conversations[0])), model };
+    $session.conversations = [...$session.conversations, newConversation];
     generationStats = [generationStats[0], { latency: 0, generatedTokensCount: 0 }];
 
     // update query param
     const url = new URL($page.url);
-    const queryParamValue = `${session.conversations[0].model.id},${modelId}`;
+    const queryParamValue = `${$session.conversations[0].model.id},${modelId}`;
     url.searchParams.set("modelId", queryParamValue);
 
     const parentOrigin = "https://huggingface.co";
@@ -244,8 +216,8 @@
   }
 
   function removeCompareModal(conversationIdx: number) {
-    session.conversations.splice(conversationIdx, 1)[0];
-    session = session;
+    $session.conversations.splice(conversationIdx, 1)[0];
+    $session = $session;
     generationStats.splice(conversationIdx, 1)[0];
     generationStats = generationStats;
 
@@ -297,12 +269,12 @@
       placeholder={systemPromptSupported
         ? "Enter a custom prompt"
         : "System prompt is not supported with the chosen model."}
-      value={systemPromptSupported ? session.conversations[0].systemMessage.content : ""}
+      value={systemPromptSupported ? $session.conversations[0].systemMessage.content : ""}
       on:input={e => {
-        for (const conversation of session.conversations) {
+        for (const conversation of $session.conversations) {
           conversation.systemMessage.content = e.currentTarget.value;
         }
-        session = session;
+        $session = $session;
       }}
       class="absolute inset-x-0 bottom-0 h-full resize-none bg-transparent px-3 pt-10 text-sm outline-hidden"
     ></textarea>
@@ -312,7 +284,7 @@
   <div
     class="flex h-[calc(100dvh-5rem-120px)] divide-x divide-gray-200 overflow-x-auto overflow-y-hidden *:w-full max-sm:w-dvw md:h-[calc(100dvh-5rem)] md:pt-3 dark:divide-gray-800"
   >
-    {#each session.conversations as conversation, conversationIdx}
+    {#each $session.conversations as conversation, conversationIdx}
       <div class="max-sm:min-w-full">
         {#if compareActive}
           <PlaygroundConversationHeader
@@ -382,7 +354,7 @@
       {#if loading}
         <div class="flex flex-none items-center gap-[3px]">
           <span class="mr-2">
-            {#if session.conversations[0].streaming || session.conversations[1]?.streaming}
+            {#if $session.conversations[0].streaming || $session.conversations[1]?.streaming}
               Stop
             {:else}
               Cancel
@@ -417,7 +389,7 @@
     class="flex flex-1 flex-col gap-6 overflow-y-hidden rounded-xl border border-gray-200/80 bg-white bg-linear-to-b from-white via-white p-3 shadow-xs dark:border-white/5 dark:bg-gray-900 dark:from-gray-800/40 dark:via-gray-800/40"
   >
     <div class="flex flex-col gap-2">
-      <ModelSelector bind:conversation={session.conversations[0]} />
+      <ModelSelector bind:conversation={$session.conversations[0]} />
       <div class="flex items-center gap-2 self-end px-2 text-xs whitespace-nowrap">
         <button
           class="flex items-center gap-0.5 text-gray-500 hover:text-gray-700 dark:text-gray-400 dark:hover:text-gray-300"
@@ -427,7 +399,7 @@
           Compare
         </button>
         <a
-          href="https://huggingface.co/{session.conversations[0].model.id}"
+          href="https://huggingface.co/{$session.conversations[0].model.id}"
           target="_blank"
           class="flex items-center gap-0.5 text-gray-500 hover:text-gray-700 dark:text-gray-400 dark:hover:text-gray-300"
         >
@@ -439,7 +411,7 @@
       </div>
     </div>
 
-    <GenerationConfig bind:conversation={session.conversations[0]} />
+    <GenerationConfig bind:conversation={$session.conversations[0]} />
     {#if $token.value}
       <button
         on:click={token.reset}
@@ -496,7 +468,7 @@
 
   {#if selectCompareModelOpen}
     <ModelSelectorModal
-      conversation={session.conversations[0]}
+      conversation={$session.conversations[0]}
      on:modelSelected={e => addCompareModel(e.detail)}
      on:close={() => (selectCompareModelOpen = false)}
    />
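
Note on the session migration above: inside a Svelte component, "$session = $session" compiles to session.set($session), which is what re-notifies subscribers after the nested mutations. A minimal sketch in plain TypeScript (the MiniSession shape is hypothetical, not the app's real types):

    import { get, writable } from "svelte/store";

    // Hypothetical miniature of the playground's session shape.
    interface MiniSession {
      conversations: { messages: { role: string; content: string }[] }[];
    }

    const session = writable<MiniSession>({ conversations: [{ messages: [] }] });

    // Mutating nested state does not notify subscribers by itself...
    const s = get(session);
    s.conversations[0]?.messages.push({ role: "user", content: "hi" });

    // ...so the component re-assigns, which is exactly what
    // `$session = $session` desugars to inside a .svelte file.
    session.set(s);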
src/lib/components/InferencePlayground/InferencePlaygroundCodeSnippets.svelte CHANGED
@@ -76,7 +76,7 @@
   const placeholder = [{ role: "user", content: "Tell me a story" }];
 
   let messages = [...conversation.messages];
-  if (messages.length === 1 && messages[0].role === "user" && !messages[0].content) {
+  if (messages.length === 1 && messages[0]?.role === "user" && !messages[0]?.content) {
     messages = placeholder;
   }
 
@@ -468,9 +468,9 @@ print(completion.choices[0].message)`,
   </ul>
   </div>
 
-  {#if clientSnippetsByLang[selectedLanguage].length > 1}
+  {#if (clientSnippetsByLang[selectedLanguage]?.length ?? 0) > 1}
     <div class="flex gap-x-2 px-2 pt-6">
-      {#each clientSnippetsByLang[selectedLanguage] as { name }, idx}
+      {#each clientSnippetsByLang[selectedLanguage] ?? [] as { name }, idx}
         <button
           class="rounded-md px-1.5 py-0.5 leading-tight {idx === selectedClientIdxByLang[selectedLanguage]
             ? 'bg-black text-gray-100 dark:bg-gray-600 dark:text-white'
@@ -481,7 +481,7 @@ print(completion.choices[0].message)`,
     </div>
   {/if}
 
-  {#each clientSnippetsByLang[selectedLanguage] as { snippets }, idx}
+  {#each clientSnippetsByLang[selectedLanguage] ?? [] as { snippets }, idx}
     {#if idx === selectedClientIdxByLang[selectedLanguage]}
       {#each snippets as { label, code, language, needsToken }}
         <div class="flex items-center justify-between px-2 pt-6 pb-4">
src/lib/components/InferencePlayground/InferencePlaygroundConversationHeader.svelte CHANGED
@@ -41,7 +41,7 @@
     }
   }
 
-  $: [nameSpace] = conversation.model.id.split("/");
+  $: nameSpace = conversation.model.id.split("/")[0] ?? "";
 </script>
 
 {#if modelSelectorOpen}
src/lib/components/InferencePlayground/InferencePlaygroundGenerationConfig.svelte CHANGED
@@ -1,70 +1,12 @@
-<script context="module" lang="ts">
-  export const defaultSystemMessage: { [key: string]: string } = {
-    "Qwen/QwQ-32B-Preview":
-      "You are a helpful and harmless assistant. You are Qwen developed by Alibaba. You should think step-by-step.",
-  } as const;
-</script>
-
 <script lang="ts">
   import type { Conversation } from "$lib/components/InferencePlayground/types";
 
   import { GENERATION_CONFIG_KEYS, GENERATION_CONFIG_SETTINGS } from "./generationConfigSettings";
+  import { customMaxTokens } from "./inferencePlaygroundUtils";
 
   export let conversation: Conversation;
   export let classNames = "";
 
-  const customMaxTokens: { [key: string]: number } = {
-    "01-ai/Yi-1.5-34B-Chat": 2048,
-    "HuggingFaceM4/idefics-9b-instruct": 2048,
-    "deepseek-ai/DeepSeek-Coder-V2-Instruct": 16384,
-    "bigcode/starcoder": 8192,
-    "bigcode/starcoderplus": 8192,
-    "HuggingFaceH4/starcoderbase-finetuned-oasst1": 8192,
-    "google/gemma-7b": 8192,
-    "google/gemma-1.1-7b-it": 8192,
-    "google/gemma-2b": 8192,
-    "google/gemma-1.1-2b-it": 8192,
-    "google/gemma-2-27b-it": 8192,
-    "google/gemma-2-9b-it": 4096,
-    "google/gemma-2-2b-it": 8192,
-    "tiiuae/falcon-7b": 8192,
-    "tiiuae/falcon-7b-instruct": 8192,
-    "timdettmers/guanaco-33b-merged": 2048,
-    "mistralai/Mixtral-8x7B-Instruct-v0.1": 32768,
-    "Qwen/Qwen2.5-72B-Instruct": 32768,
-    "Qwen/Qwen2.5-Coder-32B-Instruct": 32768,
-    "meta-llama/Meta-Llama-3-70B-Instruct": 8192,
-    "CohereForAI/c4ai-command-r-plus-08-2024": 32768,
-    "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO": 32768,
-    "meta-llama/Llama-2-70b-chat-hf": 8192,
-    "HuggingFaceH4/zephyr-7b-alpha": 17432,
-    "HuggingFaceH4/zephyr-7b-beta": 32768,
-    "mistralai/Mistral-7B-Instruct-v0.1": 32768,
-    "mistralai/Mistral-7B-Instruct-v0.2": 32768,
-    "mistralai/Mistral-7B-Instruct-v0.3": 32768,
-    "mistralai/Mistral-Nemo-Instruct-2407": 32768,
-    "meta-llama/Meta-Llama-3-8B-Instruct": 8192,
-    "mistralai/Mistral-7B-v0.1": 32768,
-    "bigcode/starcoder2-3b": 16384,
-    "bigcode/starcoder2-15b": 16384,
-    "HuggingFaceH4/starchat2-15b-v0.1": 16384,
-    "codellama/CodeLlama-7b-hf": 8192,
-    "codellama/CodeLlama-13b-hf": 8192,
-    "codellama/CodeLlama-34b-Instruct-hf": 8192,
-    "meta-llama/Llama-2-7b-chat-hf": 8192,
-    "meta-llama/Llama-2-13b-chat-hf": 8192,
-    "OpenAssistant/oasst-sft-6-llama-30b": 2048,
-    "TheBloke/vicuna-7B-v1.5-GPTQ": 2048,
-    "HuggingFaceH4/starchat-beta": 8192,
-    "bigcode/octocoder": 8192,
-    "vwxyzjn/starcoderbase-triviaqa": 8192,
-    "lvwerra/starcoderbase-gsm8k": 8192,
-    "NousResearch/Hermes-3-Llama-3.1-8B": 16384,
-    "microsoft/Phi-3.5-mini-instruct": 32768,
-    "meta-llama/Llama-3.1-70B-Instruct": 32768,
-    "meta-llama/Llama-3.1-8B-Instruct": 8192,
-  } as const;
-
   $: modelMaxLength = customMaxTokens[conversation.model.id] ?? conversation.model.tokenizerConfig.model_max_length;
   $: maxTokens = Math.min(modelMaxLength ?? GENERATION_CONFIG_SETTINGS["max_tokens"].max, 64_000);
 </script>
src/lib/components/InferencePlayground/InferencePlaygroundModelSelector.svelte CHANGED
@@ -4,12 +4,13 @@
   import { goto } from "$app/navigation";
   import { page } from "$app/stores";
 
+  import { fetchHuggingFaceModel, type InferenceProviderMapping } from "$lib/fetchers/providers";
+  import { models } from "$lib/stores/models";
+  import { token } from "$lib/stores/token";
+  import Avatar from "../Avatar.svelte";
   import IconCaret from "../Icons/IconCaret.svelte";
   import ModelSelectorModal from "./InferencePlaygroundModelSelectorModal.svelte";
-  import Avatar from "../Avatar.svelte";
-  import { defaultSystemMessage } from "./InferencePlaygroundGenerationConfig.svelte";
-  import { models } from "$lib/stores/models";
-  import { fetchHuggingFaceModel, type Provider } from "$lib/fetchers/providers";
+  import { defaultSystemMessage } from "./inferencePlaygroundUtils";
 
   export let conversation: Conversation;
 
@@ -32,12 +33,16 @@
     goto(url.toString(), { replaceState: true });
   }
 
-  $: [nameSpace, modelName] = conversation.model.id.split("/");
+  $: nameSpace = conversation.model.id.split("/")[0] ?? "";
+  $: modelName = conversation.model.id.split("/")[1] ?? "";
 
   async function loadProviders(modelId: string) {
-    const providers = await fetchHuggingFaceModel;
+    providerMap = {};
+    const res = await fetchHuggingFaceModel(modelId, $token.value);
+    providerMap = res.inferenceProviderMapping;
   }
-  let providers: Provider[] = [];
+  let providerMap: InferenceProviderMapping = {};
+  // $: loadProviders(conversation.model.id);
 
   const id = crypto.randomUUID();
 </script>
src/lib/components/InferencePlayground/inferencePlaygroundUtils.ts CHANGED
@@ -55,7 +55,7 @@ export async function handleNonStreamingResponse(
 }
 
 export function isSystemPromptSupported(model: ModelEntryWithTokenizer) {
-  return model.tokenizerConfig?.chat_template?.includes("system");
+  return model?.tokenizerConfig?.chat_template?.includes("system");
 }
 
 export const FEATURED_MODELS_IDS = [
@@ -65,3 +65,60 @@ export const FEATURED_MODELS_IDS = [
   "Qwen/Qwen2.5-72B-Instruct",
   "Qwen/QwQ-32B-Preview",
 ];
+
+export const defaultSystemMessage: { [key: string]: string } = {
+  "Qwen/QwQ-32B-Preview":
+    "You are a helpful and harmless assistant. You are Qwen developed by Alibaba. You should think step-by-step.",
+} as const;
+
+export const customMaxTokens: { [key: string]: number } = {
+  "01-ai/Yi-1.5-34B-Chat": 2048,
+  "HuggingFaceM4/idefics-9b-instruct": 2048,
+  "deepseek-ai/DeepSeek-Coder-V2-Instruct": 16384,
+  "bigcode/starcoder": 8192,
+  "bigcode/starcoderplus": 8192,
+  "HuggingFaceH4/starcoderbase-finetuned-oasst1": 8192,
+  "google/gemma-7b": 8192,
+  "google/gemma-1.1-7b-it": 8192,
+  "google/gemma-2b": 8192,
+  "google/gemma-1.1-2b-it": 8192,
+  "google/gemma-2-27b-it": 8192,
+  "google/gemma-2-9b-it": 4096,
+  "google/gemma-2-2b-it": 8192,
+  "tiiuae/falcon-7b": 8192,
+  "tiiuae/falcon-7b-instruct": 8192,
+  "timdettmers/guanaco-33b-merged": 2048,
+  "mistralai/Mixtral-8x7B-Instruct-v0.1": 32768,
+  "Qwen/Qwen2.5-72B-Instruct": 32768,
+  "Qwen/Qwen2.5-Coder-32B-Instruct": 32768,
+  "meta-llama/Meta-Llama-3-70B-Instruct": 8192,
+  "CohereForAI/c4ai-command-r-plus-08-2024": 32768,
+  "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO": 32768,
+  "meta-llama/Llama-2-70b-chat-hf": 8192,
+  "HuggingFaceH4/zephyr-7b-alpha": 17432,
+  "HuggingFaceH4/zephyr-7b-beta": 32768,
+  "mistralai/Mistral-7B-Instruct-v0.1": 32768,
+  "mistralai/Mistral-7B-Instruct-v0.2": 32768,
+  "mistralai/Mistral-7B-Instruct-v0.3": 32768,
+  "mistralai/Mistral-Nemo-Instruct-2407": 32768,
+  "meta-llama/Meta-Llama-3-8B-Instruct": 8192,
+  "mistralai/Mistral-7B-v0.1": 32768,
+  "bigcode/starcoder2-3b": 16384,
+  "bigcode/starcoder2-15b": 16384,
+  "HuggingFaceH4/starchat2-15b-v0.1": 16384,
+  "codellama/CodeLlama-7b-hf": 8192,
+  "codellama/CodeLlama-13b-hf": 8192,
+  "codellama/CodeLlama-34b-Instruct-hf": 8192,
+  "meta-llama/Llama-2-7b-chat-hf": 8192,
+  "meta-llama/Llama-2-13b-chat-hf": 8192,
+  "OpenAssistant/oasst-sft-6-llama-30b": 2048,
+  "TheBloke/vicuna-7B-v1.5-GPTQ": 2048,
+  "HuggingFaceH4/starchat-beta": 8192,
+  "bigcode/octocoder": 8192,
+  "vwxyzjn/starcoderbase-triviaqa": 8192,
+  "lvwerra/starcoderbase-gsm8k": 8192,
+  "NousResearch/Hermes-3-Llama-3.1-8B": 16384,
+  "microsoft/Phi-3.5-mini-instruct": 32768,
+  "meta-llama/Llama-3.1-70B-Instruct": 32768,
+  "meta-llama/Llama-3.1-8B-Instruct": 8192,
+} as const;
src/lib/stores/models.ts CHANGED
@@ -1,5 +1,8 @@
-import { page } from "$app/stores";
 import type { ModelEntryWithTokenizer } from "$lib/components/InferencePlayground/types";
+import { safePage } from "$lib/utils/store";
 import { derived } from "svelte/store";
 
-export const models = derived(page, $page => $page.data.models as ModelEntryWithTokenizer[]);
+export const models = derived(safePage, $page => {
+  const res: ModelEntryWithTokenizer[] = $page?.data?.models ?? [];
+  return res;
+});
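
The rewritten models store falls back to [] when safePage yields undefined (i.e. during server-side rendering). A small consumption sketch, using only the store above:

    import { get } from "svelte/store";
    import { models } from "$lib/stores/models";

    // `get` subscribes, reads the current value, and unsubscribes immediately;
    // on the server this is [] because safePage resolves to `undefined`.
    const first = get(models)[0];
    console.log(first?.id ?? "no models loaded yet");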
src/lib/stores/session.ts ADDED
@@ -0,0 +1,60 @@
+import type { Conversation, Session } from "$lib/components/InferencePlayground/types";
+import { defaultGenerationConfig } from "$lib/components/InferencePlayground/generationConfigSettings";
+import {
+  defaultSystemMessage,
+  FEATURED_MODELS_IDS,
+} from "$lib/components/InferencePlayground/inferencePlaygroundUtils";
+
+import { models } from "$lib/stores/models";
+import { get, writable } from "svelte/store";
+import type { ChatCompletionInputMessage } from "@huggingface/tasks";
+import { partialSet, safePage } from "$lib/utils/store";
+
+export function createSessionStore() {
+  const startMessageUser: ChatCompletionInputMessage = { role: "user", content: "" };
+  const modelIdsFromQueryParam = get(safePage)?.url?.searchParams?.get("modelId")?.split(",");
+  const modelsFromQueryParam = modelIdsFromQueryParam?.map(id => get(models).find(model => model.id === id));
+  const systemMessage: ChatCompletionInputMessage = {
+    role: "system",
+    content: modelIdsFromQueryParam?.[0] ? (defaultSystemMessage?.[modelIdsFromQueryParam[0]] ?? "") : "",
+  };
+
+  const store = writable<Session>({
+    conversations: [
+      {
+        model: get(models).find(m => FEATURED_MODELS_IDS.includes(m.id)) ??
+          get(models)[0] ?? {
+            id: "",
+            downloads: 0,
+            gated: false,
+            likes: 0,
+            name: "",
+            private: false,
+            tokenizerConfig: {},
+            updatedAt: new Date(),
+          },
+        config: { ...defaultGenerationConfig },
+        messages: [{ ...startMessageUser }],
+        systemMessage,
+        streaming: true,
+      },
+    ],
+  });
+
+  if (modelsFromQueryParam?.length) {
+    const conversations = modelsFromQueryParam.map(model => {
+      return {
+        model,
+        config: { ...defaultGenerationConfig },
+        messages: [{ ...startMessageUser }],
+        systemMessage,
+        streaming: true,
+      };
+    }) as [Conversation] | [Conversation, Conversation];
+    partialSet(store, { conversations });
+  }
+
+  return store;
+}
+
+export const session = createSessionStore();
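
Since session is now a plain writable store, it can also be observed outside component code. A usage sketch under that assumption:

    import { session } from "$lib/stores/session";

    // Outside a .svelte file there is no `$session` sugar; subscribe manually.
    const unsubscribe = session.subscribe(s => {
      console.log(`session has ${s.conversations.length} conversation(s)`);
    });

    // ...later, when done listening:
    unsubscribe();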
src/lib/utils/store.ts ADDED
@@ -0,0 +1,9 @@
+import { browser } from "$app/environment";
+import { page } from "$app/stores";
+import { readable, type Writable } from "svelte/store";
+
+export function partialSet<T extends Record<string, unknown>>(store: Writable<T>, partial: Partial<T>) {
+  store.update(s => ({ ...s, ...partial }));
+}
+
+export const safePage = browser ? page : readable(undefined);
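
A usage sketch for these helpers: partialSet shallow-merges a patch into any object-valued writable, and safePage stands in for page so server-side reads return undefined instead of throwing. The prefs store here is hypothetical:

    import { get, writable } from "svelte/store";
    import { partialSet, safePage } from "$lib/utils/store";

    const prefs = writable({ theme: "dark", fontSize: 14 });

    // Only the listed keys change; the rest of the object is preserved.
    partialSet(prefs, { fontSize: 16 });
    console.log(get(prefs)); // { theme: "dark", fontSize: 16 }

    // Safe on the server: safePage is readable(undefined) outside the browser.
    const modelId = get(safePage)?.url.searchParams.get("modelId");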
src/routes/{+page.server.ts → +layout.server.ts} RENAMED
@@ -1,9 +1,9 @@
 import type { ModelEntryWithTokenizer } from "$lib/components/InferencePlayground/types";
 import type { ModelEntry } from "@huggingface/hub";
-import type { PageServerLoad } from "./$types";
+import type { LayoutServerLoad } from "./$types";
 import { env } from "$env/dynamic/private";
 
-export const load: PageServerLoad = async ({ fetch }) => {
+export const load: LayoutServerLoad = async ({ fetch }) => {
   const apiUrl =
     "https://huggingface.co/api/models?pipeline_tag=text-generation&inference_provider=hf-inference&filter=conversational";
   const HF_TOKEN = env.HF_TOKEN;
@@ -18,7 +18,6 @@ export const load: PageServerLoad = async ({ fetch }) => {
     return { models: [] };
   }
   const compatibleModels: ModelEntry[] = await res.json();
-  console.log(compatibleModels);
   compatibleModels.sort((a, b) => a.id.toLowerCase().localeCompare(b.id.toLowerCase()));
 
   const promises = compatibleModels.map(async model => {
tsconfig.json CHANGED
@@ -9,7 +9,8 @@
   "skipLibCheck": true,
   "sourceMap": true,
   "strict": true,
-  "target": "ES2018"
+  "target": "ES2018",
+  "noUncheckedIndexedAccess": true
  },
  "exclude": ["vite.config.ts"]
  // Path aliases are handled by https://kit.svelte.dev/docs/configuration#alias
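
Enabling noUncheckedIndexedAccess is what drives the new guards throughout this commit (messages[0]?.role, the generationStats[conversationIdx] null checks, split("/")[0] ?? ""): every array or index-signature access now has undefined added to its type. A minimal illustration:

    const parts = "meta-llama/Llama-3.1-8B-Instruct".split("/");

    // Without the flag this is `string`; with it, `string | undefined`.
    const nameSpace = parts[0];

    // nameSpace.toUpperCase();                  // compile error under the flag
    const safe = (parts[0] ?? "").toUpperCase(); // narrow first, then use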