Update README.md
Browse files
README.md
CHANGED
@@ -28,15 +28,22 @@ GOVERNING TERMS: Use of this model is governed by the [NVIDIA Open Model License
|
|
28 |
|
29 |
## Arena Hard Leaderboard
|
30 |
|
31 |
-
As of 18 Mar 2025, augmenting models with the Feedback-Edit Inference Time Scaling (ITS) approach
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
|
33 |
| Model | Arena Hard (95% CI) |
|
34 |
|:-----------------------------|:----------------|
|
35 |
-
| Llama-3.3-Nemotron-49B-
|
36 |
| Llama-3.1-Nemotron-70B-Instruct + **Feedback-Edit ITS** | 92.7 (-1.2, 0.9) |
|
37 |
| o1-mini-2024-09-12 | 92.0 (-1.2, 1.0) |
|
38 |
| o1-preview-2024-09-12 | 90.4 (-1.1, 1.3) |
|
39 |
-
| Llama-3.3-Nemotron-49B-
|
40 |
| claude-3-5-sonnet-20241022 | 85.2 (-1.4, 1.6) |
|
41 |
| Llama-3.1-Nemotron-70B-Instruct | 84.9 (-1.7, 1.8) |
|
42 |
|
@@ -102,7 +109,7 @@ This code has been tested on Transformers v4.45.0, torch v2.3.0a0+40ec155e58.nv2
|
|
102 |
import torch
|
103 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
104 |
|
105 |
-
model_name = "nvidia/Llama-3.3-Nemotron-70B-Select
|
106 |
|
107 |
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")
|
108 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
|
|
28 |
|
29 |
## Arena Hard Leaderboard
|
30 |
|
31 |
+
As of 18 Mar 2025, augmenting models with the Feedback-Edit Inference Time Scaling (ITS) approach leads to the highest performance on Arena Hard.
|
32 |
+
|
33 |
+
The Feedback-Edit Inference Time Scaling system comprises the following models:
|
34 |
+
|
35 |
+
1. [Llama-3.3-Nemotron-70B-Feedback](https://huggingface.co/nvidia/Llama-3.3-Nemotron-70B-Feedback)
|
36 |
+
2. [Llama-3.3-Nemotron-70B-Edit](https://huggingface.co/nvidia/Llama-3.3-Nemotron-70B-Edit)
|
37 |
+
3. [Llama-3.3-Nemotron-70B-Select](https://huggingface.co/nvidia/Llama-3.3-Nemotron-70B-Select)
|
38 |
+
|
39 |
|
40 |
| Model | Arena Hard (95% CI) |
|
41 |
|:-----------------------------|:----------------|
|
42 |
+
| Llama-3.3-Nemotron-Super-49B-v1 + **Feedback-Edit ITS** | **93.4 (-1.1, 1.0)** |
|
43 |
| Llama-3.1-Nemotron-70B-Instruct + **Feedback-Edit ITS** | 92.7 (-1.2, 0.9) |
|
44 |
| o1-mini-2024-09-12 | 92.0 (-1.2, 1.0) |
|
45 |
| o1-preview-2024-09-12 | 90.4 (-1.1, 1.3) |
|
46 |
+
| Llama-3.3-Nemotron-Super-49B-v1 | 88.3 (-1.6, 1.6) |
|
47 |
| claude-3-5-sonnet-20241022 | 85.2 (-1.4, 1.6) |
|
48 |
| Llama-3.1-Nemotron-70B-Instruct | 84.9 (-1.7, 1.8) |
|
49 |
|
|
|
109 |
import torch
|
110 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
111 |
|
112 |
+
model_name = "nvidia/Llama-3.3-Nemotron-70B-Select"
|
113 |
|
114 |
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")
|
115 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|