| 49161 |
verl_agent_webshop-new-GRPO-kl0.01-from-sft-step-Llama-3.2-3B-Instruct-old_repo-150step |
HuggingFace |
1.30 |
unrated |
| 49162 |
verl_agent_webshop-new-GRPO-kl0.01-from-sft-step-Llama-3.2-3B-Instruct-old_repo-15step |
HuggingFace |
1.30 |
unrated |
| 49163 |
verl_agent_webshop-new-GRPO-kl0.01-from-sft-step-Llama-3.2-3B-Instruct-old_repo-30step |
HuggingFace |
1.30 |
unrated |
| 49164 |
verl_agent_webshop-new-GRPO-kl0.01-from-sft-step-Llama-3.2-3B-Instruct-old_repo-45step |
HuggingFace |
1.30 |
unrated |
| 49165 |
verl_agent_webshop-new-GRPO-kl0.01-from-sft-step-Llama-3.2-3B-Instruct-old_repo-60step |
HuggingFace |
1.30 |
unrated |
| 49166 |
verl_agent_webshop-new-GRPO-kl0.01-from-sft-step-Llama-3.2-3B-Instruct-old_repo-75step |
HuggingFace |
1.30 |
unrated |
| 49167 |
verl_agent_webshop-new-GRPO-kl0.01-from-sft-step-Llama-3.2-3B-Instruct-old_repo-90step |
HuggingFace |
1.30 |
unrated |
| 49168 |
verl_agent_webshop-new-GRPO-kl-0.01-from-webshop-20step-v2-Llama-3.1-8B-Instruct-105step |
HuggingFace |
1.30 |
unrated |
| 49169 |
verl_agent_webshop-new-GRPO-kl-0.01-from-webshop-20step-v2-Llama-3.1-8B-Instruct-120step |
HuggingFace |
1.30 |
unrated |
| 49170 |
verl_agent_webshop-new-GRPO-kl-0.01-from-webshop-20step-v2-Llama-3.1-8B-Instruct-135step |
HuggingFace |
1.30 |
unrated |
| 49171 |
verl_agent_webshop-new-GRPO-kl-0.01-from-webshop-20step-v2-Llama-3.1-8B-Instruct-150step |
HuggingFace |
1.30 |
unrated |
| 49172 |
verl_agent_webshop-new-GRPO-kl-0.01-from-webshop-20step-v2-Llama-3.1-8B-Instruct-15step |
HuggingFace |
1.30 |
unrated |
| 49173 |
verl_agent_webshop-new-GRPO-kl-0.01-from-webshop-20step-v2-Llama-3.1-8B-Instruct-30step |
HuggingFace |
1.30 |
unrated |
| 49174 |
verl_agent_webshop-new-GRPO-kl-0.01-from-webshop-20step-v2-Llama-3.1-8B-Instruct-45step |
HuggingFace |
1.30 |
unrated |
| 49175 |
verl_agent_webshop-new-GRPO-kl-0.01-from-webshop-20step-v2-Llama-3.1-8B-Instruct-60step |
HuggingFace |
1.30 |
unrated |
| 49176 |
verl_agent_webshop-new-GRPO-kl-0.01-from-webshop-20step-v2-Llama-3.1-8B-Instruct-75step |
HuggingFace |
1.30 |
unrated |
| 49177 |
verl_agent_webshop-new-GRPO-kl-0.01-from-webshop-20step-v2-Llama-3.1-8B-Instruct-90step |
HuggingFace |
1.30 |
unrated |
| 49178 |
verl_agent_webshop-new-GRPO-kl-0.01-from-webshop-20step-v2-Qwen2.5-7B-Instruct-105step |
HuggingFace |
1.30 |
unrated |
| 49179 |
verl_agent_webshop-new-GRPO-kl-0.01-from-webshop-20step-v2-Qwen2.5-7B-Instruct-120step |
HuggingFace |
1.30 |
unrated |
| 49180 |
verl_agent_webshop-new-GRPO-kl-0.01-from-webshop-20step-v2-Qwen2.5-7B-Instruct-135step |
HuggingFace |
1.30 |
unrated |