| 49181 |
verl_agent_webshop-new-GRPO-kl-0.01-Llama-3.1-8B-Instruct-webshop-15step-c1-135step |
HuggingFace |
1.30 |
unrated |
| 49182 |
verl_agent_webshop-new-GRPO-kl-0.01-Llama-3.1-8B-Instruct-webshop-15step-c1-150step |
HuggingFace |
1.30 |
unrated |
| 49183 |
verl_agent_webshop-new-GRPO-kl-0.01-Llama-3.1-8B-Instruct-webshop-15step-c2-only_16-105step |
HuggingFace |
1.30 |
unrated |
| 49184 |
verl_agent_webshop-new-GRPO-kl-0.01-Llama-3.1-8B-Instruct-webshop-15step-c2-only_16-120step |
HuggingFace |
1.30 |
unrated |
| 49185 |
verl_agent_webshop-new-GRPO-kl-0.01-Llama-3.1-8B-Instruct-webshop-15step-c2-only_16-135step |
HuggingFace |
1.30 |
unrated |
| 49186 |
verl_agent_webshop-new-GRPO-kl-0.01-Llama-3.1-8B-Instruct-webshop-15step-c2-only_16-150step |
HuggingFace |
1.30 |
unrated |
| 49187 |
verl_agent_webshop-new-GRPO-kl-0.01-Llama-3.1-8B-Instruct-webshop-15step-c2-only_16-15step |
HuggingFace |
1.30 |
unrated |
| 49188 |
verl_agent_webshop-new-GRPO-kl-0.01-Llama-3.1-8B-Instruct-webshop-15step-c2-only_16-30step |
HuggingFace |
1.30 |
unrated |
| 49189 |
verl_agent_webshop-new-GRPO-kl-0.01-Llama-3.1-8B-Instruct-webshop-15step-c2-only_16-45step |
HuggingFace |
1.30 |
unrated |
| 49190 |
verl_agent_webshop-new-GRPO-kl-0.01-Llama-3.1-8B-Instruct-webshop-15step-c2-only_16-60step |
HuggingFace |
1.30 |
unrated |
| 49191 |
verl_agent_webshop-new-GRPO-kl-0.01-Llama-3.1-8B-Instruct-webshop-15step-c2-only_16-75step |
HuggingFace |
1.30 |
unrated |
| 49192 |
verl_agent_webshop-new-GRPO-kl-0.01-Llama-3.1-8B-Instruct-webshop-15step-c2-only_16-90step |
HuggingFace |
1.30 |
unrated |
| 49193 |
verl_agent_webshop-new-GRPO-kl-0.01-Qwen2.5-7B-Instruct-webshop-15step-c2-only_16-105step |
HuggingFace |
1.30 |
unrated |
| 49194 |
verl_agent_webshop-new-GRPO-kl-0.01-Qwen2.5-7B-Instruct-webshop-15step-c2-only_16-120step |
HuggingFace |
1.30 |
unrated |
| 49195 |
verl_agent_webshop-new-GRPO-kl-0.01-Qwen2.5-7B-Instruct-webshop-15step-c2-only_16-135step |
HuggingFace |
1.30 |
unrated |
| 49196 |
verl_agent_webshop-new-GRPO-kl-0.01-Qwen2.5-7B-Instruct-webshop-15step-c2-only_16-150step |
HuggingFace |
1.30 |
unrated |
| 49197 |
verl_agent_webshop-new-GRPO-kl-0.01-Qwen2.5-7B-Instruct-webshop-15step-c2-only_16-15step |
HuggingFace |
1.30 |
unrated |
| 49198 |
verl_agent_webshop-new-GRPO-kl-0.01-Qwen2.5-7B-Instruct-webshop-15step-c2-only_16-30step |
HuggingFace |
1.30 |
unrated |
| 49199 |
verl_agent_webshop-new-GRPO-kl-0.01-Qwen2.5-7B-Instruct-webshop-15step-c2-only_16-45step |
HuggingFace |
1.30 |
unrated |
| 49200 |
verl_agent_webshop-new-GRPO-kl-0.01-Qwen2.5-7B-Instruct-webshop-15step-c2-only_16-60step |
HuggingFace |
1.30 |
unrated |