| 49141 |
verl_agent_webshop-new-GRPO-from-webshop-20step-v2-Qwen2.5-7B-Instruct-o16-t-150step |
HuggingFace |
1.30 |
unrated |
| 49142 |
verl_agent_webshop-new-GRPO-from-webshop-20step-v2-Qwen2.5-7B-Instruct-o16-t-15step |
HuggingFace |
1.30 |
unrated |
| 49143 |
verl_agent_webshop-new-GRPO-from-webshop-20step-v2-Qwen2.5-7B-Instruct-o16-t-30step |
HuggingFace |
1.30 |
unrated |
| 49144 |
verl_agent_webshop-new-GRPO-from-webshop-20step-v2-Qwen2.5-7B-Instruct-o16-t-45step |
HuggingFace |
1.30 |
unrated |
| 49145 |
verl_agent_webshop-new-GRPO-from-webshop-20step-v2-Qwen2.5-7B-Instruct-o16-t-60step |
HuggingFace |
1.30 |
unrated |
| 49146 |
verl_agent_webshop-new-GRPO-from-webshop-20step-v2-Qwen2.5-7B-Instruct-o16-t-75step |
HuggingFace |
1.30 |
unrated |
| 49147 |
verl_agent_webshop-new-GRPO-from-webshop-20step-v2-Qwen2.5-7B-Instruct-o16-t-90step |
HuggingFace |
1.30 |
unrated |
| 49148 |
verl_agent_webshop-new-GRPO-from-webshop-20step-v2-Qwen2.5-7B-Instruct-only16-nothink-f-105step |
HuggingFace |
1.30 |
unrated |
| 49149 |
verl_agent_webshop-new-GRPO-from-webshop-20step-v2-Qwen2.5-7B-Instruct-only16-nothink-f-120step |
HuggingFace |
1.30 |
unrated |
| 49150 |
verl_agent_webshop-new-GRPO-from-webshop-20step-v2-Qwen2.5-7B-Instruct-only16-nothink-f-135step |
HuggingFace |
1.30 |
unrated |
| 49151 |
verl_agent_webshop-new-GRPO-from-webshop-20step-v2-Qwen2.5-7B-Instruct-only16-nothink-f-150step |
HuggingFace |
1.30 |
unrated |
| 49152 |
verl_agent_webshop-new-GRPO-from-webshop-20step-v2-Qwen2.5-7B-Instruct-only16-nothink-f-15step |
HuggingFace |
1.30 |
unrated |
| 49153 |
verl_agent_webshop-new-GRPO-from-webshop-20step-v2-Qwen2.5-7B-Instruct-only16-nothink-f-30step |
HuggingFace |
1.30 |
unrated |
| 49154 |
verl_agent_webshop-new-GRPO-from-webshop-20step-v2-Qwen2.5-7B-Instruct-only16-nothink-f-45step |
HuggingFace |
1.30 |
unrated |
| 49155 |
verl_agent_webshop-new-GRPO-from-webshop-20step-v2-Qwen2.5-7B-Instruct-only16-nothink-f-60step |
HuggingFace |
1.30 |
unrated |
| 49156 |
verl_agent_webshop-new-GRPO-from-webshop-20step-v2-Qwen2.5-7B-Instruct-only16-nothink-f-75step |
HuggingFace |
1.30 |
unrated |
| 49157 |
verl_agent_webshop-new-GRPO-from-webshop-20step-v2-Qwen2.5-7B-Instruct-only16-nothink-f-90step |
HuggingFace |
1.30 |
unrated |
| 49158 |
verl_agent_webshop-new-GRPO-kl0.01-from-sft-step-Llama-3.2-3B-Instruct-old_repo-105step |
HuggingFace |
1.30 |
unrated |
| 49159 |
verl_agent_webshop-new-GRPO-kl0.01-from-sft-step-Llama-3.2-3B-Instruct-old_repo-120step |
HuggingFace |
1.30 |
unrated |
| 49160 |
verl_agent_webshop-new-GRPO-kl0.01-from-sft-step-Llama-3.2-3B-Instruct-old_repo-135step |
HuggingFace |
1.30 |
unrated |