| 49101 |
verl-agent-sciworld-GRPO-kl0.01-from-sft-step100-Llama-3.1-8B-Instruct-nothink-90step |
HuggingFace |
1.30 |
unrated |
| 49102 |
verl_agent_webshop-new-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-nothink-105step |
HuggingFace |
1.30 |
unrated |
| 49103 |
verl_agent_webshop-new-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-nothink-120step |
HuggingFace |
1.30 |
unrated |
| 49104 |
verl_agent_webshop-new-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-nothink-135step |
HuggingFace |
1.30 |
unrated |
| 49105 |
verl_agent_webshop-new-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-nothink-150step |
HuggingFace |
1.30 |
unrated |
| 49106 |
verl_agent_webshop-new-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-nothink-15step |
HuggingFace |
1.30 |
unrated |
| 49107 |
verl_agent_webshop-new-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-nothink-30step |
HuggingFace |
1.30 |
unrated |
| 49108 |
verl_agent_webshop-new-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-nothink-45step |
HuggingFace |
1.30 |
unrated |
| 49109 |
verl_agent_webshop-new-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-nothink-60step |
HuggingFace |
1.30 |
unrated |
| 49110 |
verl_agent_webshop-new-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-nothink-75step |
HuggingFace |
1.30 |
unrated |
| 49111 |
verl_agent_webshop-new-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-nothink-90step |
HuggingFace |
1.30 |
unrated |
| 49112 |
verl_agent_webshop-new-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-o16-t-105step |
HuggingFace |
1.30 |
unrated |
| 49113 |
verl_agent_webshop-new-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-o16-t-120step |
HuggingFace |
1.30 |
unrated |
| 49114 |
verl_agent_webshop-new-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-o16-t-135step |
HuggingFace |
1.30 |
unrated |
| 49115 |
verl_agent_webshop-new-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-o16-t-150step |
HuggingFace |
1.30 |
unrated |
| 49116 |
verl_agent_webshop-new-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-o16-t-15step |
HuggingFace |
1.30 |
unrated |
| 49117 |
verl_agent_webshop-new-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-o16-t-30step |
HuggingFace |
1.30 |
unrated |
| 49118 |
verl_agent_webshop-new-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-o16-t-45step |
HuggingFace |
1.30 |
unrated |
| 49119 |
verl_agent_webshop-new-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-o16-t-60step |
HuggingFace |
1.30 |
unrated |
| 49120 |
verl_agent_webshop-new-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-o16-t-75step |
HuggingFace |
1.30 |
unrated |