| 49001 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-only16-nothink-60step |
HuggingFace |
1.30 |
unrated |
| 49002 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-only16-nothink-75step |
HuggingFace |
1.30 |
unrated |
| 49003 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-only16-nothink-90step |
HuggingFace |
1.30 |
unrated |
| 49004 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-only16-think-105step |
HuggingFace |
1.30 |
unrated |
| 49005 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-only16-think-120step |
HuggingFace |
1.30 |
unrated |
| 49006 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-only16-think-135step |
HuggingFace |
1.30 |
unrated |
| 49007 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-only16-think-150step |
HuggingFace |
1.30 |
unrated |
| 49008 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-only16-think-15step |
HuggingFace |
1.30 |
unrated |
| 49009 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-only16-think-30step |
HuggingFace |
1.30 |
unrated |
| 49010 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-only16-think-45step |
HuggingFace |
1.30 |
unrated |
| 49011 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-only16-think-60step |
HuggingFace |
1.30 |
unrated |
| 49012 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-only16-think-75step |
HuggingFace |
1.30 |
unrated |
| 49013 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-only16-think-90step |
HuggingFace |
1.30 |
unrated |
| 49014 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Qwen2.5-7B-Instruct-nothink-105step |
HuggingFace |
1.30 |
unrated |
| 49015 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Qwen2.5-7B-Instruct-nothink-120step |
HuggingFace |
1.30 |
unrated |
| 49016 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Qwen2.5-7B-Instruct-nothink-135step |
HuggingFace |
1.30 |
unrated |
| 49017 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Qwen2.5-7B-Instruct-nothink-150step |
HuggingFace |
1.30 |
unrated |
| 49018 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Qwen2.5-7B-Instruct-only16-nothink-105step |
HuggingFace |
1.30 |
unrated |
| 49019 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Qwen2.5-7B-Instruct-only16-nothink-120step |
HuggingFace |
1.30 |
unrated |
| 49020 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Qwen2.5-7B-Instruct-only16-nothink-135step |
HuggingFace |
1.30 |
unrated |