| 48981 |
aug_verl_agent_webshop-GRPO-kl-0.01-from-webshop-20step-v2-Llama-3.1-8B-Instruct-info50-60step |
HuggingFace |
1.30 |
unrated |
| 48982 |
aug_verl_agent_webshop-GRPO-kl-0.01-from-webshop-20step-v2-Llama-3.1-8B-Instruct-info50-75step |
HuggingFace |
1.30 |
unrated |
| 48983 |
aug_verl_agent_webshop-GRPO-kl-0.01-from-webshop-20step-v2-Llama-3.1-8B-Instruct-info50-90step |
HuggingFace |
1.30 |
unrated |
| 48984 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-nothink-105step |
HuggingFace |
1.30 |
unrated |
| 48985 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-nothink-120step |
HuggingFace |
1.30 |
unrated |
| 48986 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-nothink-135step |
HuggingFace |
1.30 |
unrated |
| 48987 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-nothink-150step |
HuggingFace |
1.30 |
unrated |
| 48988 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-nothink-15step |
HuggingFace |
1.30 |
unrated |
| 48989 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-nothink-30step |
HuggingFace |
1.30 |
unrated |
| 48990 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-nothink-45step |
HuggingFace |
1.30 |
unrated |
| 48991 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-nothink-60step |
HuggingFace |
1.30 |
unrated |
| 48992 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-nothink-75step |
HuggingFace |
1.30 |
unrated |
| 48993 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-nothink-90step |
HuggingFace |
1.30 |
unrated |
| 48994 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-only16-nothink-105step |
HuggingFace |
1.30 |
unrated |
| 48995 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-only16-nothink-120step |
HuggingFace |
1.30 |
unrated |
| 48996 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-only16-nothink-135step |
HuggingFace |
1.30 |
unrated |
| 48997 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-only16-nothink-150step |
HuggingFace |
1.30 |
unrated |
| 48998 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-only16-nothink-15step |
HuggingFace |
1.30 |
unrated |
| 48999 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-only16-nothink-30step |
HuggingFace |
1.30 |
unrated |
| 49000 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Llama-3.1-8B-Instruct-only16-nothink-45step |
HuggingFace |
1.30 |
unrated |