Commit a326644
Parent(s): 39926d0
Add data
This view is limited to 50 files because the commit contains too many changes; see the raw diff for the complete change set.
- 2b855b4bc4seed1/evaluation/generation/agg.2b855b4bc4seed1_GEM-web_nlg_en_PALM_prompt_0.json +1 -0
- 2b855b4bc4seed1/evaluation/generation/agg.2b855b4bc4seed1_GEM-web_nlg_en_PALM_prompt_1.json +1 -0
- 2b855b4bc4seed1/evaluation/generation/agg.2b855b4bc4seed1_GEM-web_nlg_en_PALM_prompt_2.json +1 -0
- 2b855b4bc4seed1/evaluation/generation/agg.2b855b4bc4seed1_GEM-web_nlg_en_PALM_prompt_3.json +1 -0
- 2b855b4bc4seed1/evaluation/generation/agg.2b855b4bc4seed1_GEM-web_nlg_en_PALM_prompt_4.json +1 -0
- 2b855b4bc4seed1/evaluation/generation/agg.2b855b4bc4seed1_GEM-web_nlg_en_PALM_prompt_5.json +1 -0
- 2b855b4bc4seed1/evaluation/generation/agg.2b855b4bc4seed1_GEM-wiki_lingua_en_tldr_en_0.json +1 -0
- 2b855b4bc4seed1/evaluation/generation/agg.2b855b4bc4seed1_GEM-wiki_lingua_en_tldr_en_1.json +1 -0
- 2b855b4bc4seed1/evaluation/generation/agg.2b855b4bc4seed1_GEM-wiki_lingua_en_tldr_en_2.json +1 -0
- 2b855b4bc4seed1/evaluation/generation/agg.2b855b4bc4seed1_GEM-wiki_lingua_en_tldr_en_3.json +1 -0
- 2b855b4bc4seed1/evaluation/generation/agg.2b855b4bc4seed1_GEM-wiki_lingua_en_tldr_en_4.json +1 -0
- 2b855b4bc4seed1/evaluation/generation/agg.2b855b4bc4seed1_GEM-wiki_lingua_en_tldr_en_5.json +1 -0
- 2b855b4bc4seed1/evaluation/generation/agg.2b855b4bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_0.json +1 -0
- 2b855b4bc4seed1/evaluation/generation/agg.2b855b4bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_1.json +1 -0
- 2b855b4bc4seed1/evaluation/generation/agg.2b855b4bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_2.json +1 -0
- 2b855b4bc4seed1/evaluation/generation/agg.2b855b4bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_3.json +1 -0
- 2b855b4bc4seed1/evaluation/generation/agg.2b855b4bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_4.json +1 -0
- 2b855b4bc4seed1/evaluation/generation/agg.2b855b4bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_5.json +1 -0
- 2b855b4bc4seed1/evaluation/generation/agg.2b855b4bc4seed1_gem_xsum_article_DOC_summary_0.json +1 -0
- 2b855b4bc4seed1/evaluation/generation/agg.2b855b4bc4seed1_gem_xsum_article_DOC_summary_1.json +1 -0
- 2b855b4bc4seed1/evaluation/generation/agg.2b855b4bc4seed1_gem_xsum_article_DOC_summary_2.json +1 -0
- 2b855b4bc4seed1/evaluation/generation/agg.2b855b4bc4seed1_gem_xsum_article_DOC_summary_3.json +1 -0
- 2b855b4bc4seed1/evaluation/generation/agg.2b855b4bc4seed1_gem_xsum_article_DOC_summary_4.json +1 -0
- 2b855b4bc4seed1/evaluation/generation/agg.2b855b4bc4seed1_gem_xsum_article_DOC_summary_5.json +1 -0
- 2b855b4bc4seed1/evaluation/generation/examples.2b855b4bc4seed1_GEM-web_nlg_en_PALM_prompt_0.jsonl +3 -0
- 2b855b4bc4seed1/evaluation/generation/examples.2b855b4bc4seed1_GEM-web_nlg_en_PALM_prompt_1.jsonl +3 -0
- 2b855b4bc4seed1/evaluation/generation/examples.2b855b4bc4seed1_GEM-web_nlg_en_PALM_prompt_2.jsonl +3 -0
- 2b855b4bc4seed1/evaluation/generation/examples.2b855b4bc4seed1_GEM-web_nlg_en_PALM_prompt_3.jsonl +3 -0
- 2b855b4bc4seed1/evaluation/generation/examples.2b855b4bc4seed1_GEM-web_nlg_en_PALM_prompt_4.jsonl +3 -0
- 2b855b4bc4seed1/evaluation/generation/examples.2b855b4bc4seed1_GEM-web_nlg_en_PALM_prompt_5.jsonl +3 -0
- 2b855b4bc4seed1/evaluation/generation/examples.2b855b4bc4seed1_GEM-wiki_lingua_en_tldr_en_0.jsonl +3 -0
- 2b855b4bc4seed1/evaluation/generation/examples.2b855b4bc4seed1_GEM-wiki_lingua_en_tldr_en_1.jsonl +3 -0
- 2b855b4bc4seed1/evaluation/generation/examples.2b855b4bc4seed1_GEM-wiki_lingua_en_tldr_en_2.jsonl +3 -0
- 2b855b4bc4seed1/evaluation/generation/examples.2b855b4bc4seed1_GEM-wiki_lingua_en_tldr_en_3.jsonl +3 -0
- 2b855b4bc4seed1/evaluation/generation/examples.2b855b4bc4seed1_GEM-wiki_lingua_en_tldr_en_4.jsonl +3 -0
- 2b855b4bc4seed1/evaluation/generation/examples.2b855b4bc4seed1_GEM-wiki_lingua_en_tldr_en_5.jsonl +3 -0
- 2b855b4bc4seed1/evaluation/generation/examples.2b855b4bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl +3 -0
- 2b855b4bc4seed1/evaluation/generation/examples.2b855b4bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl +3 -0
- 2b855b4bc4seed1/evaluation/generation/examples.2b855b4bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl +3 -0
- 2b855b4bc4seed1/evaluation/generation/examples.2b855b4bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl +3 -0
- 2b855b4bc4seed1/evaluation/generation/examples.2b855b4bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl +3 -0
- 2b855b4bc4seed1/evaluation/generation/examples.2b855b4bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl +3 -0
- 2b855b4bc4seed1/evaluation/generation/examples.2b855b4bc4seed1_gem_xsum_article_DOC_summary_0.jsonl +3 -0
- 2b855b4bc4seed1/evaluation/generation/examples.2b855b4bc4seed1_gem_xsum_article_DOC_summary_1.jsonl +3 -0
- 2b855b4bc4seed1/evaluation/generation/examples.2b855b4bc4seed1_gem_xsum_article_DOC_summary_2.jsonl +3 -0
- 2b855b4bc4seed1/evaluation/generation/examples.2b855b4bc4seed1_gem_xsum_article_DOC_summary_3.jsonl +3 -0
- 2b855b4bc4seed1/evaluation/generation/examples.2b855b4bc4seed1_gem_xsum_article_DOC_summary_4.jsonl +3 -0
- 2b855b4bc4seed1/evaluation/generation/examples.2b855b4bc4seed1_gem_xsum_article_DOC_summary_5.jsonl +3 -0
- 2b855b4bc4seed1/evaluation/generation/slim.2b855b4bc4seed1_GEM-web_nlg_en_PALM_prompt_0.json +133 -0
- 2b855b4bc4seed1/evaluation/generation/slim.2b855b4bc4seed1_GEM-web_nlg_en_PALM_prompt_1.json +133 -0
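Note on the file layout: each agg.*.json file added here is a single-line JSON record holding a "results" list (one entry per metric, each paired with a matching "<metric>_stderr") and a "config" block recording the model path, shot count, and harness settings; the trailing _0 through _5 in the filenames mirrors config.num_fewshot. A minimal sketch of how such a file could be consumed, assuming the repository is checked out locally (the load_agg helper and its metric-extraction heuristic are illustrative, not part of this repo):

import json
from pathlib import Path

def load_agg(path):
    # Illustrative helper (not part of this repo): returns
    # (num_fewshot, {metric: value}) for one agg.*.json file.
    data = json.loads(Path(path).read_text())
    metrics = {}
    for entry in data["results"]:
        # Each entry carries exactly one float metric (bleu, rouge1_precision,
        # ...) next to shared metadata fields and its "<metric>_stderr".
        for key, value in entry.items():
            if isinstance(value, float) and not key.endswith("_stderr"):
                metrics[key] = value
    return data["config"]["num_fewshot"], metrics

shots, metrics = load_agg(
    "2b855b4bc4seed1/evaluation/generation/"
    "agg.2b855b4bc4seed1_GEM-web_nlg_en_PALM_prompt_0.json"
)
print(shots, metrics["bleu"], metrics["rougeL_fmeasure"])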
2b855b4bc4seed1/evaluation/generation/agg.2b855b4bc4seed1_GEM-web_nlg_en_PALM_prompt_0.json
ADDED
@@ -0,0 +1 @@
+{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.39855658833240104, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.026569783418037774}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.0681383451903841, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0017459951545863855}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.2868586326665418, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004824268129831654}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.10116791036204026, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002146154058076065}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.03126302205288469, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0009850036254092948}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.13556136849826403, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0032166746311611994}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.047349099388232035, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0013284747595520357}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.06437528461062834, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0015957361187538223}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.2753959775705074, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.00466857019137397}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.09604330456797407, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0019713714851164322}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.06439853493257244, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0016465025871715334}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.27133410145709913, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004513671060323518}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.09560406363234744, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0020066099973174915}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b4bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b4bc4seed1/evaluation/generation/agg.2b855b4bc4seed1_GEM-web_nlg_en_PALM_prompt_1.json
ADDED
@@ -0,0 +1 @@
+{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.3912276412395352, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.020484250080896618}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.06518931479471321, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0014967694916187414}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.33171280095627653, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.00514509486146305}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.1006872191190483, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001920986262572305}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.02909822469430401, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0008998133580491436}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.1502346730077987, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0033848523651528895}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.04487032485588369, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0011904326422867075}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.06065875807260051, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0013716176596965028}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.3059288650916899, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004623721258942203}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.09361543338651233, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0017548257333492123}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.06183903916740186, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0014278699660441308}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.3121661527895801, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004757283098586568}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.09537017930398344, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001823579905497483}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b4bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b4bc4seed1/evaluation/generation/agg.2b855b4bc4seed1_GEM-web_nlg_en_PALM_prompt_2.json
ADDED
@@ -0,0 +1 @@
+{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.3943262562299206, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.018261926096308043}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.06568090565271803, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0014593667843626683}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.33176044666345866, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0049760255757141485}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.10142763892550687, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0018695705948765919}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.030064238556505973, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.000878026151022135}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.15674156008610293, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0034371591659074687}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.04634376242269252, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0011703869000957733}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.06073340922239521, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0013391382988114195}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.30301799282679526, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004405838361403633}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.09375576998957523, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.00171329522247045}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.06222811774824065, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.001389419033401118}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.3122289734770347, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004608484270680722}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.0960005452415318, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0017713740893331342}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b4bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b4bc4seed1/evaluation/generation/agg.2b855b4bc4seed1_GEM-web_nlg_en_PALM_prompt_3.json
ADDED
@@ -0,0 +1 @@
+{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.42220231954083376, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.02023512696975793}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.06294424704272412, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0013426038293432201}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.32828293290150784, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.005001939985910281}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.09852727869171284, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0018203807487138662}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.028779067635461713, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0008198408986678626}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.15496999960360197, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0034216215513968302}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.04503121538891503, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0011459084477198273}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.05785040573096921, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012115439751522847}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.2983337805760926, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004398723198697165}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.09052242409400411, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0016574959187572178}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.0594396242004035, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.00126453568792367}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.3080884282709691, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004608543541528767}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.09294906918300895, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001716261400806889}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b4bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b4bc4seed1/evaluation/generation/agg.2b855b4bc4seed1_GEM-web_nlg_en_PALM_prompt_4.json
ADDED
@@ -0,0 +1 @@
+{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.4832210304652205, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.03147187969017503}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.06496573154327573, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0014040449532895783}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.34055461469310166, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0050133457039775635}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.10154392419352416, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001855573131248702}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.030167551913451496, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0008561785653855975}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.1649962512635775, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.003584101763297551}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.047200493334113615, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0011802748324519364}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.05953514643920731, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012430023753300773}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.3091164943758439, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004419039964894518}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.09311774371977341, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0016746907773452203}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.061471473785615884, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0013159018012368767}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.32032521201557734, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004674150509866559}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.09607749936473489, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0017621551957160131}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b4bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b4bc4seed1/evaluation/generation/agg.2b855b4bc4seed1_GEM-web_nlg_en_PALM_prompt_5.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.45737528578894, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.02223362364269596}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.06341236195807602, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0013025059451882081}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.3444381235432288, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.005100649283959647}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.09991060215055379, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0017580601671848262}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.028780682477283995, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007717780565479907}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.16550143783892868, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.003572409556510635}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.04547317077887062, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.001083447465046616}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.0578485176298394, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0011713192716163545}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.311243979779327, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004489723890330991}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.09108818436943181, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0015903792470751198}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.0597173200997438, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0012283187843817414}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.3220388574311313, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004693107327532801}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.09396070402136288, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0016577593277067773}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b4bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b4bc4seed1/evaluation/generation/agg.2b855b4bc4seed1_GEM-wiki_lingua_en_tldr_en_0.json
ADDED
@@ -0,0 +1 @@
+
{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.14511944435822713, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0017739222195375815}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.24520002842853666, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0024767922959903223}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.1694168893882774, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0017285872978047574}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.026968037085841123, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006724263803688037}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.04727287142494576, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0012840793304952297}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.03177170959182661, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0007692709280843934}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.11308839561178641, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012317612395230065}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.19827948573980483, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0020047077764803736}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.13367149075100937, "fixed_answer_choice_list": null, "dataset_path": 
"GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.00122579559557816}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.13330979457859984, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0016108665517001446}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.226523165276769, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002292830009490771}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.15592593082738226, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0015743663299313405}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 1.3690100977423922, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.05886941818468516}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b4bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b4bc4seed1/evaluation/generation/agg.2b855b4bc4seed1_GEM-wiki_lingua_en_tldr_en_1.json
ADDED
@@ -0,0 +1 @@
+
{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.1563220314981603, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0019288538487507313}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.26325355614186413, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002686994036630168}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.1815429849832861, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0018339982664320788}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.030543605372666018, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007754152670159024}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.05363235422287617, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0014749147033489768}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.035673976193771974, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0008626117278733884}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.11249260697257467, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0013266232430102073}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.19600160136173014, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0021032634815257457}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.13186989175022087, "fixed_answer_choice_list": null, "dataset_path": 
"GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0012587743127241181}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.14699158833792147, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0018030118143166547}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.24830570689715825, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0025264893684192524}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.17088095619861712, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0017110735494681845}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 1.7868385249352625, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.06643098164400961}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b4bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b4bc4seed1/evaluation/generation/agg.2b855b4bc4seed1_GEM-wiki_lingua_en_tldr_en_2.json
ADDED
@@ -0,0 +1 @@
+
{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.1637259808560354, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.002017053083347354}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.2675645692770856, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002629826305543504}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.18747800716688756, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001846937162893923}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.03357198574623862, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0008471590504777416}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.05670287996064233, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0014609581024957142}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.03838403178488669, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0008793468371463138}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.12020443998886943, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0014203050912885904}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.20283299498036392, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002086790453159732}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.13884152020924362, "fixed_answer_choice_list": null, "dataset_path": 
"GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0012910258507251103}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.15360087717039728, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0018828689588982353}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.2523571093220897, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0025102266090624974}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.17614109076386195, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001727388356327573}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 1.8963745828853016, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.08021880799057668}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b4bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b4bc4seed1/evaluation/generation/agg.2b855b4bc4seed1_GEM-wiki_lingua_en_tldr_en_3.json
ADDED
@@ -0,0 +1 @@
+
{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.14301510545517512, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.002309076005710306}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.22154076255407457, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003010516998702038}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.15614634812221673, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0020703756716238682}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.02998405021751007, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0009213928393943472}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.048768948166259624, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0014429425871296203}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.032809011988041384, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0008454225326652912}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.10714061695488505, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0017291510979768427}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.16946810310877372, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0023670017930872435}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.11710781259448001, "fixed_answer_choice_list": null, "dataset_path": 
"GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001486425944407185}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.13457044625902037, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0021679948212344493}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.20912294885872634, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002851560713931268}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.14700806948565207, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001940930721840521}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 1.9646254567397161, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.057075491568922325}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b4bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b4bc4seed1/evaluation/generation/agg.2b855b4bc4seed1_GEM-wiki_lingua_en_tldr_en_4.json
ADDED
@@ -0,0 +1 @@
+
{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.04839149402820543, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0018154126914704599}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.0763797139827709, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002687114915047913}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.05190882238695101, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0017704568053027153}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.00961923494025731, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0005313682610078681}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.01695713661091346, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.001004848518846823}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.010744972904987648, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0005635089537785232}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.037150316884056823, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0013840484731482293}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.059558806955544184, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0021226325282649}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.0397938566579677, "fixed_answer_choice_list": null, "dataset_path": 
"GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0013337234042402404}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.04512978782545739, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.001692850076284173}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.07140313383359884, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002513815424881597}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.04847441961045556, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0016506599915437933}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 0.44123714013872695, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.03941198298833912}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b4bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b4bc4seed1/evaluation/generation/agg.2b855b4bc4seed1_GEM-wiki_lingua_en_tldr_en_5.json
ADDED
@@ -0,0 +1 @@
+
{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.008126763474813043, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0008143969920360195}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.012724062506916302, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0012549853819996692}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.00845254383401664, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0007874363081928194}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.001404947916671783, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0001884209372573544}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.0029225050800347897, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.00046662061601198307}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.0016579937024865626, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00021772158325319047}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.0065259191593632915, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0006573529238346043}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.010258170625213047, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0010284762210769883}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.006710885785199153, "fixed_answer_choice_list": null, 
"dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.000617445059108658}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.0076454293176619215, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0007610127888008051}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.012131216596331538, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.001202490112618103}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.007972401310488701, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0007381135864703859}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 9.820919733542687e-07, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 1.8968835689959146e-06}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b4bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b4bc4seed1/evaluation/generation/agg.2b855b4bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_0.json
ADDED
@@ -0,0 +1 @@
+
{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 4.129757228038615, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.07014233813245054}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.1549243323143822, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0017710465053136317}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.28505359248537765, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0030240906814134943}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.1963023731197804, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0021177845411018744}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.06094285656359399, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0008923016329885139}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.11574793975173864, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.001682823395366983}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.0781272317594066, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0011108678646529282}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.1380786735865104, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001521766772484174}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.2560687584672421, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002688080710851207}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.1753767591966602, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0018343868630799004}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.13565071522521033, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0016036406101874248}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.25048383475986313, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002798617263701007}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.17207329998104542, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0019361501401427212}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b4bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b4bc4seed1/evaluation/generation/agg.2b855b4bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_1.json
ADDED
@@ -0,0 +1 @@
+
{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 8.299968276953818, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.12233321125733444}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.37638385679182335, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.002317139736702885}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.42033889127353, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002772945673846739}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.37877480029239974, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0019784847616312632}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.14557219664635176, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0016775934171071967}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.16609026065457985, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0019605500141973544}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.14751847292773587, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0015764623094505865}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.2580714671728175, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0017766038886166667}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.2888777643523076, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0021851921933268488}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.25935191152159226, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0015444480088272186}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.31216259293253057, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.002128295675386538}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.34818131196844593, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002527105759803588}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.3138145000350625, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0018697138228652053}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b4bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
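Each of these agg.*.json files is a single JSON object with a `results` list (one entry per metric) and a `config` block. A minimal sketch for pulling the headline numbers out of one file, assuming only the layout visible above:

```python
import json

# Path is illustrative; point it at any of the agg.*.json files in this commit.
path = ("2b855b4bc4seed1/evaluation/generation/"
        "agg.2b855b4bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_1.json")

with open(path) as f:
    data = json.load(f)

print("fewshot:", data["config"]["num_fewshot"])
for record in data["results"]:
    # Every record repeats the task/prompt metadata; the metric itself is the
    # one key that has a matching *_stderr field alongside it.
    for key, value in record.items():
        if f"{key}_stderr" in record:
            print(f'{record["task_name"]}/{key} = {value:.4f} ± {record[key + "_stderr"]:.4f}')
```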
2b855b4bc4seed1/evaluation/generation/agg.2b855b4bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_2.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 8.32094088832304, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.1408456329896372}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.37673820157456905, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.002525885889869433}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.45010798840015825, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002660399505092188}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.3891942091560381, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0018863285748240007}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.1588674350852028, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.001799580410910336}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.19126370921881766, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.001995920729312549}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.16331509152950488, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0015357435865889781}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.25911730218786333, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001898454144269244}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.31272395259229857, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.00218960828230586}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.2683563508617857, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0014877223321086363}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.3147064567889733, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.002200826709156465}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.3793544003572373, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0025336496732474654}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.3266409070065726, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001793782232998535}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b4bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
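The `prompt_jinja` field stored in every record is the promptsource template that turned each E2E meaning representation into the model input (everything before `|||`) and the target (after `|||`). A rough, self-contained rendering of the input half; the `''''` in the stored JSON is read here as an escaped empty string, which is an assumption about how promptsource serializes it:

```python
import jinja2

# Input half of the stored template, with the doubled quotes normalized.
TEMPLATE = (
    'Given the following data about a restaurant:\n'
    '{% for feature in meaning_representation.split("]") %}'
    ' {% set key = feature.split("[")[0].replace(",","") %}'
    ' {% set value = feature.replace(",","").replace(key+"[", "") %}\n'
    '{% if value != "" %} {{key}} : {{value}} {% endif %}'
    '{%- endfor %}\n'
    'Generate some text about this restaurant.'
)

mr = "name[The Eagle], eatType[coffee shop], priceRange[moderate]"
print(jinja2.Template(TEMPLATE).render(meaning_representation=mr))
```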
2b855b4bc4seed1/evaluation/generation/agg.2b855b4bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_3.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 8.14287697368746, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.09189509790387232}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.36461819536647777, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0026487499518419765}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.4543503797519612, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002582206511533246}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.3833814965569996, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0018909883024634615}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.15958828700253225, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00191846623883205}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.19891031000555218, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0020106764519864156}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.16584161893779908, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0015384057629127023}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.2544311472598548, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.002057737430115525}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.3197047799413762, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0021685729115729683}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.26759530893060857, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0014919222068034712}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.3090600248711991, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0023136943881649544}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.38801660046844827, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002479446128993374}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.3261278580937773, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0017736884110960225}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b4bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
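The files in this group differ only in `num_fewshot` (the trailing `_1` through `_5` in the filename), so the fewshot trend can be tabulated directly. A small sketch, assuming the directory layout of this commit:

```python
import glob
import json

# The filename suffix encodes the shot count; config.num_fewshot confirms it.
pattern = ("2b855b4bc4seed1/evaluation/generation/"
           "agg.2b855b4bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_*.json")

rows = []
for path in sorted(glob.glob(pattern)):
    data = json.load(open(path))
    bleu = next(r["bleu"] for r in data["results"] if "bleu" in r)
    rows.append((data["config"]["num_fewshot"], bleu))

for shots, bleu in sorted(rows):
    print(f"{shots}-shot: BLEU {bleu:.2f}")
```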
2b855b4bc4seed1/evaluation/generation/agg.2b855b4bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_4.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 8.340266904708615, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.15882362731641408}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.37296072752446663, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.002869109612679922}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.4502609383284795, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.00251079382775608}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.3854358650362702, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0019534156712746817}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.1666562401905645, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.002038677032860895}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.20086801672222046, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002011662092774619}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.1702076056023139, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0016133056911393302}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.26173580474947733, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0021944891414563193}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.31915969202906563, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002133982181573949}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.2711757987789853, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0015777940102540016}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.3168756443706945, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0024802113511425464}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.3863610636617126, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0024230633383428616}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.32924428194860017, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0018366992292492765}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b4bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
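For each ROUGE variant the three reported numbers are related: the F-measure is the harmonic mean of precision and recall. Because the harness averages per example rather than computing F from the corpus-level averages, the identity only holds approximately against the aggregate values, as a quick check on the 4-shot record above shows:

```python
# rouge1 precision/recall copied from the 4-shot record above.
p, r = 0.37296072752446663, 0.4502609383284795
print(2 * p * r / (p + r))  # ~0.408 vs. the reported fmeasure of ~0.385
```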
2b855b4bc4seed1/evaluation/generation/agg.2b855b4bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_5.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 8.533282452625134, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.1596869267996998}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.3819741920769809, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0030583258079532444}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.44635699494708264, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0024922002220329363}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.387500499827696, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002012551837952501}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.17385953576593263, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0021587516126658113}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.2013810249422712, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0019753430304224476}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.1738222237468194, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0016564597048057436}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.2715629527377947, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.00239642782854299}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.3196239920854854, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0021068495557586366}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.275755504456721, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001646371536215165}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.32541276849244294, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.002661022323727048}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.38355980857565064, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0023839054579621287}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.33168731107832367, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001885184243072814}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b4bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
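The `config` block records how the harness instantiated the model: an hf-causal model loaded from the listed checkpoint in bfloat16, with a separate GPT-2 tokenizer directory. The lustre paths are cluster-local and will not resolve elsewhere, but the equivalent transformers calls look like this (a sketch of what `model_args` encodes, not the harness's own loader code):

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Paths copied verbatim from config.model_args; shown only to make the
# checkpoint/tokenizer split and the dtype explicit.
ckpt = ("/pfs/lustrep4/scratch/project_462000119/muennighoff/"
        "nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b4bc4seed1/transformers")
tok_dir = ("/pfs/lustrep4/scratch/project_462000119/muennighoff/"
           "nov-2022-bettercom/gpt2")

model = AutoModelForCausalLM.from_pretrained(ckpt, torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(tok_dir)
```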
2b855b4bc4seed1/evaluation/generation/agg.2b855b4bc4seed1_gem_xsum_article_DOC_summary_0.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.14969414244704182, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0019324896572609615}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.3424440467278392, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0040995514469987174}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.20320759251494883, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0023711645310408895}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.030400425095377902, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.001044026311849789}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.0739670064304767, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002634989437040311}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.042153838410666736, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0014335873797812037}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.11143432626337815, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001432854409975987}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.25753222826743616, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0033024077097477903}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.15159348255296218, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", 
"prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0017876823212052383}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.11696758071244377, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0015447016915361197}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.2708708922848335, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.003597404966268882}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.15938433020387577, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0019723261078191117}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 1.704976167673198, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.07510580344587393}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b4bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b4bc4seed1/evaluation/generation/agg.2b855b4bc4seed1_gem_xsum_article_DOC_summary_1.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.11122405018195453, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0015797262989822184}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.2782856404337479, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0036415883048158764}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.15700622126726985, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0021222629533591006}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.017149801531869974, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007672537603847499}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.045241611308698707, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0021200374596874057}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.024555822633540537, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0010997545789997894}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.08609418991835227, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0011494360417164816}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.21746800955642373, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002835886740287387}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.12180978247573739, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", 
"prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0015608663723874288}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.08977133822978285, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0012601057903106146}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.2265237802974383, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0030682275926886926}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.12701371024428101, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0017141477451743852}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.979960173920773, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.05173529864353662}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b4bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b4bc4seed1/evaluation/generation/agg.2b855b4bc4seed1_gem_xsum_article_DOC_summary_2.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.10922125198022638, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0015327381563681863}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.2740796461825564, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0035621166880206914}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.15439985598556147, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0020685303106254766}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.016487055335928664, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007677313542586885}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.04306796853828354, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002091163544492895}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.023587313413704444, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0011050020264692577}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.08557002380659585, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0011627881583555256}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.21639702653925905, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002830090102057936}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.12120837216731699, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", 
"prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0015854487079549203}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.08798412491210018, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.001241391026067503}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.2224295384317635, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0030332449340733547}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.12464464617600614, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0017005400782115032}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.9592734153575632, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.09382162697466569}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b4bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b4bc4seed1/evaluation/generation/agg.2b855b4bc4seed1_gem_xsum_article_DOC_summary_3.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.1090266212279622, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.001711662470692957}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.26435524062723414, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003899243892653423}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.15100142900440264, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002216876694666884}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.016664166927212225, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007670588317614037}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.043006632410991154, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002080317075223551}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.02358333545532719, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0010856412409051479}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.08497732398758262, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012974380766172027}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.2073530127515781, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0030747619829053943}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.11778480413469063, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", 
"prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0016849995779788692}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.08853171692794809, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0013913431824478968}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.21598097449057452, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0032877874391039636}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.12273430725894952, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0018178140650586502}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 1.0250436676088148, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.0897345599425218}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b4bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b4bc4seed1/evaluation/generation/agg.2b855b4bc4seed1_gem_xsum_article_DOC_summary_4.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.03485569508169301, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0021856260607702794}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.06463507331652409, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003685938778071234}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.040937760756170734, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002299626505022236}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.005437259038133314, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.000802124167524944}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.010111288879263358, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0011813527534910925}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.006195379439697378, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0007036601029415052}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.02802707208505075, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0018488815770964972}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.051579755348292074, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0029598852452192514}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.03234035544052198, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", 
"prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001792868048264922}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.02943801405378874, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0019325677627601914}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.05386078779110535, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.003095591399792792}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.03399814377685756, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0019035037454680388}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.5225577211686727, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.12959625082450632}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b4bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b4bc4seed1/evaluation/generation/agg.2b855b4bc4seed1_gem_xsum_article_DOC_summary_5.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.0022877323719954707, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0006642644641306185}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.0018280680998604062, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0005121323157465823}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.001990674361595793, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0005642781142620792}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.00017152658662092626, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00012780446523672577}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.00010414114187699093, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 7.474658294086318e-05}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.0001287874039080709, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 9.359407071565204e-05}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.0018514296874598294, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0005384819052908431}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.0014950663258536708, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.00041902435765448935}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.0016185572630217646, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": 
null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.00045897887275809666}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.001905031745778869, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0005530289094934512}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.001547348948891011, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0004349724954627403}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.0016694212075392255, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.00047283742219582684}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 7.180994433269743e-39, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 2.511599289322222e-33}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b4bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b4bc4seed1/evaluation/generation/examples.2b855b4bc4seed1_GEM-web_nlg_en_PALM_prompt_0.jsonl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3acb358633f2eac253c4a6d2cddcbbd9c1f67f895a766d245101d3dcbd783392
size 4188113
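This examples.*.jsonl entry (and every one that follows) is a Git LFS pointer file rather than the JSONL payload itself: three lines giving the pointer spec version, the SHA-256 of the stored object, and its size in bytes. A small parser sketch for this fixed three-field format:

    def parse_lfs_pointer(text: str) -> dict:
        """Split a Git LFS pointer (version / oid / size lines) into fields."""
        fields = dict(line.strip().split(" ", 1)
                      for line in text.strip().splitlines())
        return {
            "version": fields["version"],
            "sha256": fields["oid"].removeprefix("sha256:"),
            "size_bytes": int(fields["size"]),
        }

    pointer = ("version https://git-lfs.github.com/spec/v1\n"
               "oid sha256:3acb358633f2eac253c4a6d2cddcbbd9c1f67f895a766d245101d3dcbd783392\n"
               "size 4188113")
    print(parse_lfs_pointer(pointer))  # version/sha256/size_bytes as a dict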
2b855b4bc4seed1/evaluation/generation/examples.2b855b4bc4seed1_GEM-web_nlg_en_PALM_prompt_1.jsonl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d148e21c54d22fc5ba46dd9914f6bd70a11843564d572310f6395fda5b8045d5
size 5221161
2b855b4bc4seed1/evaluation/generation/examples.2b855b4bc4seed1_GEM-web_nlg_en_PALM_prompt_2.jsonl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3b45be435049d723e18ac33583c827d2205e02d35e6d65fe6bed505e88da7254
size 6114736
2b855b4bc4seed1/evaluation/generation/examples.2b855b4bc4seed1_GEM-web_nlg_en_PALM_prompt_3.jsonl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:98b722d65118c77b80647f429d4607dae900cb2e9af457b4ed4a29a9d615c667
size 7042252
2b855b4bc4seed1/evaluation/generation/examples.2b855b4bc4seed1_GEM-web_nlg_en_PALM_prompt_4.jsonl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d50f2988c093f0c55a4d3a7ae702a49d789f3bf1fdb39b31e9bf56e6d7a4b042
size 7950254
2b855b4bc4seed1/evaluation/generation/examples.2b855b4bc4seed1_GEM-web_nlg_en_PALM_prompt_5.jsonl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:fb89648be6004b08bfbb508ea264654d9e85989a891c5bf092616ae7d96587ab
size 8867397
2b855b4bc4seed1/evaluation/generation/examples.2b855b4bc4seed1_GEM-wiki_lingua_en_tldr_en_0.jsonl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7b50f925961c2fa98fda1020b195afb6ca9ae269c4b643eabcd1cd6681917704
size 7695607
2b855b4bc4seed1/evaluation/generation/examples.2b855b4bc4seed1_GEM-wiki_lingua_en_tldr_en_1.jsonl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ba184994bff58cda34e4b1a9b3be646b556eaa96f48fbcc90422725611030848
size 13325512
2b855b4bc4seed1/evaluation/generation/examples.2b855b4bc4seed1_GEM-wiki_lingua_en_tldr_en_2.jsonl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1d9e2fc869f25f39298468843698ec79e2dce6a0dd70b45419e43a63f6231522
size 18909243
2b855b4bc4seed1/evaluation/generation/examples.2b855b4bc4seed1_GEM-wiki_lingua_en_tldr_en_3.jsonl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:196b17c4387777663a7d2868f8011e5bd4cdde5d26e8d40c9d9422d02b7476d1
size 24322795
2b855b4bc4seed1/evaluation/generation/examples.2b855b4bc4seed1_GEM-wiki_lingua_en_tldr_en_4.jsonl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a00703da914f1122fe4c9b7e2c204e1ecc5c94f255cec5c423a4784c98c54649
size 29469448
2b855b4bc4seed1/evaluation/generation/examples.2b855b4bc4seed1_GEM-wiki_lingua_en_tldr_en_5.jsonl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6478d93e9d0b2d79122bdc26fd977e3a543f0fc92c67b923f0944410853e28eb
size 34799097
2b855b4bc4seed1/evaluation/generation/examples.2b855b4bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c36da7aab3f2f85c4679e6a3dda2abc7c1e8e3a4f7ddaac8f7a3dd4ae1a8e42e
size 4350822
2b855b4bc4seed1/evaluation/generation/examples.2b855b4bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:47fbdb3f50363de71f03e7296c549e1e2f1940caeb5e4fc5c40d922c5a6e26df
size 5172462
2b855b4bc4seed1/evaluation/generation/examples.2b855b4bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a4dea43e3a5156468657d48564f1e68be3cd783629a12276fdb9e63e87a2d4ac
size 6326756
2b855b4bc4seed1/evaluation/generation/examples.2b855b4bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:cc627cf536ebf6e54ba0bd12ba2392e4ee3ce56c29f49fee1a2a9774de373989
size 7449119
2b855b4bc4seed1/evaluation/generation/examples.2b855b4bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:fe8e88bfd38f5b9228d413043c8f9cffdefe23a8131afc86d4057e970256022e
size 8516378
2b855b4bc4seed1/evaluation/generation/examples.2b855b4bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0ba1ddc5b81837b93a738f389916a6000790f395be6a96600d970c13d134b81c
size 9588404
2b855b4bc4seed1/evaluation/generation/examples.2b855b4bc4seed1_gem_xsum_article_DOC_summary_0.jsonl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:989b0ec7c0d173967c13001b4e0702656c181839ec31cddfac1c00d36a7339fc
size 2821694
2b855b4bc4seed1/evaluation/generation/examples.2b855b4bc4seed1_gem_xsum_article_DOC_summary_1.jsonl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c75d314a1fd78241513fd05742fbcb76d25c3e04adb3ac0008856192640c6982
size 5102083
2b855b4bc4seed1/evaluation/generation/examples.2b855b4bc4seed1_gem_xsum_article_DOC_summary_2.jsonl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8f1729efc9c05b7e03f95e1376886a4cb50de0f5713695ef53b2ca5642e8d117
size 7375330
2b855b4bc4seed1/evaluation/generation/examples.2b855b4bc4seed1_gem_xsum_article_DOC_summary_3.jsonl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f6ba84dde8582e619dd42af747645ea9666da72c614bb41e5cdb578f6b52db06
size 9643627
2b855b4bc4seed1/evaluation/generation/examples.2b855b4bc4seed1_gem_xsum_article_DOC_summary_4.jsonl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:61f25b92427224c8c3d0fa285dc627ac6516361853d5301556d85140adc46c44
size 11671371
2b855b4bc4seed1/evaluation/generation/examples.2b855b4bc4seed1_gem_xsum_article_DOC_summary_5.jsonl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4aeb0d5146f0202761f5dcc5fe6779e23437808ce22c664de66d904686aa0ea1
size 13897492
2b855b4bc4seed1/evaluation/generation/slim.2b855b4bc4seed1_GEM-web_nlg_en_PALM_prompt_0.json
ADDED
@@ -0,0 +1,133 @@
{
  "results": [
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "bleu": 0.39855658833240104,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "bleu_stderr": 0.026569783418037774
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge1_precision": 0.0681383451903841,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge1_precision_stderr": 0.0017459951545863855
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge1_recall": 0.2868586326665418,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge1_recall_stderr": 0.004824268129831654
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge1_fmeasure": 0.10116791036204026,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge1_fmeasure_stderr": 0.002146154058076065
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge2_precision": 0.03126302205288469,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge2_precision_stderr": 0.0009850036254092948
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge2_recall": 0.13556136849826403,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge2_recall_stderr": 0.0032166746311611994
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge2_fmeasure": 0.047349099388232035,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge2_fmeasure_stderr": 0.0013284747595520357
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeL_precision": 0.06437528461062834,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeL_precision_stderr": 0.0015957361187538223
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeL_recall": 0.2753959775705074,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeL_recall_stderr": 0.00466857019137397
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeL_fmeasure": 0.09604330456797407,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeL_fmeasure_stderr": 0.0019713714851164322
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeLsum_precision": 0.06439853493257244,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_precision_stderr": 0.0016465025871715334
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeLsum_recall": 0.27133410145709913,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_recall_stderr": 0.004513671060323518
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeLsum_fmeasure": 0.09560406363234744,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_fmeasure_stderr": 0.0020066099973174915
    }
  ],
  "config": {
    "model": "hf-causal",
    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b4bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
    "task_args": "",
    "num_fewshot": 0,
    "batch_size": 16,
    "device": "cuda",
    "use_cache": false,
    "limit": 3000,
    "bootstrap_iters": 10,
    "seed": 1234
  }
}
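The slim.*.json files repeat the aggregate metrics in pretty-printed form, with the per-prompt metadata (prompt_id, prompt_jinja, and so on) dropped; each metric and its stderr still sit in a separate "results" entry. A sketch, under the same layout assumption as above, that folds one slim file into a flat metric dict:

    import json

    path = ("2b855b4bc4seed1/evaluation/generation/"
            "slim.2b855b4bc4seed1_GEM-web_nlg_en_PALM_prompt_0.json")
    with open(path) as f:
        slim = json.load(f)

    shared = {"task_name", "prompt_name", "dataset_path",
              "dataset_name", "subset"}
    flat = {k: v for entry in slim["results"]
            for k, v in entry.items() if k not in shared}
    print(flat["bleu"], flat["rougeLsum_fmeasure"])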
2b855b4bc4seed1/evaluation/generation/slim.2b855b4bc4seed1_GEM-web_nlg_en_PALM_prompt_1.json
ADDED
@@ -0,0 +1,133 @@
{
  "results": [
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "bleu": 0.3912276412395352,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "bleu_stderr": 0.020484250080896618
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge1_precision": 0.06518931479471321,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge1_precision_stderr": 0.0014967694916187414
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge1_recall": 0.33171280095627653,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge1_recall_stderr": 0.00514509486146305
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge1_fmeasure": 0.1006872191190483,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge1_fmeasure_stderr": 0.001920986262572305
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge2_precision": 0.02909822469430401,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge2_precision_stderr": 0.0008998133580491436
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge2_recall": 0.1502346730077987,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge2_recall_stderr": 0.0033848523651528895
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge2_fmeasure": 0.04487032485588369,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge2_fmeasure_stderr": 0.0011904326422867075
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeL_precision": 0.06065875807260051,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeL_precision_stderr": 0.0013716176596965028
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeL_recall": 0.3059288650916899,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeL_recall_stderr": 0.004623721258942203
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeL_fmeasure": 0.09361543338651233,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeL_fmeasure_stderr": 0.0017548257333492123
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeLsum_precision": 0.06183903916740186,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_precision_stderr": 0.0014278699660441308
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeLsum_recall": 0.3121661527895801,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_recall_stderr": 0.004757283098586568
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeLsum_fmeasure": 0.09537017930398344,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_fmeasure_stderr": 0.001823579905497483
    }
  ],
  "config": {
    "model": "hf-causal",
    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b4bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
    "task_args": "",
    "num_fewshot": 1,
    "batch_size": 16,
    "device": "cuda",
    "use_cache": false,
    "limit": 3000,
    "bootstrap_iters": 10,
    "seed": 1234
  }
}