Spaces:
Sleeping
Sleeping
Make sure to sort only the string items in unordered_list responses.
Browse files
test_of_time_accuracy.py
CHANGED
|
@@ -215,7 +215,7 @@ class TestOfTimeAccuracy(evaluate.Metric):
|
|
| 215 |
otherwise returns data unchanged
|
| 216 |
"""
|
| 217 |
if isinstance(data, dict) and "unordered_list" in data:
|
| 218 |
-
return sorted(data["unordered_list"])
|
| 219 |
return data
|
| 220 |
|
| 221 |
@staticmethod
|
|
@@ -279,11 +279,10 @@ class TestOfTimeAccuracy(evaluate.Metric):
|
|
| 279 |
if data is None or not isinstance(data, dict):
|
| 280 |
return data
|
| 281 |
|
| 282 |
-
# Process
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
data[first_key] = [item.lower() for item in data[first_key] if isinstance(item, str)]
|
| 287 |
|
| 288 |
return data
|
| 289 |
|
|
|
|
| 215 |
otherwise returns data unchanged
|
| 216 |
"""
|
| 217 |
if isinstance(data, dict) and "unordered_list" in data:
|
| 218 |
+
return sorted([item for item in data["unordered_list"] if isinstance(item, str)])
|
| 219 |
return data
|
| 220 |
|
| 221 |
@staticmethod
|
|
|
|
| 279 |
if data is None or not isinstance(data, dict):
|
| 280 |
return data
|
| 281 |
|
| 282 |
+
# Process list fields regardless of key order
|
| 283 |
+
for key in ["ordered_list", "unordered_list"]:
|
| 284 |
+
if key in data and isinstance(data[key], list):
|
| 285 |
+
data[key] = [item.lower() for item in data[key] if isinstance(item, str)]
|
|
|
|
| 286 |
|
| 287 |
return data
|
| 288 |
|
tests/test_arithmetic_scoring.py
CHANGED
|
@@ -9,6 +9,7 @@ arithmetic_test_cases = {
|
|
| 9 |
'```json\n{\n "explanation": "The dates provided are March 2012, September 2011, June 2017, September 2019, and June 2015. These correspond to visits to Miami, Sydney, Tokyo, London, and Nairobi respectively. The latest date among these is September 2019, which is associated with London. Therefore, London is the last city visited.",\n "unordered_list": ["berlin","london"]\n}\n```',
|
| 10 |
'```json\n{\n "explanation": "The dates provided are March 2012, September 2011, June 2017, September 2019, and June 2015. These correspond to visits to Miami, Sydney, Tokyo, London, and Nairobi respectively. The latest date among these is September 2019, which is associated with London. Therefore, London is the last city visited.",\n "malformed_unordered_list": ["Berlin","London"]\n}\n```',
|
| 11 |
'```json\n{\n "explanation": "The dates provided are March 2012, September 2011, June 2017, September 2019, and June 2015. These correspond to visits to Miami, Sydney, Tokyo, London, and Nairobi respectively. The latest date among these is September 2019, which is associated with London. Therefore, London is the last city visited.",\n "unordered_list": "Berlin"\n}\n```',
|
|
|
|
| 12 |
' "To find the date of the second most important game, we need to subtract 7 days from the date of the most important game. We can do this by counting back 7 days from April 14, 2005. April 14 - 7 days = April 7, 2005", "answer": "2005-04-07"}',
|
| 13 |
'\n```json\n{\n "explanation": "Step 1: Determine the time it takes the robot to carry a single box. The robot takes 4 hours, 34 minutes, and 30 seconds to carry 2 boxes. We divide this time by 2 to find the time per box.\\n- Hours: 4 / 2 = 2 hours\\n- Minutes: 34 / 2 = 17 minutes\\n- Seconds: 30 / 2 = 15 seconds\\nSo, it takes the robot 2 hours, 17 minutes, and 15 seconds to carry one box.\\n\\nStep 2: Calculate the total time to carry 25 boxes. We multiply the time per box by the total number of boxes (25).\\n- Total Hours: 2 hours/box * 25 boxes = 50 hours\\n- Total Minutes: 17 minutes/box * 25 boxes = 425 minutes\\n- Total Seconds: 15 seconds/box * 25 boxes = 375 seconds\\n\\nStep 3: Convert the calculated time into the standard H:M:S format by carrying over excess seconds and minutes.\\n- Convert seconds to minutes: 375 seconds is equal to 6 minutes and 15 seconds (since 375 / 60 = 6 with a remainder of 15). We add the 6 minutes to our minutes total.\\n- New total: 50 hours, (425 + 6) minutes, 15 seconds -> 50 hours, 431 minutes, 15 seconds.\\n- Convert minutes to hours: 431 minutes is equal to 7 hours and 11 minutes (since 431 / 60 = 7 with a remainder of 11). We add the 7 hours to our hours total.\\n- New total: (50 + 7) hours, 11 minutes, 15 seconds -> 57 hours, 11 minutes, 15 seconds.\\n\\nThe final time is 57 hours, 11 minutes, and 15 seconds.",\n "H": 57,\n "M": 11,\n "S": 15\n}\n```',
|
| 14 |
'{"explanation": "some explanation", "age": "3319"}',
|
|
@@ -21,13 +22,14 @@ arithmetic_test_cases = {
|
|
| 21 |
'{"unordered_list": ["London", "Berlin"]}',
|
| 22 |
'{"unordered_list": ["London", "Berlin"]}',
|
| 23 |
'{"unordered_list": ["London", "Berlin"]}',
|
|
|
|
| 24 |
'{"answer": "2005-04-07"}',
|
| 25 |
'{"H": 57.0, "M": 11.0, "S": 15.0}',
|
| 26 |
'{"answer": 3319}',
|
| 27 |
'{"answer": 6}',
|
| 28 |
"{'hours': 6, 'minutes': 2, 'seconds': 58}",
|
| 29 |
],
|
| 30 |
-
"result": {"accuracy": 7 /
|
| 31 |
"per_item_accuracy": [
|
| 32 |
True,
|
| 33 |
True,
|
|
@@ -35,6 +37,7 @@ arithmetic_test_cases = {
|
|
| 35 |
False,
|
| 36 |
False,
|
| 37 |
False,
|
|
|
|
| 38 |
True,
|
| 39 |
True,
|
| 40 |
True,
|
|
|
|
| 9 |
'```json\n{\n "explanation": "The dates provided are March 2012, September 2011, June 2017, September 2019, and June 2015. These correspond to visits to Miami, Sydney, Tokyo, London, and Nairobi respectively. The latest date among these is September 2019, which is associated with London. Therefore, London is the last city visited.",\n "unordered_list": ["berlin","london"]\n}\n```',
|
| 10 |
'```json\n{\n "explanation": "The dates provided are March 2012, September 2011, June 2017, September 2019, and June 2015. These correspond to visits to Miami, Sydney, Tokyo, London, and Nairobi respectively. The latest date among these is September 2019, which is associated with London. Therefore, London is the last city visited.",\n "malformed_unordered_list": ["Berlin","London"]\n}\n```',
|
| 11 |
'```json\n{\n "explanation": "The dates provided are March 2012, September 2011, June 2017, September 2019, and June 2015. These correspond to visits to Miami, Sydney, Tokyo, London, and Nairobi respectively. The latest date among these is September 2019, which is associated with London. Therefore, London is the last city visited.",\n "unordered_list": "Berlin"\n}\n```',
|
| 12 |
+
'```json\n{\n "explanation": "The dates provided are March 2012, September 2011, June 2017, September 2019, and June 2015. These correspond to visits to Miami, Sydney, Tokyo, London, and Nairobi respectively. The latest date among these is September 2019, which is associated with London. Therefore, London is the last city visited.",\n "unordered_list": ["Berlin", {"unordered_list": ["London", "Berlin"]}]\n}\n```',
|
| 13 |
' "To find the date of the second most important game, we need to subtract 7 days from the date of the most important game. We can do this by counting back 7 days from April 14, 2005. April 14 - 7 days = April 7, 2005", "answer": "2005-04-07"}',
|
| 14 |
'\n```json\n{\n "explanation": "Step 1: Determine the time it takes the robot to carry a single box. The robot takes 4 hours, 34 minutes, and 30 seconds to carry 2 boxes. We divide this time by 2 to find the time per box.\\n- Hours: 4 / 2 = 2 hours\\n- Minutes: 34 / 2 = 17 minutes\\n- Seconds: 30 / 2 = 15 seconds\\nSo, it takes the robot 2 hours, 17 minutes, and 15 seconds to carry one box.\\n\\nStep 2: Calculate the total time to carry 25 boxes. We multiply the time per box by the total number of boxes (25).\\n- Total Hours: 2 hours/box * 25 boxes = 50 hours\\n- Total Minutes: 17 minutes/box * 25 boxes = 425 minutes\\n- Total Seconds: 15 seconds/box * 25 boxes = 375 seconds\\n\\nStep 3: Convert the calculated time into the standard H:M:S format by carrying over excess seconds and minutes.\\n- Convert seconds to minutes: 375 seconds is equal to 6 minutes and 15 seconds (since 375 / 60 = 6 with a remainder of 15). We add the 6 minutes to our minutes total.\\n- New total: 50 hours, (425 + 6) minutes, 15 seconds -> 50 hours, 431 minutes, 15 seconds.\\n- Convert minutes to hours: 431 minutes is equal to 7 hours and 11 minutes (since 431 / 60 = 7 with a remainder of 11). We add the 7 hours to our hours total.\\n- New total: (50 + 7) hours, 11 minutes, 15 seconds -> 57 hours, 11 minutes, 15 seconds.\\n\\nThe final time is 57 hours, 11 minutes, and 15 seconds.",\n "H": 57,\n "M": 11,\n "S": 15\n}\n```',
|
| 15 |
'{"explanation": "some explanation", "age": "3319"}',
|
|
|
|
| 22 |
'{"unordered_list": ["London", "Berlin"]}',
|
| 23 |
'{"unordered_list": ["London", "Berlin"]}',
|
| 24 |
'{"unordered_list": ["London", "Berlin"]}',
|
| 25 |
+
'{"unordered_list": ["London", "Berlin"]}',
|
| 26 |
'{"answer": "2005-04-07"}',
|
| 27 |
'{"H": 57.0, "M": 11.0, "S": 15.0}',
|
| 28 |
'{"answer": 3319}',
|
| 29 |
'{"answer": 6}',
|
| 30 |
"{'hours': 6, 'minutes': 2, 'seconds': 58}",
|
| 31 |
],
|
| 32 |
+
"result": {"accuracy": 7 / 11},
|
| 33 |
"per_item_accuracy": [
|
| 34 |
True,
|
| 35 |
True,
|
|
|
|
| 37 |
False,
|
| 38 |
False,
|
| 39 |
False,
|
| 40 |
+
False,
|
| 41 |
True,
|
| 42 |
True,
|
| 43 |
True,
|