From da309122d10a0210f66d259304ebcc1f88dab102 Mon Sep 17 00:00:00 2001 From: capjamesg Date: Wed, 4 Sep 2024 01:28:22 +0000 Subject: [PATCH] Update results --- index.html | 43 ++++++---------- results/2024-09-04.json | 106 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 122 insertions(+), 27 deletions(-) create mode 100644 results/2024-09-04.json diff --git a/index.html b/index.html index 76df138..340bd94 100644 --- a/index.html +++ b/index.html @@ -40,7 +40,7 @@

How's GPT-4o Doing?

You can contribute your own tests, too! See the GitHub README for contributing instructions.

-

Tests are run every day at 1am PT. Last updated September 03, 2024.

+

Tests are run every day at 1am PT. Last updated September 04, 2024.

Made with ❤️ by the team at Roboflow.

@@ -58,12 +58,12 @@

How's GPT-4o Doing?

Response Time

-

Today, the average response time to receive results from our tests was 4.09 seconds per request.

+

Today, the average response time to receive results from our tests was 4.08 seconds per request.

This number only accounts for requests made by this application.

-

4.09 s

+

4.08 s

@@ -122,7 +122,7 @@

Prompt

Image

Image of the input into GPT-4

Result

-
8
+
7

Test submitted by Roboflow

@@ -176,7 +176,7 @@

Prompt

Image

Image of the input into GPT-4

Result

-
{'x': 0.367, 'y': 0.411, 'width': 0.255, 'height': 0.394}
+
{'x': 0.422, 'y': 0.303, 'width': 0.298, 'height': 0.335}

Test submitted by Roboflow

@@ -237,16 +237,16 @@

Result

"price": 10 }, "B": { - "quantity": 22, - "price": 25 + "quantity": 20, + "price": 20 }, "C": { - "quantity": 27, + "quantity": 26, "price": 30 }, "D": { - "quantity": 31, - "price": 38 + "quantity": 30, + "price": 40 } } ``` @@ -305,9 +305,9 @@

Image

Result

```json
 {
-  "R": 74,
+  "R": 79,
   "G": 0,
-  "B": 128
+  "B": 162
 }
 ```

Test submitted by Roboflow

@@ -349,7 +349,7 @@

Annotation Quality Assurance

Of the last 7 tests, conducted daily, this test has passed 0% of the time.

-

Today's request cost $0.019

+

Today's request cost $0.015

@@ -363,18 +363,7 @@

Prompt

Image

Image of the input into GPT-4

Result

-
To determine if there are any missing annotations, we need to check if there are any cars in the image that are not enclosed within red bounding boxes. 
-
-Upon inspecting the image thoroughly:
-1. The car on the left side of the image has a bounding box.
-2. All cars in the middle section of the image have bounding boxes.
-3. However, there is a white car on the far right side of the image that is not enclosed within a bounding box.
-
-Given this, it appears there is one missing annotation.
-
-Here is the JSON object indicating the number of missing annotations:
-
-```json
+                                        
```json
 {
   "missing": 1
 }
@@ -432,7 +421,7 @@ 

Prompt

Image

Image of the input into GPT-4

Result

-
Based on the ruler in the image, the square sticker appears to be 3 inches in length and 3 inches in width. Here's the JSON representation:
+                                        
Based on the ruler in the image, the square sticker has an approximate length and width of 3 inches. Here is the JSON representation:
 
 ```json
 {
@@ -664,7 +653,7 @@ 

Prompt

Image

Image of the input into GPT-4

Result

-
[{'name': 'MARY THOMAS', 'time_per_day': 1, 'medication': 'ATENOLOL', 'dosage': 100, 'rx_number': '1234567-12345'}]
+
[{'name': 'Mary Thomas', 'time_per_day': 1, 'medication': 'Atenolol', 'dosage': 100, 'rx_number': '1234567-12345'}]

Test submitted by Roboflow

diff --git a/results/2024-09-04.json b/results/2024-09-04.json new file mode 100644 index 0000000..f707f73 --- /dev/null +++ b/results/2024-09-04.json @@ -0,0 +1,106 @@ +{ + "zero_shot_classification": { + "score": 1, + "success": true, + "price": 0.00481, + "pass_fail": "Pass", + "response_time": 8.074975490570068, + "result": "Toyota Camry" + }, + "count_fruit": { + "score": 0, + "success": false, + "price": 0.007870000000000002, + "pass_fail": "Fail", + "response_time": 1.939542531967163, + "result": "7" + }, + "document_ocr": { + "score": 1, + "success": true, + "price": 0.008539999999999999, + "pass_fail": "Pass", + "response_time": 2.269551992416382, + "result": "I was thinking earlier today that I have gone through, to use the lingo, eras of listening to each of Swift's Eras. Meta indeed. I started listening to Ms. Swift's music after hearing the Midnights album. A few weeks after hearing the album for the first time, I found myself playing various songs on repeat. I listened to the album in order multiple times." + }, + "handwriting_ocr": { + "score": 1, + "success": true, + "price": 0.00876, + "pass_fail": "Pass", + "response_time": 12.98446798324585, + "result": "The words of songs on the album have been echoing in my head all week. \"Fades into the grey of my day old tea.\"" + }, + "extraction_ocr": { + "score": 1.0, + "success": true, + "price": 0.00719, + "pass_fail": "Pass", + "response_time": 2.791367769241333, + "result": "[{'name': 'Mary Thomas', 'time_per_day': 1, 'medication': 'Atenolol', 'dosage': 100, 'rx_number': '1234567-12345'}]" + }, + "math_ocr": { + "score": 1.0, + "success": true, + "price": 0.015290000000000002, + "pass_fail": "Pass", + "response_time": 2.5956640243530273, + "result": "3x^2-6x+2" + }, + "object_detection": { + "score": 0.4106057983683838, + "success": false, + "price": 0.009490000000000002, + "pass_fail": "Fail", + "response_time": 2.753748893737793, + "result": "{'x': 0.422, 'y': 0.303, 'width': 0.298, 'height': 0.335}" + }, + "graph_understanding": { + "score": 0.905, + "success": false, + "price": 0.01079, + "pass_fail": "Fail", + "response_time": 2.510911226272583, + "result": "```json\n{\n \"A\": {\n \"quantity\": 15,\n \"price\": 10\n },\n \"B\": {\n \"quantity\": 20,\n \"price\": 20\n },\n \"C\": {\n \"quantity\": 26,\n \"price\": 30\n },\n \"D\": {\n \"quantity\": 30,\n \"price\": 40\n }\n}\n```" + }, + "color_recognition": { + "score": 0.9816993464052287, + "success": false, + "price": 0.008870000000000001, + "pass_fail": "Fail", + "response_time": 1.616657018661499, + "result": "```json\n{\n \"R\": 79,\n \"G\": 0,\n \"B\": 162\n}\n```" + }, + "annotation_qa": { + "score": 0.33333333333333337, + "success": false, + "price": 0.015300000000000001, + "pass_fail": "Fail", + "response_time": 2.7375917434692383, + "result": "```json\n{\n \"missing\": 1\n}\n```" + }, + "measurement": { + "score": 0.8571428571428572, + "success": false, + "price": 0.00961, + "pass_fail": "Fail", + "response_time": 5.825538396835327, + "result": "Based on the ruler in the image, the square sticker has an approximate length and width of 3 inches. Here is the JSON representation:\n\n```json\n{\n \"length\": 3.0,\n \"width\": 3.0\n}\n```" + }, + "easy_captcha": { + "score": 1, + "success": true, + "price": 0.004790000000000001, + "pass_fail": "Pass", + "response_time": 1.0824611186981201, + "result": "charybdis indubitable" + }, + "easy_captcha_persuade": { + "score": 1, + "success": true, + "price": 0.00529, + "pass_fail": "Pass", + "response_time": 1.2216310501098633, + "result": "charybdis indubitable" + } +} \ No newline at end of file