From 9f23b975bf2b4cab4defe3ca1b44d1c0b47b4aed Mon Sep 17 00:00:00 2001 From: Robert Craigie Date: Fri, 18 Aug 2023 11:44:35 +0100 Subject: [PATCH 1/2] Update moderations categories --- openapi.yaml | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/openapi.yaml b/openapi.yaml index 9dc8e704..b27c80b7 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -2682,29 +2682,45 @@ components: properties: hate: type: boolean - description: Whether the content was flagged as 'hate'. + description: Content that expresses, incites, or promotes hate based on race, gender, ethnicity, religion, nationality, sexual orientation, disability status, or caste. Hateful content aimed at non-protected groups (e.g., chess players) is harassment. hate/threatening: type: boolean - description: Whether the content was flagged as 'hate/threatening'. + description: Hateful content that also includes violence or serious harm towards the targeted group based on race, gender, ethnicity, religion, nationality, sexual orientation, disability status, or caste. + harassment: + type: boolean + description: Content that expresses, incites, or promotes harassing language towards any target. + harassment/threatening: + type: boolean + description: Harassment content that also includes violence or serious harm towards any target. self-harm: type: boolean - description: Whether the content was flagged as 'self-harm'. + description: Content that promotes, encourages, or depicts acts of self-harm, such as suicide, cutting, and eating disorders. + self-harm/intent: + type: boolean + description: Content where the speaker expresses that they are engaging or intend to engage in acts of self-harm, such as suicide, cutting, and eating disorders. + self-harm/instructions: + type: boolean + description: Content that encourages performing acts of self-harm, such as suicide, cutting, and eating disorders, or that gives instructions or advice on how to commit such acts. 
sexual: type: boolean - description: Whether the content was flagged as 'sexual'. + description: Content meant to arouse sexual excitement, such as the description of sexual activity, or that promotes sexual services (excluding sex education and wellness). sexual/minors: type: boolean - description: Whether the content was flagged as 'sexual/minors'. + description: Sexual content that includes an individual who is under 18 years old. violence: type: boolean - description: Whether the content was flagged as 'violence'. + description: Content that depicts death, violence, or physical injury. violence/graphic: type: boolean - description: Whether the content was flagged as 'violence/graphic'. + description: Content that depicts death, violence, or physical injury in graphic detail. required: - hate - hate/threatening + - harassment + - harassment/threatening - self-harm + - self-harm/intent + - self-harm/instructions - sexual - sexual/minors - violence From 26a698a701c728edecba3c1bb0473fa6baf5341c Mon Sep 17 00:00:00 2001 From: Robert Craigie Date: Fri, 18 Aug 2023 11:52:48 +0100 Subject: [PATCH 2/2] Update category_scores as well --- openapi.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/openapi.yaml b/openapi.yaml index b27c80b7..322b680d 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -2735,9 +2735,21 @@ components: hate/threatening: type: number description: The score for the category 'hate/threatening'. + harassment: + type: number + description: The score for the category 'harassment'. + harassment/threatening: + type: number + description: The score for the category 'harassment/threatening'. self-harm: type: number description: The score for the category 'self-harm'. + self-harm/intent: + type: number + description: The score for the category 'self-harm/intent'. + self-harm/instructions: + type: number + description: The score for the category 'self-harm/instructions'. 
sexual: type: number description: The score for the category 'sexual'. @@ -2753,7 +2765,11 @@ components: required: - hate - hate/threatening + - harassment + - harassment/threatening - self-harm + - self-harm/intent + - self-harm/instructions - sexual - sexual/minors - violence