AI content moderation protects users and keeps online spaces safe by detecting harmful text and images automatically.
OpenAI Moderation API
import openai

# Check content for policy violations
response = openai.moderations.create(
    input="Sample text to moderate"
)
result = response.results[0]

if result.flagged:
    print("Content flagged!")
    print(f"Categories: {result.categories}")
    print(f"Scores: {result.category_scores}")
else:
    print("Content is safe")

# Check specific categories
if result.categories.hate:
    print("Contains hate speech")
if result.categories.violence:
    print("Contains violent content")
Custom Moderation Pipeline
import openai
from transformers import pipeline

# Toxic comment classifier
toxicity_model = pipeline("text-classification",
                          model="unitary/toxic-bert")

def moderate_content(text):
    # Top toxicity label and score (toxic-bert is multi-label,
    # so a low top score means the text is likely clean)
    toxicity = toxicity_model(text)[0]

    # Check with OpenAI
    openai_result = openai.moderations.create(input=text)

    # Combine results: require both checks to pass
    is_safe = (
        toxicity['score'] < 0.5 and
        not openai_result.results[0].flagged
    )
    return {
        'safe': is_safe,
        'toxicity_label': toxicity['label'],
        'toxicity_score': toxicity['score'],
        'openai_flagged': openai_result.results[0].flagged
    }

result = moderate_content("Your text here")
print(result)
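If you want the individual toxicity dimensions rather than just the top label, recent transformers versions let the pipeline return every label's score via top_k=None. A minimal sketch with the same unitary/toxic-bert model (the 0.5 threshold is an arbitrary choice for illustration):

from transformers import pipeline

# top_k=None returns scores for every label
# (toxic, severe_toxic, obscene, threat, insult, identity_hate)
scorer = pipeline("text-classification",
                  model="unitary/toxic-bert", top_k=None)

def toxicity_report(text, threshold=0.5):
    # List input keeps the output shape predictable:
    # one list of {'label', 'score'} dicts per input
    scores = scorer([text])[0]
    return {s['label']: round(s['score'], 3)
            for s in scores if s['score'] >= threshold}

print(toxicity_report("Your text here"))  # {} when nothing crosses the threshold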
Image Moderation
from google.cloud import vision

# Google Vision AI for image moderation
client = vision.ImageAnnotatorClient()

with open('image.jpg', 'rb') as image_file:
    content = image_file.read()

image = vision.Image(content=content)
response = client.safe_search_detection(image=image)
safe = response.safe_search_annotation

# Likelihood is a 0-5 scale; treat LIKELY (4) and above as unsafe
if (safe.adult >= vision.Likelihood.LIKELY or
        safe.violence >= vision.Likelihood.LIKELY):
    print("Inappropriate content detected")
else:
    print("Image is safe")
Real-Time Moderation System
import openai
from transformers import pipeline

class ContentModerator:
    def __init__(self):
        self.toxicity_model = pipeline("text-classification",
                                       model="unitary/toxic-bert")

    def moderate_text(self, text):
        # Quick local toxicity check first (no API call)
        toxicity = self.toxicity_model(text)[0]
        if toxicity['score'] > 0.8:
            return {'approved': False, 'reason': 'High toxicity'}

        # Fall back to the OpenAI moderation endpoint
        result = openai.moderations.create(input=text)
        if result.results[0].flagged:
            # categories is a pydantic model, so dump it to a
            # dict before filtering for the triggered categories
            flags = result.results[0].categories.model_dump()
            categories = [k for k, v in flags.items() if v]
            return {'approved': False, 'reason': f'Violations: {categories}'}

        return {'approved': True}

moderator = ContentModerator()
result = moderator.moderate_text("User comment here")
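To serve this in real time, you would typically put the moderator behind an endpoint. A minimal sketch using FastAPI (the /moderate route and Comment model are illustrative, not part of the original):

from fastapi import FastAPI
from pydantic import BaseModel

app = FastAPI()
moderator = ContentModerator()  # the class defined above

class Comment(BaseModel):
    text: str

@app.post("/moderate")
def moderate(comment: Comment):
    # Returns {'approved': ...} straight from the moderator
    return moderator.moderate_text(comment.text)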
AI moderation helps build safer online communities at a scale human review alone can't match!