{
"$type": "site.standard.document",
"bskyPostRef": {
"cid": "bafyreiaots3oiqft3efkixb7ukmuljgic26tn7t3s6hf53x6khr7zrysva",
"uri": "at://did:plc:5v5xdyhb3zf74bcdmcnazkrk/app.bsky.feed.post/3mkkgljfuej32"
},
"coverImage": {
"$type": "blob",
"ref": {
"$link": "bafkreierjps3m6wdtg2zojk7fc4pjpfzna4ujhjsz4m3kyrrcivrgp74bq"
},
"mimeType": "image/png",
"size": 135567
},
"path": "/research/technology/ai-models-refused-harmful-requests-until-researchers-hid-them-in-fiction-and-theology/",
"publishedAt": "2026-04-28T09:59:08.000Z",
"site": "https://www.zmescience.com",
"tags": [
"Technology",
"adversarial prompts",
"AI agents",
"AI alignment",
"AI safety",
"anthropic",
"arXiv",
"benchmark testing",
"Cybersecurity",
"Google",
"jailbreaks",
"large language models",
"machine learning",
"OpenAI"
],
"textContent": "Advanced AI guardrails collapse when confronted with humanistic literature.",
"title": "AI Models Refused Harmful Requests Until Researchers Hid Them in Fiction and Theology"
}