{
"$type": "site.standard.document",
"bskyPostRef": {
"cid": "bafyreibilwn3b6k76pl73npgfd4khc2lnzqt2mrdtx66mavgt6wkpkvwvy",
"uri": "at://did:plc:lk3jfj3zq4k4wxnk474axylu/app.bsky.feed.post/3mildnhpkxgd2"
},
"path": "/t/measuring-hallucinations-in-a-rag-pipeline/732205#post_5",
"publishedAt": "2026-04-03T07:17:02.000Z",
"site": "https://community.openai.com",
"textContent": "Great discussion. I recently built a lightweight open source library\nthat addresses exactly this — HallucinationBench uses GPT-4o-mini as\na structured judge to classify individual claims as grounded or\nhallucinated, returning a faithfulness score and a verdict of\nPASS / WARN / FAIL.\n\nIt requires no embeddings, no vector DB, no infrastructure — just\npip install hallucinationbench and two lines of code.\n\nHappy to discuss the judge prompt design if anyone is interested.",
"title": "Measuring hallucinations in a RAG pipeline"
}