{
"$type": "site.standard.document",
"bskyPostRef": {
"cid": "bafyreidbjll3oyrbunvqhierypxoc3pilbkhgy5vtdjsf4qqgdt3c2pq5u",
"uri": "at://did:plc:l5eubmmg57bpj76f4frmgy3r/app.bsky.feed.post/3mo5yharcidf2"
},
"coverImage": {
"$type": "blob",
"ref": {
"$link": "bafkreibhuafblgztdowb73svcu3jhxwrdhu5slg3razt2e3lbonnh54spm"
},
"mimeType": "image/png",
"size": 159713
},
"path": "/blog/olmo-eval",
"publishedAt": "2026-06-12T08:00:00.000Z",
"site": "https://allenai.org",
"textContent": "olmo-eval is an open evaluation workbench that helps model developers add, run, and analyze benchmarks across changing LLM checkpoints, extending OLMES from final-score reproducibility into the day-to-day model development loop.",
"title": "olmo-eval: An evaluation workbench for the model development loop"
}