{
"$type": "site.standard.document",
"bskyPostRef": {
"cid": "bafyreiamot3mj5hha4ybmztbrxitfyerp4y2pqfe3kobywsdu2kwp4cdci",
"uri": "at://did:plc:ctcqaqa3tlbhfwmfefti346h/app.bsky.feed.post/3mlob3h5nmxs2"
},
"coverImage": {
"$type": "blob",
"ref": {
"$link": "bafkreiem3o63fxja7xadnnvoeuwqv5qerv5ygaeksytl3viziyqt2vjjv4"
},
"mimeType": "image/jpeg",
"size": 108028
},
"path": "/pro/current-llms-introduce-substantial-errors-when-editing-work-documents-microsoft-scientists-find-most-ai-models-struggle-with-long-running-tasks-so-maybe-dont-trust-them-completely-just-yet",
"publishedAt": "2026-05-12T15:35:00.000Z",
"site": "https://www.techradar.com",
"tags": [
"Pro"
],
"textContent": "The more interactions an AI model has, the less reliable it becomes, experts find, as even the best only scored 80.9% – and the worst scoring just 10.0%.",
"title": "'Current LLMs introduce substantial errors when editing work documents': Microsoft scientists find most AI models struggle with long-running tasks — so maybe don't trust them completely just yet"
}