Raw Record Source

{
  "$type": "site.standard.document",
  "bskyPostRef": {
    "cid": "bafyreihluwvpqlyewulwgzwiumi7srqnkm6xdacov7widusrqasubedbxm",
    "uri": "at://did:plc:avkh7zze5iapdkk6naaunrjn/app.bsky.feed.post/3mmthafjcatc2"
  },
  "path": "/260527/p13#a260527p13",
  "publishedAt": "2026-05-27T10:05:01.000Z",
  "site": "https://www.techmeme.com",
  "tags": [
    "VentureBeat",
    "Datacurve releases the DeepSWE coding benchmark, a 113-task test across 91 open-source repositories and five languages, and says GPT-5.5 is the leader at 70%"
  ],
  "textContent": "Michael Nuñez / VentureBeat:\n**Datacurve releases the DeepSWE coding benchmark, a 113-task test across 91 open-source repositories and five languages, and says GPT-5.5 is the leader at 70%** — For months, the leading AI coding benchmarks have told enterprise buyers a comforting but misleading story: the top models are all roughly the same.",
  "title": "Datacurve releases the DeepSWE coding benchmark, a 113-task test across 91 open-source repositories and five languages, and says GPT-5.5 is the leader at 70% (Michael Nuñez/VentureBeat)"
}