Raw Record Source

{
  "$type": "site.standard.document",
  "bskyPostRef": {
    "cid": "bafyreihytxjuldahx76jw7whbo6ohd4dcntuccg5p4pehu7cgdzjlw4g2q",
    "uri": "at://did:plc:avkh7zze5iapdkk6naaunrjn/app.bsky.feed.post/3meek2jz36n72"
  },
  "path": "/260208/p12#a260208p12",
  "publishedAt": "2026-02-08T16:40:00.000Z",
  "site": "http://www.techmeme.com",
  "tags": [
    "New York Times",
    "Q&A with mathematicians behind the “First Proof” experiment, which tests AI's mathematical competence on questions drawn from the authors' unpublished research"
  ],
  "textContent": "\n\n Siobhan Roberts / New York Times:\n**Q&A with mathematicians behind the “First Proof” experiment, which tests AI's mathematical competence on questions drawn from the authors' unpublished research**  —  Large language models struggle to solve research-level math questions.  It takes a human to measure just how poorly they perform.",
  "title": "Q&A with mathematicians behind the \"First Proof\" experiment, which tests AI's mathematical competence on questions drawn from the authors' unpublished research (Siobhan Roberts/New York Times)"
}