Raw Record Source

{
  "$type": "site.standard.document",
  "bskyPostRef": {
    "cid": "bafyreiao4rbz2jgq4admdybvfj2i63agkpwunjxeaoov6zkuddtxs2pa2a",
    "uri": "at://did:plc:lk3jfj3zq4k4wxnk474axylu/app.bsky.feed.post/3mohlwfjvhl32"
  },
  "path": "/t/are-there-any-good-benchmarks-comparing-openai-api-models/1383961#post_1",
  "publishedAt": "2026-06-17T04:49:48.000Z",
  "site": "https://community.openai.com",
  "textContent": "I’m looking for benchmark results that compare OpenAI models specifically on mathematical reasoning. Most of the discussions I find are focused on coding or general reasoning, but I’m interested in seeing how the current models perform on benchmarks such as AIME, FrontierMath, or other math-focused evaluations.\n\nDoes anyone have links to benchmark comparisons or personal experience using OpenAI models for math-heavy workloads?",
  "title": "Are There Any Good Benchmarks Comparing OpenAI API Models?"
}