{
"$type": "site.standard.document",
"bskyPostRef": {
"cid": "bafyreiao4rbz2jgq4admdybvfj2i63agkpwunjxeaoov6zkuddtxs2pa2a",
"uri": "at://did:plc:lk3jfj3zq4k4wxnk474axylu/app.bsky.feed.post/3mohlwfjvhl32"
},
"path": "/t/are-there-any-good-benchmarks-comparing-openai-api-models/1383961#post_1",
"publishedAt": "2026-06-17T04:49:48.000Z",
"site": "https://community.openai.com",
"textContent": "I’m looking for benchmark results that compare OpenAI models specifically on mathematical reasoning. Most of the discussions I find are focused on coding or general reasoning, but I’m interested in seeing how the current models perform on benchmarks such as AIME, FrontierMath, or other math-focused evaluations.\n\nDoes anyone have links to benchmark comparisons or personal experience using OpenAI models for math-heavy workloads?",
"title": "Are There Any Good Benchmarks Comparing OpenAI API Models?"
}