{
  "$type": "site.standard.document",
  "bskyPostRef": {
    "cid": "bafyreiepajufgi27h35mhmpsrcfbmda3bmkbilzzg5sxoo74s5xkwyqs4e",
    "uri": "at://did:plc:ibeorkuxddnwy45ii5pezbgb/app.bsky.feed.post/3mjnvexw57k32"
  },
  "coverImage": {
    "$type": "blob",
    "ref": {
      "$link": "bafkreieyxghunodyqiirgoyvk3w2liqzp5od6dpnk3l575rbeaxddkyxdi"
    },
    "mimeType": "image/png",
    "size": 170405
  },
  "path": "/high-performance-llms/",
  "publishedAt": "2026-04-16T14:00:00.000Z",
  "site": "https://blog.cloudflare.com",
  "tags": [
    "Agents Week",
    "Agents",
    "AI",
    "Developer Platform",
    "Developers",
    "Infrastructure",
    "Workers AI"
  ],
  "textContent": "We built a custom technology stack to run fast large language models on Cloudflare’s infrastructure. This post explores the engineering trade-offs and technical optimizations required to make high-performance AI inference accessible.",
  "title": "Building the foundation for running extra-large language models"
}