Raw Record Source

{
  "$type": "site.standard.document",
  "bskyPostRef": {
    "cid": "bafyreiejft4c4c7w2uzqwzcjgb7k2ldtqetmtkadwzp5klul7dk4yjf3z4",
    "uri": "at://did:plc:hktb7775qjyt6dh4h5dilmcp/app.bsky.feed.post/3mepnatco7332"
  },
  "coverImage": {
    "$type": "blob",
    "ref": {
      "$link": "bafkreibzhi3a25ch22bdngfcr2u5pyeeanvru4s5mg3awcmqalbe2ehyky"
    },
    "mimeType": "image/jpeg",
    "size": 345929
  },
  "path": "/blog/inference-open-source-models-blackwell-reduce-cost-per-token/",
  "publishedAt": "2026-02-12T16:00:46.000Z",
  "site": "https://blogs.nvidia.com",
  "tags": [
    "Data Center",
    "Generative AI",
    "Agentic AI",
    "Dynamo",
    "Inference",
    "NVIDIA Blackwell",
    "Open Source",
    "TensorRT",
    "Think SMART",
    "Read Article"
  ],
  "textContent": "A diagnostic insight in healthcare. A character’s dialogue in an interactive game. An autonomous resolution from a customer service agent. Each of these AI-powered interactions is built on the same unit of intelligence: a token. Scaling these AI interactions requires businesses to consider whether they can afford more tokens. The answer lies in better tokenomics.",
  "title": "Leading Inference Providers Cut AI Costs by up to 10x With Open Source Models on NVIDIA Blackwell"
}