{
"$type": "site.standard.document",
"bskyPostRef": {
"cid": "bafyreiejft4c4c7w2uzqwzcjgb7k2ldtqetmtkadwzp5klul7dk4yjf3z4",
"uri": "at://did:plc:hktb7775qjyt6dh4h5dilmcp/app.bsky.feed.post/3mepnatco7332"
},
"coverImage": {
"$type": "blob",
"ref": {
"$link": "bafkreibzhi3a25ch22bdngfcr2u5pyeeanvru4s5mg3awcmqalbe2ehyky"
},
"mimeType": "image/jpeg",
"size": 345929
},
"path": "/blog/inference-open-source-models-blackwell-reduce-cost-per-token/",
"publishedAt": "2026-02-12T16:00:46.000Z",
"site": "https://blogs.nvidia.com",
"tags": [
"Data Center",
"Generative AI",
"Agentic AI",
"Dynamo",
"Inference",
"NVIDIA Blackwell",
"Open Source",
"TensorRT",
"Think SMART",
"Read Article"
],
"textContent": "A diagnostic insight in healthcare. A character’s dialogue in an interactive game. An autonomous resolution from a customer service agent. Each of these AI-powered interactions is built on the same unit of intelligence: a token. Scaling these AI interactions requires businesses to consider whether they can afford more tokens. The answer lies in better tokenomics Read Article ",
"title": "Leading Inference Providers Cut AI Costs by up to 10x With Open Source Models on NVIDIA Blackwell"
}