{
"$type": "site.standard.document",
"bskyPostRef": {
"cid": "bafyreiemvwwlm6gk5b27hti2viltzfdg2repmhhxrjr3a55sbhmlu5mp7u",
"uri": "at://did:plc:bbwgwecw2vdijmj42eealwtt/app.bsky.feed.post/3mlaatajer2e2"
},
"coverImage": {
"$type": "blob",
"ref": {
"$link": "bafkreieslqxaohzweyaf5vedmmli7qxkgnfx6sau35inpdujkswdpqvzpu"
},
"mimeType": "image/webp",
"size": 30022
},
"path": "/google-made-gemma-4-models-3x-faster-with-mtp-drafters/",
"publishedAt": "2026-05-06T20:35:09.000Z",
"site": "https://www.testingcatalog.com",
"tags": [
"Gemini News",
"Latest AI News",
"AI Announcements"
],
"textContent": "What's new? Speculative decoding pairs a heavy main model with a light drafter to pre-generate tokens; Gemma 4 models now run on consumer GPUs and edge devices.",
"title": "Google made Gemma 4 models 3x faster with MTP Drafters"
}