{
"$type": "site.standard.document",
"coverImage": {
"$type": "blob",
"ref": {
"$link": "bafkreicouqgpjxsk5iaefugjn2cncdprqy3bwwmowevw7y6nush4pcu2jq"
},
"mimeType": "image/png",
"size": 76681
},
"description": "Systems, methods and non-transitory computer-readable media are presented. These describe operations comprising obtaining, visual data acquired by a first image sensor and associated with a traffic scene and obtaining, audio data acquired by a first audio sensor and associated with the traffic…",
"path": "/patents/1385566",
"publishedAt": "2026-06-04T00:00:00.000Z",
"site": "at://did:plc:oql6ds5vnff4ugar6rruliwd/site.standard.publication/3mn3ohu7oxx5w",
"tags": [
"G06V20/58",
"Zoox, Inc."
],
"textContent": "Systems, methods and non-transitory computer-readable media are presented. These describe operations comprising obtaining, visual data acquired by a first image sensor and associated with a traffic scene and obtaining, audio data acquired by a first audio sensor and associated with the traffic scene. The first audio sensor and the first image sensor are mutually independent. The operations further comprise determining, based at least in part on the visual data and the audio data, embeddings for the traffic scene that represent a joint representation space for the different modalities; and determining, by a first machine learning model, and based at least in part on the embeddings, presence of an emergency vehicle in the traffic scene.",
"title": "OBJECT DETECTION"
}