{
  "$type": "site.standard.document",
  "description": "A method for controlling a vehicle in an environment includes generating, via a cross-attention model, a cross-attention cost volume based on a current image of the environment and a previous image of the environment in a sequence of images. The method also includes generating combined features by…",
  "path": "/patents/1360732",
  "publishedAt": "2024-03-14T00:00:00.000Z",
  "site": "at://did:plc:oql6ds5vnff4ugar6rruliwd/site.standard.publication/3mn3ohu7oxx5w",
  "tags": [
    "G06T7/55",
    "TOYOTA RESEARCH INSTITUTE, INC."
  ],
  "textContent": "A method for controlling a vehicle in an environment includes generating, via a cross-attention model, a cross-attention cost volume based on a current image of the environment and a previous image of the environment in a sequence of images. The method also includes generating combined features by combining cost volume features of the cross-attention cost volume with single-frame features associated with the current image. The single-frame features may be generated via a single-frame encoding model. The method further includes generating a depth estimate of the current image based on the combined features. The method still further includes controlling an action of the vehicle based on the depth estimate.",
  "title": "SELF-SUPERVISED TRAINING FROM A TEACHER NETWORK FOR COST VOLUME BASED DEPTH ESTIMATES"
}