{
"$type": "site.standard.document",
"bskyPostRef": {
"cid": "bafyreieydsm6htxu2xfi5hiktezcyitx7aoxhha2ml6r7tapsmsoykfusm",
"uri": "at://did:plc:pgryn3ephfd2xgft23qokfzt/app.bsky.feed.post/3mku37dagtop2"
},
"path": "/t/how-can-i-tell-the-size-of-a-model-before-downloading-it/174108#post_8",
"publishedAt": "2026-05-02T06:04:26.000Z",
"site": "https://discuss.huggingface.co",
"textContent": "Or depending on what hf_hub you hace\n\n#!/usr/bin/env python3\n\nfrom **future** import annotations\n\nfrom huggingface_hub import HfApi\nfrom huggingface_hub.utils import HfHubHTTPError\n\ndef human_size(num_bytes: int | None) → str:\nif not num_bytes:\nreturn “0 B”\n\n\n units = [\"B\", \"KB\", \"MB\", \"GB\", \"TB\"]\n size = float(num_bytes)\n\n for unit in units:\n if size < 1024 or unit == units[-1]:\n return f\"{size:.2f} {unit}\"\n size /= 1024\n\n return f\"{size:.2f} TB\"\n\n\ndef prompt_int(label: str, default: int) → int:\nvalue = input(f\"{label} [{default}]: \").strip()\n\n\n if not value:\n return default\n\n try:\n parsed = int(value)\n if parsed <= 0:\n print(f\"Using default: {default}\")\n return default\n return parsed\n except ValueError:\n print(f\"Invalid number. Using default: {default}\")\n return default\n\n\ndef prompt_bool(label: str, default: bool = False) → bool:\ndefault_text = “y” if default else “n”\nvalue = input(f\"{label} [y/n, default {default_text}]: \").strip().lower()\n\n\n if not value:\n return default\n\n return value in {\"y\", \"yes\", \"true\", \"1\"}\n\n\ndef prompt_choice(label: str, choices: list[str], default: str) → str:\nprint(f\"{label}:“)\nfor index, choice in enumerate(choices, start=1):\nmarker = \" default” if choice == default else “”\nprint(f\" {index}. {choice}{marker}\")\n\n\n value = input(f\"Choose 1-{len(choices)} [{default}]: \").strip()\n\n if not value:\n return default\n\n if value in choices:\n return value\n\n try:\n index = int(value)\n if 1 <= index <= len(choices):\n return choices[index - 1]\n except ValueError:\n pass\n\n print(f\"Invalid choice. 
Using default: {default}\")\n return default\n\n\ndef get_repo_size(api: HfApi, repo_id: str) -> tuple[int, list[tuple[str, int]]]:\ninfo = api.model_info(repo_id, files_metadata=True)\n\n\n files: list[tuple[str, int]] = []\n total = 0\n\n for sibling in info.siblings or []:\n name = getattr(sibling, \"rfilename\", None) or getattr(sibling, \"filename\", None) or \"<unknown>\"\n size = getattr(sibling, \"size\", None) or 0\n\n size = int(size)\n files.append((name, size))\n total += size\n\n files.sort(key=lambda x: x[1], reverse=True)\n return total, files\n\n\ndef get_model_id(model) -> str | None:\nreturn getattr(model, \"modelId\", None) or getattr(model, \"id\", None)\n\ndef sort_models_locally(models: list, sort: str, ascending: bool) -> list:\nreverse = not ascending\n\n\n if sort == \"downloads\":\n models.sort(key=lambda m: getattr(m, \"downloads\", 0) or 0, reverse=reverse)\n elif sort == \"likes\":\n models.sort(key=lambda m: getattr(m, \"likes\", 0) or 0, reverse=reverse)\n elif sort == \"lastModified\":\n models.sort(key=lambda m: str(getattr(m, \"lastModified\", \"\") or \"\"), reverse=reverse)\n elif sort == \"createdAt\":\n models.sort(key=lambda m: str(getattr(m, \"createdAt\", \"\") or \"\"), reverse=reverse)\n elif sort == \"trendingScore\":\n models.sort(key=lambda m: getattr(m, \"trendingScore\", 0) or 0, reverse=reverse)\n\n return models\n\n\ndef main() -> int:\nprint()\nprint(\"Hugging Face model size search\")\nprint()\n\n\n query = input(\"Search term, partial match is OK: \").strip()\n\n if not query:\n print(\"No search term entered. 
Exiting.\")\n return 1\n\n limit = prompt_int(\"How many model results should I check?\", 20)\n\n sort = prompt_choice(\n \"Sort Hugging Face search results by\",\n [\"downloads\", \"likes\", \"lastModified\", \"createdAt\", \"trendingScore\"],\n \"downloads\",\n )\n\n ascending = prompt_bool(\"Ascending sort?\", False)\n show_files = prompt_bool(\"Show largest files for each model?\", True)\n\n top_files = 5\n if show_files:\n top_files = prompt_int(\"How many largest files per model?\", 5)\n\n size_sort = prompt_choice(\n \"Sort final output by computed repo size\",\n [\"desc\", \"asc\", \"none\"],\n \"desc\",\n )\n\n api = HfApi()\n\n print()\n print(f\"Searching Hugging Face for: {query}\")\n print()\n\n try:\n models = list(api.list_models(\n search=query,\n limit=limit,\n sort=sort,\n full=True,\n ))\n except TypeError:\n models = list(api.list_models(\n search=query,\n limit=limit,\n full=True,\n ))\n\n models = sort_models_locally(models, sort=sort, ascending=ascending)\n\n if not models:\n print(f\"No models found for search term: {query}\")\n return 1\n\n rows = []\n\n total_models = len(models)\n\n for index, model in enumerate(models, start=1):\n repo_id = get_model_id(model)\n\n if not repo_id:\n continue\n\n print(f\"[{index}/{total_models}] Checking {repo_id}...\")\n\n try:\n total_size, files = get_repo_size(api, repo_id)\n error = None\n except HfHubHTTPError as e:\n total_size = None\n files = []\n error = str(e)\n except Exception as e:\n total_size = None\n files = []\n error = f\"{type(e).__name__}: {e}\"\n\n rows.append({\n \"repo_id\": repo_id,\n \"size\": total_size,\n \"downloads\": getattr(model, \"downloads\", None),\n \"likes\": getattr(model, \"likes\", None),\n \"pipeline\": getattr(model, \"pipeline_tag\", None),\n \"error\": error,\n \"files\": files,\n })\n\n if size_sort == \"asc\":\n rows.sort(key=lambda r: r[\"size\"] or 0)\n elif size_sort == \"desc\":\n rows.sort(key=lambda r: r[\"size\"] or 0, reverse=True)\n\n print()\n 
print(f\"Search: {query}\")\n print()\n\n print(f\"{'SIZE':>12} {'DOWNLOADS':>10} {'LIKES':>7} {'TYPE':<24} MODEL\")\n print(\"-\" * 110)\n\n for row in rows:\n size = human_size(row[\"size\"])\n downloads = row[\"downloads\"] if row[\"downloads\"] is not None else \"-\"\n likes = row[\"likes\"] if row[\"likes\"] is not None else \"-\"\n pipeline = row[\"pipeline\"] or \"-\"\n\n print(f\"{size:>12} {downloads:>10} {likes:>7} {pipeline:<24} {row['repo_id']}\")\n\n if row[\"error\"]:\n print(f\"{'':>12} error: {row['error']}\")\n\n if show_files and row[\"files\"]:\n for filename, file_size in row[\"files\"][:top_files]:\n print(f\"{'':>12} {human_size(file_size):>10} {filename}\")\n print()\n\n return 0\n\n\nif __name__ == \"__main__\":\nraise SystemExit(main())",
"title": "How can I tell the size of a model before downloading it?"
}