External Publication
Visit Post

How can I tell the size of a model before downloading it?

Hugging Face Forums [Unofficial] May 2, 2026
Source

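Use the generic `search=` parameter of Hugging Face’s `HfApi.list_models()`. It behaves like the site search: partial, fuzzy-ish matching across model names and metadata. The script below then reads each matching repo’s per-file sizes from the Hub metadata, so nothing has to be downloaded to learn how big a model is.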

#!/usr/bin/env python3

from __future__ import annotations

import argparse

from huggingface_hub import HfApi
from huggingface_hub.utils import HfHubHTTPError

def human_size(num_bytes: int | None) → str: if not num_bytes: return “0 B”

units = ["B", "KB", "MB", "GB", "TB"]
size = float(num_bytes)

for unit in units:
    if size < 1024 or unit == units[-1]:
        return f"{size:.2f} {unit}"
    size /= 1024

return f"{size:.2f} TB"

def get_repo_size(api: HfApi, repo_id: str) -> tuple[int, list[tuple[str, int]]]:
    """Compute a model repo's total size from Hub metadata.

    Args:
        api: Client whose ``model_info`` method is queried.
        repo_id: Repository identifier passed to ``model_info``.

    Returns:
        A ``(total_bytes, files)`` pair, where ``files`` is a list of
        ``(filename, size_in_bytes)`` tuples sorted largest first. Files
        without a reported size count as 0 bytes.

    Raises:
        Any exception raised by ``api.model_info`` propagates unchanged.
    """
    # files_metadata=True requests per-file metadata so that each sibling
    # carries a size; without it the sizes are not populated.
    info = api.model_info(repo_id, files_metadata=True)

    # ``siblings`` may be None and individual sizes may be None; treat both
    # as "nothing to count" rather than failing.
    files = [(sibling.rfilename, sibling.size or 0) for sibling in info.siblings or []]
    total = sum(size for _, size in files)

    files.sort(key=lambda item: item[1], reverse=True)
    return total, files

def main() -> int:
    """Search Hub models by a partial term and print a table of repo sizes.

    Parses the command line, lists matching models via ``HfApi.list_models``,
    looks up each repo's total size with ``get_repo_size``, and prints the
    results sorted by size, largest first. Repos whose size lookup fails
    with ``HfHubHTTPError`` are still listed, with the error shown beneath.

    Returns:
        ``0``; the caller passes it to ``SystemExit`` as the exit status.
    """
    parser = argparse.ArgumentParser(
        description="Search Hugging Face models by partial term and show approximate repo sizes."
    )
    parser.add_argument("query", help="Partial search term, e.g. qwen, llama, flux, embedding, whisper")
    parser.add_argument("-n", "--limit", type=int, default=20, help="Number of model results to check")
    parser.add_argument("--show-files", action="store_true", help="Show largest files per repo")
    parser.add_argument("--top-files", type=int, default=5, help="Number of files to show with --show-files")
    parser.add_argument("--sort", default="downloads", help="HF sort field: downloads, likes, lastModified, createdAt")
    parser.add_argument("--ascending", action="store_true", help="Sort HF results ascending")

    args = parser.parse_args()

    api = HfApi()

    models = api.list_models(
        search=args.query,
        limit=args.limit,
        sort=args.sort,
        direction=1 if args.ascending else -1,
        full=True,
    )

    rows = []

    for model in models:
        repo_id = model.modelId

        # Defaults describe a failed lookup; overwritten on success.
        total_size, files, error = None, [], None
        try:
            total_size, files = get_repo_size(api, repo_id)
        except HfHubHTTPError as e:
            # Keep the repo in the table so the failure is visible.
            error = str(e)

        rows.append({
            "repo_id": repo_id,
            "size": total_size,
            "downloads": getattr(model, "downloads", None),
            "likes": getattr(model, "likes", None),
            "pipeline": getattr(model, "pipeline_tag", None),
            "error": error,
            "files": files,
        })

    # The Hub-side sort only decides WHICH models are fetched; the table
    # itself is ordered by size. Unknown sizes sort as 0, i.e. last.
    rows.sort(key=lambda r: r["size"] or 0, reverse=True)

    print()
    print(f"Search: {args.query}")
    print()

    print(f"{'SIZE':>12}  {'DOWNLOADS':>10}  {'LIKES':>7}  {'TYPE':<24}  MODEL")
    print("-" * 90)

    for row in rows:
        # A size of None means the lookup failed; show the same "-"
        # placeholder as other missing values instead of a misleading "0 B".
        size = human_size(row["size"]) if row["size"] is not None else "-"
        downloads = row["downloads"] if row["downloads"] is not None else "-"
        likes = row["likes"] if row["likes"] is not None else "-"
        pipeline = row["pipeline"] or "-"

        print(f"{size:>12}  {downloads:>10}  {likes:>7}  {pipeline:<24}  {row['repo_id']}")

        if row["error"]:
            print(f"{'':>12}  error: {row['error']}")

        if args.show_files and row["files"]:
            for filename, file_size in row["files"][:args.top_files]:
                print(f"{'':>12}  {human_size(file_size):>10}  {filename}")
            print()

    return 0

# Run the CLI only when executed as a script, and use main()'s return
# value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())

Discussion in the ATmosphere

Loading comments...