External Publication
Visit Post

How can I tell the size of a model before downloading it?

Hugging Face Forums [Unofficial] May 2, 2026
Source

Or, depending on which hf_hub version you have:

#!/usr/bin/env python3

from __future__ import annotations

from huggingface_hub import HfApi
from huggingface_hub.utils import HfHubHTTPError

def human_size(num_bytes: int | None) -> str:
    """Format a byte count as a human-readable string using 1024-based units.

    Args:
        num_bytes: Size in bytes. ``None`` and ``0`` are both rendered as
            ``"0 B"``.

    Returns:
        The size with two decimals and a unit from B to TB, e.g.
        ``"1.50 KB"``. Values of 1024 TB or more stay expressed in TB.
    """
    if not num_bytes:
        return "0 B"

    size = float(num_bytes)
    for unit in ("B", "KB", "MB", "GB"):
        if size < 1024:
            return f"{size:.2f} {unit}"
        size /= 1024

    # Whatever survived four divisions is reported in the largest unit.
    return f"{size:.2f} TB"

def prompt_int(label: str, default: int) -> int:
    """Ask the user for a positive integer, falling back to *default*.

    Args:
        label: Prompt text shown before the bracketed default.
        default: Value returned for empty, non-numeric, or non-positive
            input.

    Returns:
        The parsed positive integer, or *default*.
    """
    value = input(f"{label} [{default}]: ").strip()
    if not value:
        return default

    # Only the conversion can raise, so keep the try body to that one call.
    try:
        parsed = int(value)
    except ValueError:
        print(f"Invalid number. Using default: {default}")
        return default

    if parsed <= 0:
        print(f"Using default: {default}")
        return default
    return parsed

def prompt_bool(label: str, default: bool = False) -> bool:
    """Ask a yes/no question on stdin.

    Args:
        label: Question text shown before the ``[y/n, default ...]`` hint.
        default: Value returned for empty or unrecognised input.

    Returns:
        ``True`` for y/yes/true/1, ``False`` for n/no/false/0 (both
        case-insensitive), otherwise *default*.
    """
    default_text = "y" if default else "n"
    value = input(f"{label} [y/n, default {default_text}]: ").strip().lower()

    if not value:
        return default
    if value in {"y", "yes", "true", "1"}:
        return True
    if value in {"n", "no", "false", "0"}:
        return False

    # Unrecognised text falls back to the default (with a notice), the same
    # way the other prompt helpers treat invalid input, instead of silently
    # counting as "no".
    print(f"Invalid choice. Using default: {default_text}")
    return default

def prompt_choice(label: str, choices: list[str], default: str) -> str:
    """Print a numbered menu and return the option the user picks.

    The answer may be the option's 1-based number or its name; names are
    matched exactly first and then case-insensitively.

    Args:
        label: Heading printed above the menu.
        choices: Options, listed in the order given.
        default: Value returned for empty or invalid input; the matching
            menu entry is tagged with " default".

    Returns:
        One element of *choices*, or *default*.
    """
    print(f"{label}:")
    for index, choice in enumerate(choices, start=1):
        marker = " default" if choice == default else ""
        print(f" {index}. {choice}{marker}")

    value = input(f"Choose 1-{len(choices)} [{default}]: ").strip()
    if not value:
        return default
    if value in choices:
        return value

    # Accept a differently-cased spelling, e.g. "LastModified".
    folded = value.casefold()
    for choice in choices:
        if choice.casefold() == folded:
            return choice

    try:
        index = int(value)
    except ValueError:
        index = 0  # not a number: falls through to the invalid-choice path
    if 1 <= index <= len(choices):
        return choices[index - 1]

    print(f"Invalid choice. Using default: {default}")
    return default

def get_repo_size(api: HfApi, repo_id: str) -> tuple[int, list[tuple[str, int]]]:
    """Sum the file sizes reported for a model repository.

    Calls ``api.model_info(repo_id, files_metadata=True)`` and reads the
    ``siblings`` list from the result.

    Args:
        api: Client object providing ``model_info``.
        repo_id: Repository identifier passed through to ``model_info``.

    Returns:
        ``(total_bytes, files)`` where *files* is a list of
        ``(filename, size_in_bytes)`` pairs sorted largest first. A missing
        size counts as 0 and a missing name is shown as ``"<unknown>"``.
    """
    info = api.model_info(repo_id, files_metadata=True)

    files: list[tuple[str, int]] = []
    for sibling in info.siblings or []:
        # The file name is looked up under both attribute spellings.
        name = (
            getattr(sibling, "rfilename", None)
            or getattr(sibling, "filename", None)
            or "<unknown>"
        )
        size = int(getattr(sibling, "size", None) or 0)
        files.append((name, size))

    files.sort(key=lambda item: item[1], reverse=True)
    total = sum(size for _, size in files)
    return total, files

def get_model_id(model) → str | None: return getattr(model, “modelId”, None) or getattr(model, “id”, None)

def sort_models_locally(models: list, sort: str, ascending: bool) -> list:
    """Sort search results in place by one of the supported fields.

    Args:
        models: Result objects; the list is sorted in place.
        sort: One of ``"downloads"``, ``"likes"``, ``"lastModified"``,
            ``"createdAt"`` or ``"trendingScore"``. Any other value leaves
            the order untouched.
        ascending: ``True`` for smallest/oldest first, ``False`` for
            largest/newest first.

    Returns:
        The same list object that was passed in.
    """
    # Sort name -> (attribute spellings to try, whether values are
    # timestamps). The snake_case spellings are tried as well because the
    # attribute names differ between huggingface_hub releases; reading only
    # the camelCase name would give every model the same key there.
    sort_fields = {
        "downloads": (("downloads",), False),
        "likes": (("likes",), False),
        "lastModified": (("lastModified", "last_modified"), True),
        "createdAt": (("createdAt", "created_at"), True),
        "trendingScore": (("trendingScore", "trending_score"), False),
    }

    field = sort_fields.get(sort)
    if field is None:
        return models
    attr_names, is_timestamp = field

    def sort_key(model):
        for attr_name in attr_names:
            value = getattr(model, attr_name, None)
            if value:
                # Timestamps are compared by their string form.
                return str(value) if is_timestamp else value
        return "" if is_timestamp else 0

    models.sort(key=sort_key, reverse=not ascending)
    return models

def _search_models(api: HfApi, query: str, limit: int, sort: str) -> list:
    """Run the model search, retrying without ``sort`` on ``TypeError``."""
    try:
        return list(api.list_models(
            search=query,
            limit=limit,
            sort=sort,
            full=True,
        ))
    except TypeError:
        # NOTE(review): presumably raised by client versions that reject the
        # ``sort`` argument; the results are re-sorted locally afterwards.
        return list(api.list_models(
            search=query,
            limit=limit,
            full=True,
        ))


def _collect_rows(api: HfApi, models: list) -> list[dict]:
    """Look up the repo size of every model and build one report row each."""
    rows = []
    total_models = len(models)

    for index, model in enumerate(models, start=1):
        repo_id = get_model_id(model)
        if not repo_id:
            continue

        print(f"[{index}/{total_models}] Checking {repo_id}...")

        try:
            total_size, files = get_repo_size(api, repo_id)
            error = None
        except HfHubHTTPError as e:
            total_size, files, error = None, [], str(e)
        except Exception as e:
            # Best effort: one failing repo must not abort the whole report,
            # so the failure is recorded on the row and shown in the output.
            total_size, files, error = None, [], f"{type(e).__name__}: {e}"

        rows.append({
            "repo_id": repo_id,
            "size": total_size,
            "downloads": getattr(model, "downloads", None),
            "likes": getattr(model, "likes", None),
            "pipeline": getattr(model, "pipeline_tag", None),
            "error": error,
            "files": files,
        })

    return rows


def _print_report(query: str, rows: list[dict], show_files: bool, top_files: int) -> None:
    """Print the result table, plus errors and largest files per model."""
    print()
    print(f"Search: {query}")
    print()

    print(f"{'SIZE':>12}  {'DOWNLOADS':>10}  {'LIKES':>7}  {'TYPE':<24}  MODEL")
    print("-" * 110)

    for row in rows:
        # A failed lookup has no size; show "-" like the other missing
        # columns instead of a misleading "0 B".
        size = human_size(row["size"]) if row["size"] is not None else "-"
        downloads = row["downloads"] if row["downloads"] is not None else "-"
        likes = row["likes"] if row["likes"] is not None else "-"
        pipeline = row["pipeline"] or "-"

        print(f"{size:>12}  {downloads:>10}  {likes:>7}  {pipeline:<24}  {row['repo_id']}")

        if row["error"]:
            print(f"{'':>12}  error: {row['error']}")

        if show_files and row["files"]:
            for filename, file_size in row["files"][:top_files]:
                print(f"{'':>12}  {human_size(file_size):>10}  {filename}")
            print()


def main() -> int:
    """Interactively search for models and print their repository sizes.

    Prompts for a search term, result limit, sort order and output options,
    queries the API through ``HfApi``, computes each repo's total size and
    prints a table.

    Returns:
        Process exit code: 0 on success, 1 when no search term was entered
        or the search returned no models.
    """
    print()
    print("Hugging Face model size search")
    print()

    query = input("Search term, partial match is OK: ").strip()
    if not query:
        print("No search term entered. Exiting.")
        return 1

    limit = prompt_int("How many model results should I check?", 20)

    sort = prompt_choice(
        "Sort Hugging Face search results by",
        ["downloads", "likes", "lastModified", "createdAt", "trendingScore"],
        "downloads",
    )

    ascending = prompt_bool("Ascending sort?", False)
    show_files = prompt_bool("Show largest files for each model?", True)

    top_files = 5
    if show_files:
        top_files = prompt_int("How many largest files per model?", 5)

    size_sort = prompt_choice(
        "Sort final output by computed repo size",
        ["desc", "asc", "none"],
        "desc",
    )

    api = HfApi()

    print()
    print(f"Searching Hugging Face for: {query}")
    print()

    models = _search_models(api, query, limit, sort)
    # The requested direction is applied here, to the results already fetched.
    models = sort_models_locally(models, sort=sort, ascending=ascending)

    if not models:
        print(f"No models found for search term: {query}")
        return 1

    rows = _collect_rows(api, models)

    # Rows whose size lookup failed sort as if they were empty (0 bytes).
    if size_sort == "asc":
        rows.sort(key=lambda r: r["size"] or 0)
    elif size_sort == "desc":
        rows.sort(key=lambda r: r["size"] or 0, reverse=True)

    _print_report(query, rows, show_files, top_files)
    return 0

# Run the interactive search only when executed as a script, and use
# main()'s return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())

Discussion in the ATmosphere

Loading comments...