# Folder Size Analyzer

import os

import json

import shutil

from pathlib import Path

from collections import defaultdict

from datetime import datetime


# ============================================================

# CONFIGURATION

# ============================================================


# File name that save_report() writes the JSON report to. It is a relative
# path, so the report is created in the current working directory.
REPORT_FILE = "folder_size_report.json"

# Default bar length, in characters, used by draw_bar().
BAR_WIDTH   = 30    # width of ASCII bar chart

# Default number of rows shown by display_top_level() and offered as the
# default answer in the interactive menu.
TOP_N       = 10    # default top N items to show


# ============================================================

# FORMAT FILE SIZE

# ============================================================


def format_size(size_bytes):
    """Render a byte count as a short human-readable string.

    The value is divided by 1024 until it drops below 1024 and is then
    printed with one decimal place and the matching unit (B, KB, MB, GB,
    TB). Anything still at or above 1024 after TB is reported in PB.

    Args:
        size_bytes: Size in bytes (int or float).

    Returns:
        A string such as "1.5 KB". Negative input yields the fixed
        string "0 B".
    """
    if size_bytes < 0:
        return "0 B"

    units = ("B", "KB", "MB", "GB", "TB")
    amount = size_bytes
    position = 0
    while position < len(units):
        if amount < 1024:
            return f"{amount:.1f} {units[position]}"
        amount /= 1024
        position += 1

    # Larger than every unit in the table: report in petabytes.
    return f"{amount:.1f} PB"



# ============================================================

# DRAW ASCII BAR

# ============================================================


def draw_bar(value, max_value, width=BAR_WIDTH):
    """Build a fixed-width ASCII bar showing value relative to max_value.

    Args:
        value: Quantity to plot.
        max_value: Quantity that corresponds to a completely filled bar.
        width: Total bar length in characters.

    Returns:
        A string of "█" (filled) followed by "░" (empty) cells whose
        combined length is ``width``, or "" when ``max_value`` is 0.
    """
    if max_value == 0:
        return ""

    filled = int((value / max_value) * width)
    # Clamp so that an out-of-range ratio (value > max_value, or a negative
    # value) can never produce a bar longer or shorter than `width`.
    filled = max(0, min(width, filled))
    return "█" * filled + "░" * (width - filled)



# ============================================================

# GET FOLDER SIZE (Recursive)

# ============================================================


def get_folder_size(path):
    """Return the total size in bytes of all regular files below a folder.

    Symbolic links are neither counted nor followed. Entries or
    directories that cannot be read are skipped, so the result is a
    best-effort total and the function never raises for filesystem errors.

    Args:
        path: Folder to measure (str or path-like).

    Returns:
        Sum of ``st_size`` over every regular file found; 0 if the folder
        is missing or unreadable.
    """
    total = 0
    # An explicit stack instead of recursion, so that a very deep tree
    # cannot trigger RecursionError (which is not an OSError and would
    # escape the handlers below).
    pending = [path]

    while pending:
        current = pending.pop()
        try:
            # The context manager closes the directory handle promptly,
            # even when iteration is cut short by an error.
            with os.scandir(current) as entries:
                for entry in entries:
                    try:
                        if entry.is_file(follow_symlinks=False):
                            total += entry.stat(follow_symlinks=False).st_size
                        elif entry.is_dir(follow_symlinks=False):
                            pending.append(entry.path)
                    except OSError:
                        # Unreadable entry: skip it, keep the rest.
                        continue
        except OSError:
            # Unreadable directory: keep what was counted so far.
            continue

    return total



# ============================================================

# SCAN: TOP-LEVEL BREAKDOWN

# ============================================================


def scan_top_level(root_path):
    """Measure every immediate child (file or folder) of a folder.

    Progress is printed to stdout while scanning. Symbolic links and
    other special entries are skipped, as are entries that cannot be read.

    Args:
        root_path: Folder whose direct children are measured.

    Returns:
        A list of dicts with the keys "name", "path", "size" (bytes),
        "is_dir" and "modified" (``dd-mm-YYYY``), sorted by size, largest
        first. An empty list is returned if the folder cannot be listed.
    """
    root = Path(root_path)

    # os.scandir is used instead of Path.iterdir(): DirEntry supports
    # is_file/is_dir(follow_symlinks=False) on every Python 3 version,
    # whereas the Path methods only accept that keyword from 3.13 on.
    try:
        with os.scandir(root) as listing:
            entries = sorted(listing, key=lambda e: e.name)
    except OSError as exc:
        print(f"\n  Cannot read '{root_path}': {exc}")
        return []

    total = len(entries)
    print(f"\n  Scanning {total} item(s) in '{root_path}'...")

    items = []
    for i, entry in enumerate(entries, 1):
        print(f"  [{i}/{total}] {entry.name[:40]}...", end="\r")
        try:
            if entry.is_file(follow_symlinks=False):
                is_dir = False
            elif entry.is_dir(follow_symlinks=False):
                is_dir = True
            else:
                # Symlink or special file: not part of the breakdown.
                continue

            # One stat call serves both the size and the timestamp.
            info = entry.stat(follow_symlinks=False)
            size = get_folder_size(entry.path) if is_dir else info.st_size
            modified = datetime.fromtimestamp(
                info.st_mtime).strftime("%d-%m-%Y")
        except OSError:
            continue

        items.append({
            "name":     entry.name,
            # Built with pathlib so the string matches str(root / name).
            "path":     str(root / entry.name),
            "size":     size,
            "is_dir":   is_dir,
            "modified": modified,
        })

    print(" " * 60, end="\r")  # clear progress line
    return sorted(items, key=lambda x: x["size"], reverse=True)



# ============================================================

# SCAN: FILE EXTENSION BREAKDOWN

# ============================================================


def scan_by_extension(root_path, recursive=True):
    """Group total size and file count by file extension.

    Args:
        root_path: Folder to scan.
        recursive: When true, descend into subfolders; otherwise only the
            folder's direct children are considered.

    Returns:
        A dict mapping the lower-cased extension (or "(no extension)") to
        ``{"size": total_bytes, "count": number_of_files}``, ordered by
        size, largest first.
    """
    ext_map = defaultdict(lambda: {"size": 0, "count": 0})
    root = Path(root_path)
    candidates = root.rglob("*") if recursive else root.glob("*")

    for f in candidates:
        # is_file() performs a stat and can itself raise (e.g. a
        # PermissionError), so it is kept inside the guarded section to
        # stop one bad entry from aborting the whole scan.
        try:
            if not f.is_file():
                continue
            size = f.stat().st_size
        except OSError:
            continue

        bucket = ext_map[f.suffix.lower() or "(no extension)"]
        bucket["size"] += size
        bucket["count"] += 1

    return dict(sorted(ext_map.items(),
                       key=lambda x: x[1]["size"], reverse=True))



# ============================================================

# SCAN: LARGEST FILES

# ============================================================


def find_largest_files(root_path, top_n=20, recursive=True):
    """Find the largest individual files below a folder.

    Args:
        root_path: Folder to scan.
        top_n: Maximum number of files to return.
        recursive: When true, descend into subfolders; otherwise only the
            folder's direct children are considered.

    Returns:
        A list of ``(size_bytes, path, modified)`` tuples, largest first,
        where ``modified`` is formatted as ``dd-mm-YYYY``.
    """
    root = Path(root_path)
    candidates = root.rglob("*") if recursive else root.glob("*")
    files = []

    for f in candidates:
        # is_file() is inside the guard because it can raise (e.g. a
        # PermissionError); a single stat() then serves both the size
        # and the timestamp.
        try:
            if not f.is_file():
                continue
            info = f.stat()
            mtime = datetime.fromtimestamp(
                info.st_mtime).strftime("%d-%m-%Y")
        except OSError:
            continue
        files.append((info.st_size, str(f), mtime))

    return sorted(files, reverse=True)[:top_n]



# ============================================================

# SCAN: OLDEST / LARGEST FILES COMBO

# ============================================================


def find_old_large_files(root_path, days_old=365, min_size_mb=10):
    """Find files that are both old and large (cleanup candidates).

    A file qualifies when its modification time is more than ``days_old``
    days in the past and its size is at least ``min_size_mb`` MiB. The
    whole tree below ``root_path`` is searched.

    Args:
        root_path: Folder to scan recursively.
        days_old: Minimum age in days, based on the modification time.
        min_size_mb: Minimum size in units of 1024 * 1024 bytes.

    Returns:
        A list of dicts with the keys "path", "size" (bytes) and
        "modified" (``dd-mm-YYYY``), sorted by size, largest first.
    """
    root = Path(root_path)
    cutoff = datetime.now().timestamp() - (days_old * 86400)
    min_bytes = min_size_mb * 1024 * 1024
    results = []

    for f in root.rglob("*"):
        # is_file() is inside the guard because it can raise (e.g. a
        # PermissionError); one unreadable entry must not end the scan.
        try:
            if not f.is_file():
                continue
            info = f.stat()
            if info.st_mtime >= cutoff or info.st_size < min_bytes:
                continue
            modified = datetime.fromtimestamp(
                info.st_mtime).strftime("%d-%m-%Y")
        except OSError:
            continue

        results.append({
            "path":     str(f),
            "size":     info.st_size,
            "modified": modified,
        })

    return sorted(results, key=lambda x: x["size"], reverse=True)



# ============================================================

# DISPLAY: TOP-LEVEL BREAKDOWN

# ============================================================


def display_top_level(items, top_n=TOP_N):
    """Print the top-level size table followed by a disk-usage summary.

    Args:
        items: Records with "name", "path", "size" and "is_dir" keys, as
            produced by scan_top_level(). They are expected to be sorted
            largest first, because the first row is used to scale the bars.
        top_n: Maximum number of rows to print; percentages and the TOTAL
            row are always computed over all items.
    """
    if not items:
        print("\n  Folder is empty.")
        return

    shown    = items[:top_n]
    max_size = shown[0]["size"] if shown else 1
    total    = sum(i["size"] for i in items)

    print("\n" + "="*70)
    print(f"  FOLDER SIZE BREAKDOWN  (Top {min(top_n, len(items))} of {len(items)} items)")
    print("="*70)
    print(f"  {'NAME':<28} {'SIZE':>10}  {'%':>5}  VISUAL")
    print("  " + "-"*66)

    for item in shown:
        icon     = "D" if item["is_dir"] else "F"
        pct      = (item["size"] / total * 100) if total > 0 else 0
        bar      = draw_bar(item["size"], max_size)
        size_str = format_size(item["size"])

        # Truncate long names so the columns stay aligned.
        display_name = f"[{icon}] {item['name']}"
        if len(display_name) > 28:
            display_name = display_name[:25] + "..."

        print(f"  {display_name:<28} {size_str:>10}  {pct:>4.1f}%  {bar}")

    print("  " + "-"*66)
    print(f"  {'TOTAL':<28} {format_size(total):>10}")
    print("="*70)

    # Disk usage summary for the volume holding the scanned folder.
    # dirname() yields "" for a bare file name, hence the "." fallback;
    # splitting on os.sep by hand returned the item itself in that case
    # and "" for an item directly under the filesystem root.
    parent = os.path.dirname(items[0]["path"]) or "."
    try:
        usage = shutil.disk_usage(parent)
    except OSError:
        # The summary is optional: omit it if the volume cannot be queried.
        return

    used_pct = usage.used / usage.total * 100 if usage.total else 0.0
    print(f"\n  Disk Total : {format_size(usage.total)}")
    print(f"  Disk Used  : {format_size(usage.used)}  "
          f"({used_pct:.1f}%)")
    print(f"  Disk Free  : {format_size(usage.free)}")



# ============================================================

# DISPLAY: EXTENSION BREAKDOWN

# ============================================================


def display_extensions(ext_map, top_n=15):
    """Print a table of disk usage grouped by file extension.

    Args:
        ext_map: Mapping of extension to ``{"size": bytes, "count": n}``.
            The first entry is used to scale the bars, so the mapping is
            expected to be ordered largest first.
        top_n: Maximum number of extensions to list; the TOTAL row always
            covers every extension in ``ext_map``.
    """
    if not ext_map:
        print("\n  No files found.")
        return

    rows = list(ext_map.items())[:top_n]
    largest = rows[0][1]["size"] if rows else 1

    # Grand totals over every extension, not just the rows displayed.
    grand_size = 0
    grand_count = 0
    for stats in ext_map.values():
        grand_size += stats["size"]
        grand_count += stats["count"]

    heavy_rule = "=" * 65
    light_rule = "  " + "-" * 60

    print("\n" + heavy_rule)
    print(f"  SIZE BY FILE TYPE  (Top {min(top_n, len(ext_map))} of {len(ext_map)} types)")
    print(heavy_rule)
    print(f"  {'EXT':<14} {'SIZE':>10}  {'COUNT':>7}  {'%':>5}  VISUAL")
    print(light_rule)

    for ext, stats in rows:
        if grand_size > 0:
            share = stats["size"] / grand_size * 100
        else:
            share = 0
        bar = draw_bar(stats["size"], largest, width=20)
        readable = format_size(stats["size"])
        print(f"  {ext:<14} {readable:>10}  {stats['count']:>7}  {share:>4.1f}%  {bar}")

    print(light_rule)
    print(f"  {'TOTAL':<14} {format_size(grand_size):>10}  {grand_count:>7}")
    print(heavy_rule)



# ============================================================

# DISPLAY: LARGEST FILES

# ============================================================


def display_largest_files(files, top_n=20):
    """Print a ranked table of the largest files.

    Args:
        files: ``(size_bytes, path, modified)`` tuples in the order they
            should be listed, as returned by find_largest_files().
        top_n: Unused; every entry in ``files`` is printed. Kept so that
            existing callers that pass it keep working.
    """
    if not files:
        print("\n  No files found.")
        return

    print("\n" + "="*70)
    print(f"  TOP {len(files)} LARGEST FILES")
    print("="*70)
    print(f"  {'#':<4} {'SIZE':>10}  {'MODIFIED':<12}  FILE")
    print("  " + "-"*65)

    for i, (size, path, mtime) in enumerate(files, 1):
        size_str = format_size(size)
        # Keep the tail of long paths: the file name is the useful part.
        display_path = path if len(path) <= 45 else "..." + path[-42:]
        print(f"  {i:<4} {size_str:>10}  {mtime:<12}  {display_path}")

    print("="*70)



# ============================================================

# DISPLAY: OLD + LARGE FILES

# ============================================================


def display_old_large(results, days_old, min_size_mb):
    """Print the list of old, large files and the space they occupy.

    Args:
        results: Dicts with "path", "size" (bytes) and "modified" keys, in
            the order they should be listed.
        days_old: Age threshold in days, shown in the heading.
        min_size_mb: Size threshold in MB, shown in the heading.
    """
    if not results:
        print(f"\n  No files found older than {days_old} days and larger than {min_size_mb} MB.")
        return

    reclaimable = sum(record["size"] for record in results)
    rule = "=" * 70

    print("\n" + rule)
    print(f"  OLD + LARGE FILES  (>{days_old} days old, >{min_size_mb} MB)")
    print(rule)
    print(f"  Found {len(results)} file(s) using {format_size(reclaimable)} total\n")

    for position, record in enumerate(results, start=1):
        readable = format_size(record["size"])
        print(f"  [{position:02d}] {readable:>10}  Modified: {record['modified']}  {record['path']}")

    print("\n" + rule)
    print(f"  Potential space to reclaim: {format_size(reclaimable)}")
    print(rule)



# ============================================================

# SAVE REPORT

# ============================================================


def save_report(data, report_type, folder):
    """Write an analysis result to REPORT_FILE as indented JSON.

    The payload is wrapped with the report type, the analysed folder and a
    ``dd-mm-YYYY HH:MM:SS`` generation timestamp. Values that JSON cannot
    encode natively are converted with ``str``.

    Args:
        data: The analysis result to store under the "data" key.
        report_type: Label stored under "report_type".
        folder: Path of the analysed folder, stored under "folder".
    """
    stamp = datetime.now().strftime("%d-%m-%Y %H:%M:%S")
    payload = dict(
        report_type=report_type,
        folder=folder,
        generated_at=stamp,
        data=data,
    )

    with open(REPORT_FILE, "w", encoding="utf-8") as handle:
        json.dump(payload, handle, indent=2, default=str)

    print(f"\n  Report saved: {REPORT_FILE}")



# ============================================================

# MAIN MENU

# ============================================================


def print_menu():
    """Print the main menu, framed by horizontal rules, to stdout."""
    rule = "-" * 48
    menu_lines = (
        "\n" + rule,
        "  FOLDER SIZE ANALYZER",
        rule,
        "  1. Top-level size breakdown",
        "  2. Breakdown by file type/extension",
        "  3. Find largest files",
        "  4. Find old + large files (cleanup hints)",
        "  5. Full analysis (all of the above)",
        "  6. Save last report to JSON",
        "  0. Exit",
        rule,
    )
    for line in menu_lines:
        print(line)



def _ask_int(prompt, default):
    """Prompt for a non-negative integer; return *default* on any other input."""
    raw = input(prompt).strip()
    # isdecimal() accepts exactly the digit characters int() can parse;
    # isdigit() also accepts e.g. superscripts, which make int() raise.
    return int(raw) if raw.isdecimal() else default


def _readable_records(records):
    """Copy each record dict, replacing its byte count with a formatted size."""
    return [{**r, "size": format_size(r["size"])} for r in records]


def _readable_extensions(ext_map):
    """Convert an extension map into its report form with formatted sizes."""
    return {
        ext: {"size": format_size(v["size"]), "count": v["count"]}
        for ext, v in ext_map.items()
    }


def _readable_files(files):
    """Convert (size, path, modified) tuples into report dicts."""
    return [
        {"size": format_size(s), "path": p, "modified": m}
        for s, p, m in files
    ]


def main():
    """Run the interactive menu loop until the user chooses to exit.

    Choices 1-5 ask for a folder, run the selected analysis, print it and
    remember the result; choice 6 saves the most recent result through
    save_report(); choice 0 exits.
    """
    print("\n" + "="*55)
    print("     FOLDER SIZE ANALYZER")
    print("="*55)
    print("\n  Visualize what's eating your disk space.")
    print("  Uses ASCII bar charts for instant insight.\n")

    last_data   = {}
    last_folder = ""

    while True:
        print_menu()
        choice = input("  > ").strip()

        # Only the analysis options need a folder. The previous condition
        # (`A and B or C`) also prompted for one on an invalid choice when
        # no folder had been entered yet.
        if choice in ("1", "2", "3", "4", "5"):
            folder = input("\n  Enter folder path to analyze: ").strip()
            if not os.path.isdir(folder):
                print("  Invalid folder path.")
                continue
            last_folder = folder

        if choice == "1":
            items = scan_top_level(last_folder)
            top_n = _ask_int(f"  Show top N items (default {TOP_N}): ", TOP_N)
            display_top_level(items, top_n)
            last_data = {"items": _readable_records(items)}

        elif choice == "2":
            answer = input("  Include subfolders? (y/n, default y): ").strip().lower()
            ext_map = scan_by_extension(last_folder, answer != "n")
            display_extensions(ext_map)
            last_data = _readable_extensions(ext_map)

        elif choice == "3":
            top_n = _ask_int("  How many largest files to show (default 20): ", 20)
            files = find_largest_files(last_folder, top_n)
            display_largest_files(files, top_n)
            last_data = _readable_files(files)

        elif choice == "4":
            days = _ask_int("  Older than how many days? (default 365): ", 365)
            size = _ask_int("  Minimum file size in MB (default 10): ", 10)
            results = find_old_large_files(last_folder, days, size)
            display_old_large(results, days, size)
            last_data = _readable_records(results)

        elif choice == "5":
            print("\n  Running full analysis...\n")

            items = scan_top_level(last_folder)
            display_top_level(items, TOP_N)

            ext_map = scan_by_extension(last_folder)
            display_extensions(ext_map)

            files = find_largest_files(last_folder, 10)
            display_largest_files(files, 10)

            results = find_old_large_files(last_folder, 365, 10)
            display_old_large(results, 365, 10)

            last_data = {
                "top_level":       _readable_records(items),
                "by_extension":    _readable_extensions(ext_map),
                "largest_files":   _readable_files(files),
                "old_large_files": _readable_records(results),
            }

        elif choice == "6":
            if not last_data:
                print("\n  No analysis data yet. Run an analysis first.")
            else:
                save_report(last_data, "folder_analysis", last_folder)

        elif choice == "0":
            print("\n  Goodbye!\n")
            break

        else:
            print("  Invalid choice.")



# ============================================================

# RUN

# ============================================================


# Start the interactive menu only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":

    main()

# No comments: