import os
import json
import shutil
from pathlib import Path
from collections import defaultdict
from datetime import datetime
# ============================================================
# CONFIGURATION
# ============================================================
# File name that save_report() opens for writing; it is a relative path, so
# the report lands in the current working directory.
REPORT_FILE = "folder_size_report.json"
BAR_WIDTH = 30 # default width, in characters, of the bar built by draw_bar()
TOP_N = 10 # default number of rows shown by display_top_level()
# ============================================================
# FORMAT FILE SIZE
# ============================================================
def format_size(size_bytes):
    """Return *size_bytes* as a human-readable string with one decimal place.

    The value is divided by 1024 while stepping through B, KB, MB, GB and TB;
    anything that still does not fit is reported in PB. Negative input is
    reported as ``"0 B"``.

    The unit is selected from the *rounded* value, so a size just below a
    unit boundary is promoted to the next unit (``1048575`` -> ``"1.0 MB"``)
    instead of being printed as ``"1024.0 KB"``.
    """
    if size_bytes < 0:
        return "0 B"
    value = size_bytes
    for unit in ("B", "KB", "MB", "GB", "TB"):
        # Compare the number that will actually be printed, not the raw one.
        if round(value, 1) < 1024:
            return f"{value:.1f} {unit}"
        value /= 1024
    return f"{value:.1f} PB"
# ============================================================
# DRAW ASCII BAR
# ============================================================
def draw_bar(value, max_value, width=BAR_WIDTH):
    """Return an ASCII bar showing *value* as a fraction of *max_value*.

    The bar consists of filled blocks followed by light-shade blocks and,
    for a non-negative *width*, is always exactly *width* characters long.
    An empty string is returned when *max_value* is 0, because no ratio can
    be computed.
    """
    if max_value == 0:
        return ""
    filled = int((value / max_value) * width)
    # Clamp so an out-of-range ratio (value > max_value, or a negative
    # value) cannot produce a bar that is longer than ``width``.
    filled = max(0, min(width, filled))
    return "█" * filled + "░" * (width - filled)
# ============================================================
# GET FOLDER SIZE (Recursive)
# ============================================================
def get_folder_size(path):
    """Return the total size in bytes of the regular files beneath *path*.

    Sub-directories are visited with an explicit stack rather than
    recursion, so very deep trees cannot hit the interpreter's recursion
    limit. Symbolic links are neither followed nor counted. Entries and
    directories that cannot be read are skipped, which means the result is
    a best-effort total; an unreadable or missing *path* yields 0.
    """
    total = 0
    pending = [path]
    while pending:
        current = pending.pop()
        try:
            # The context manager closes the directory handle promptly,
            # even when iteration is cut short by an error.
            with os.scandir(current) as entries:
                for entry in entries:
                    try:
                        if entry.is_file(follow_symlinks=False):
                            total += entry.stat(follow_symlinks=False).st_size
                        elif entry.is_dir(follow_symlinks=False):
                            pending.append(entry.path)
                    except OSError:
                        # PermissionError is a subclass of OSError.
                        continue
        except OSError:
            continue
    return total
# ============================================================
# SCAN: TOP-LEVEL BREAKDOWN
# ============================================================
def scan_top_level(root_path):
    """Measure every immediate child (file or folder) of *root_path*.

    Returns a list of dicts with the keys ``name``, ``path``, ``size``
    (bytes), ``is_dir`` and ``modified`` (``dd-mm-YYYY``), sorted by size in
    descending order. Folder sizes come from get_folder_size(). Symbolic
    links and entries that are neither regular files nor directories are
    skipped, as are entries that cannot be read. Progress is printed to
    stdout while scanning. If *root_path* itself cannot be listed, a message
    is printed and an empty list is returned.
    """
    root = Path(root_path)
    try:
        entries = sorted(root.iterdir(), key=lambda e: e.name)
    except OSError as exc:
        print(f"\n Cannot read '{root_path}': {exc}")
        return []
    total = len(entries)
    print(f"\n Scanning {total} item(s) in '{root_path}'...")
    items = []
    for i, entry in enumerate(entries, 1):
        print(f" [{i}/{total}] {entry.name[:40]}...", end="\r")
        try:
            # Path.is_file()/is_dir() only accept follow_symlinks on
            # Python 3.13+, so symlinks are filtered out explicitly to stay
            # compatible with older interpreters.
            if entry.is_symlink():
                continue
            if entry.is_file():
                is_dir = False
            elif entry.is_dir():
                is_dir = True
            else:
                continue
            info = entry.stat()
            size = get_folder_size(entry) if is_dir else info.st_size
            modified = datetime.fromtimestamp(
                info.st_mtime).strftime("%d-%m-%Y")
        except (OSError, OverflowError, ValueError):
            # Unreadable entry or a timestamp the platform cannot convert.
            continue
        items.append({
            "name": entry.name,
            "path": str(entry),
            "size": size,
            "is_dir": is_dir,
            "modified": modified,
        })
    print(" " * 60, end="\r") # clear progress line
    return sorted(items, key=lambda x: x["size"], reverse=True)
# ============================================================
# SCAN: FILE EXTENSION BREAKDOWN
# ============================================================
def scan_by_extension(root_path, recursive=True):
    """Group file sizes and counts by lower-cased file extension.

    Returns a dict mapping each extension (or ``"(no extension)"``) to
    ``{"size": total_bytes, "count": number_of_files}``, ordered by total
    size in descending order. With ``recursive=False`` only the immediate
    children of *root_path* are considered. Symbolic links are skipped so
    the totals agree with the folder-size scan, and files that cannot be
    inspected are ignored.
    """
    ext_map = defaultdict(lambda: {"size": 0, "count": 0})
    root = Path(root_path)
    candidates = root.rglob("*") if recursive else root.glob("*")
    for f in candidates:
        try:
            # is_file() itself can raise (e.g. PermissionError), so it has
            # to live inside the try block as well.
            if f.is_symlink() or not f.is_file():
                continue
            size = f.stat().st_size
        except OSError:
            continue
        bucket = ext_map[f.suffix.lower() or "(no extension)"]
        bucket["size"] += size
        bucket["count"] += 1
    return dict(sorted(ext_map.items(),
                       key=lambda x: x[1]["size"], reverse=True))
# ============================================================
# SCAN: LARGEST FILES
# ============================================================
def find_largest_files(root_path, top_n=20, recursive=True):
    """Find the *top_n* largest individual files under *root_path*.

    Returns a list of ``(size_bytes, path, modified)`` tuples, largest
    first, where ``modified`` is formatted as ``dd-mm-YYYY``. With
    ``recursive=False`` only the immediate children are considered.
    Symbolic links are skipped, and files that cannot be inspected are
    ignored. A *top_n* of 0 yields an empty list.
    """
    import heapq

    root = Path(root_path)
    candidates = root.rglob("*") if recursive else root.glob("*")

    def _records():
        # Yield one (size, path, mtime) tuple per readable regular file.
        for f in candidates:
            try:
                if f.is_symlink() or not f.is_file():
                    continue
                info = f.stat()
                mtime = datetime.fromtimestamp(
                    info.st_mtime).strftime("%d-%m-%Y")
            except (OSError, OverflowError, ValueError):
                continue
            yield (info.st_size, str(f), mtime)

    # nlargest keeps only top_n records in memory and returns them in the
    # same order as sorted(..., reverse=True)[:top_n] would.
    return heapq.nlargest(top_n, _records())
# ============================================================
# SCAN: OLDEST / LARGEST FILES COMBO
# ============================================================
def find_old_large_files(root_path, days_old=365, min_size_mb=10):
    """Find files last modified more than *days_old* days ago that are at
    least *min_size_mb* MiB in size.

    The tree under *root_path* is searched recursively. Returns a list of
    dicts with the keys ``path``, ``size`` (bytes) and ``modified``
    (``dd-mm-YYYY``), sorted by size in descending order. Symbolic links are
    skipped and files that cannot be inspected are ignored.
    """
    root = Path(root_path)
    cutoff = datetime.now().timestamp() - (days_old * 86400)
    min_bytes = min_size_mb * 1024 * 1024
    results = []
    for f in root.rglob("*"):
        try:
            # is_file() can raise too, so it belongs inside the try block.
            if f.is_symlink() or not f.is_file():
                continue
            info = f.stat()
            if info.st_mtime >= cutoff or info.st_size < min_bytes:
                continue
            modified = datetime.fromtimestamp(
                info.st_mtime).strftime("%d-%m-%Y")
        except (OSError, OverflowError, ValueError):
            continue
        results.append({
            "path": str(f),
            "size": info.st_size,
            "modified": modified,
        })
    return sorted(results, key=lambda x: x["size"], reverse=True)
# ============================================================
# DISPLAY: TOP-LEVEL BREAKDOWN
# ============================================================
def display_top_level(items, top_n=TOP_N):
    """Print a size table with ASCII bars for the first *top_n* items.

    *items* is the list produced by scan_top_level(): dicts with ``name``,
    ``path``, ``size`` and ``is_dir``. Percentages and the TOTAL row are
    computed over all items, not only the rows shown. After the table, the
    usage of the disk that holds the scanned folder is printed; that
    summary is skipped if the disk cannot be queried.
    """
    if not items:
        print("\n Folder is empty.")
        return
    shown = items[:top_n]
    # Scale bars against the largest row shown, whatever the input order.
    max_size = max((i["size"] for i in shown), default=1)
    total = sum(i["size"] for i in items)
    print("\n" + "="*70)
    print(f" FOLDER SIZE BREAKDOWN (Top {min(top_n, len(items))} of {len(items)} items)")
    print("="*70)
    print(f" {'NAME':<28} {'SIZE':>10} {'%':>5} VISUAL")
    print(" " + "-"*66)
    for item in shown:
        icon = "D" if item["is_dir"] else "F"
        pct = (item["size"] / total * 100) if total > 0 else 0
        bar = draw_bar(item["size"], max_size)
        size_str = format_size(item["size"])
        # Truncate long names
        display_name = f"[{icon}] {item['name']}"
        if len(display_name) > 28:
            display_name = display_name[:25] + "..."
        print(f" {display_name:<28} {size_str:>10} {pct:>4.1f}% {bar}")
    print(" " + "-"*66)
    print(f" {'TOTAL':<28} {format_size(total):>10}")
    print("="*70)
    # Disk usage summary. dirname() keeps the root separator ("/bin" ->
    # "/"), whereas splitting on os.sep would give "" and lose the summary
    # for children of the filesystem root.
    parent = os.path.dirname(items[0]["path"]) or "."
    try:
        usage = shutil.disk_usage(parent)
    except OSError:
        return
    used_pct = (usage.used / usage.total * 100) if usage.total else 0.0
    print(f"\n Disk Total : {format_size(usage.total)}")
    print(f" Disk Used : {format_size(usage.used)} "
          f"({used_pct:.1f}%)")
    print(f" Disk Free : {format_size(usage.free)}")
# ============================================================
# DISPLAY: EXTENSION BREAKDOWN
# ============================================================
def display_extensions(ext_map, top_n=15):
    """Print a per-extension size table for the first *top_n* entries.

    *ext_map* maps an extension to ``{"size": bytes, "count": files}`` and
    is shown in its existing order. Percentages and the TOTAL row cover
    every entry of the map, not only the rows printed.
    """
    if not ext_map:
        print("\n No files found.")
        return
    rows = list(ext_map.items())[:top_n]
    scale = rows[0][1]["size"] if rows else 1
    grand_size = 0
    grand_count = 0
    for stats in ext_map.values():
        grand_size += stats["size"]
        grand_count += stats["count"]
    heavy_rule = "=" * 65
    light_rule = " " + "-" * 60
    print("\n" + heavy_rule)
    print(f" SIZE BY FILE TYPE (Top {min(top_n, len(ext_map))} of {len(ext_map)} types)")
    print(heavy_rule)
    print(f" {'EXT':<14} {'SIZE':>10} {'COUNT':>7} {'%':>5} VISUAL")
    print(light_rule)
    for extension, stats in rows:
        if grand_size > 0:
            share = stats["size"] / grand_size * 100
        else:
            share = 0
        chart = draw_bar(stats["size"], scale, width=20)
        readable = format_size(stats["size"])
        print(f" {extension:<14} {readable:>10} {stats['count']:>7} {share:>4.1f}% {chart}")
    print(light_rule)
    print(f" {'TOTAL':<14} {format_size(grand_size):>10} {grand_count:>7}")
    print(heavy_rule)
# ============================================================
# DISPLAY: LARGEST FILES
# ============================================================
def display_largest_files(files, top_n=20):
    """Print a ranked table of the largest files.

    *files* is a list of ``(size_bytes, path, modified)`` tuples, expected
    to be ordered largest first. At most *top_n* rows are printed. Paths
    longer than 45 characters are shortened to their last 42 characters
    with a leading ``...``.
    """
    if not files:
        print("\n No files found.")
        return
    # Honour top_n so the heading and the rows always agree with it.
    shown = files[:top_n]
    print("\n" + "="*70)
    print(f" TOP {len(shown)} LARGEST FILES")
    print("="*70)
    print(f" {'#':<4} {'SIZE':>10} {'MODIFIED':<12} FILE")
    print(" " + "-"*65)
    for i, (size, path, mtime) in enumerate(shown, 1):
        size_str = format_size(size)
        # Truncate long paths
        display_path = path if len(path) <= 45 else "..." + path[-42:]
        print(f" {i:<4} {size_str:>10} {mtime:<12} {display_path}")
    print("="*70)
# ============================================================
# DISPLAY: OLD + LARGE FILES
# ============================================================
def display_old_large(results, days_old, min_size_mb):
    """Print the list of old, large files and the space they occupy.

    *results* is a list of dicts with ``path``, ``size`` (bytes) and
    ``modified``; *days_old* and *min_size_mb* are only echoed in the
    heading and in the "nothing found" message.
    """
    if not results:
        print(f"\n No files found older than {days_old} days and larger than {min_size_mb} MB.")
        return
    rule = "=" * 70
    reclaimable = format_size(sum(entry["size"] for entry in results))
    print("\n" + rule)
    print(f" OLD + LARGE FILES (>{days_old} days old, >{min_size_mb} MB)")
    print(rule)
    print(f" Found {len(results)} file(s) using {reclaimable} total\n")
    for rank, entry in enumerate(results, 1):
        size_text = format_size(entry["size"])
        print(f" [{rank:02d}] {size_text:>10} Modified: {entry['modified']} {entry['path']}")
    print("\n" + rule)
    print(f" Potential space to reclaim: {reclaimable}")
    print(rule)
# ============================================================
# SAVE REPORT
# ============================================================
def save_report(data, report_type, folder):
    """Write *data* to REPORT_FILE as an indented JSON document.

    The payload is wrapped together with *report_type*, *folder* and a
    ``dd-mm-YYYY HH:MM:SS`` generation timestamp. Values that JSON cannot
    encode natively are converted with ``str``. An existing report file is
    overwritten.
    """
    stamp = datetime.now().strftime("%d-%m-%Y %H:%M:%S")
    payload = dict(
        report_type=report_type,
        folder=folder,
        generated_at=stamp,
        data=data,
    )
    with open(REPORT_FILE, "w", encoding="utf-8") as handle:
        json.dump(payload, handle, indent=2, default=str)
    print(f"\n Report saved: {REPORT_FILE}")
# ============================================================
# MAIN MENU
# ============================================================
def print_menu():
    """Print the main menu, framed by 48-character rules."""
    rule = "-" * 48
    options = (
        " 1. Top-level size breakdown",
        " 2. Breakdown by file type/extension",
        " 3. Find largest files",
        " 4. Find old + large files (cleanup hints)",
        " 5. Full analysis (all of the above)",
        " 6. Save last report to JSON",
        " 0. Exit",
    )
    print("\n" + rule)
    print(" FOLDER SIZE ANALYZER")
    print(rule)
    for line in options:
        print(line)
    print(rule)
def _read_count(prompt, default):
    """Prompt for a non-negative whole number; return *default* otherwise."""
    answer = input(prompt).strip()
    # isdecimal() accepts only characters that int() can parse, so input
    # such as "²" falls back to the default instead of raising ValueError.
    return int(answer) if answer.isdecimal() else default


def _humanize_sizes(records):
    """Copy each record dict, replacing its byte count with readable text."""
    return [{**rec, "size": format_size(rec["size"])} for rec in records]


def _extension_summary(ext_map):
    """Build the report form of an extension map with readable sizes."""
    return {
        ext: {"size": format_size(v["size"]), "count": v["count"]}
        for ext, v in ext_map.items()
    }


def _largest_summary(files):
    """Turn (size, path, modified) tuples into report dicts."""
    return [
        {"size": format_size(s), "path": p, "modified": m}
        for s, p, m in files
    ]


def main():
    """Run the interactive menu loop until the user chooses to exit.

    Options 1-5 ask for a folder, run the matching scan(s) and print the
    result; the data of the most recent analysis is kept so that option 6
    can save it through save_report(). An unknown choice only prints
    "Invalid choice." and never asks for a folder. End-of-input or Ctrl-C
    ends the program without a traceback.
    """
    print("\n" + "="*55)
    print(" FOLDER SIZE ANALYZER")
    print("="*55)
    print("\n Visualize what's eating your disk space.")
    print(" Uses ASCII bar charts for instant insight.\n")
    last_data = {}
    last_folder = ""
    try:
        while True:
            print_menu()
            choice = input(" > ").strip()
            # Only the analysis options need a folder; previously an
            # unknown choice could also trigger this prompt.
            if choice in ("1", "2", "3", "4", "5"):
                folder = input("\n Enter folder path to analyze: ").strip()
                if not os.path.isdir(folder):
                    print(" Invalid folder path.")
                    continue
                last_folder = folder
            if choice == "1":
                items = scan_top_level(last_folder)
                top_n = _read_count(
                    f" Show top N items (default {TOP_N}): ", TOP_N)
                display_top_level(items, top_n)
                last_data = {"items": _humanize_sizes(items)}
            elif choice == "2":
                recursive = input(
                    " Include subfolders? (y/n, default y): ").strip().lower()
                recursive = recursive != "n"
                ext_map = scan_by_extension(last_folder, recursive)
                display_extensions(ext_map)
                last_data = _extension_summary(ext_map)
            elif choice == "3":
                top_n = _read_count(
                    " How many largest files to show (default 20): ", 20)
                files = find_largest_files(last_folder, top_n)
                display_largest_files(files, top_n)
                last_data = _largest_summary(files)
            elif choice == "4":
                days = _read_count(
                    " Older than how many days? (default 365): ", 365)
                size = _read_count(
                    " Minimum file size in MB (default 10): ", 10)
                results = find_old_large_files(last_folder, days, size)
                display_old_large(results, days, size)
                last_data = _humanize_sizes(results)
            elif choice == "5":
                print("\n Running full analysis...\n")
                items = scan_top_level(last_folder)
                display_top_level(items, TOP_N)
                ext_map = scan_by_extension(last_folder)
                display_extensions(ext_map)
                files = find_largest_files(last_folder, 10)
                display_largest_files(files, 10)
                results = find_old_large_files(last_folder, 365, 10)
                display_old_large(results, 365, 10)
                last_data = {
                    "top_level": _humanize_sizes(items),
                    "by_extension": _extension_summary(ext_map),
                    "largest_files": _largest_summary(files),
                    "old_large_files": _humanize_sizes(results),
                }
            elif choice == "6":
                if not last_data:
                    print("\n No analysis data yet. Run an analysis first.")
                else:
                    save_report(last_data, "folder_analysis", last_folder)
            elif choice == "0":
                print("\n Goodbye!\n")
                break
            else:
                print(" Invalid choice.")
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or Ctrl-C: leave quietly instead of with a traceback.
        print("\n\n Goodbye!\n")
# ============================================================
# RUN
# ============================================================
if __name__ == "__main__":
    # Start the interactive menu only when this file is executed directly,
    # not when it is imported as a module.
    main()
# End of script.