merge: add -x/--exclude flag for directory exclusion

This commit is contained in:
Jeff Smith 2026-04-06 14:32:17 -06:00
commit d323190866
6 changed files with 49 additions and 18 deletions

View file

@ -38,17 +38,21 @@ def _progress(label):
return on_file, finish
def scan(target, depth=3, show_hidden=False):
def scan(target, depth=3, show_hidden=False, exclude=None):
"""Run all analyses on the target directory and return a report dict."""
report = {}
exclude = exclude or []
print(f" [scan] Building directory tree (depth={depth})...", file=sys.stderr)
tree = build_tree(target, max_depth=depth, show_hidden=show_hidden)
tree = build_tree(target, max_depth=depth, show_hidden=show_hidden,
exclude=exclude)
report["tree"] = tree
report["tree_rendered"] = render_tree(tree)
on_file, finish = _progress("Classifying files")
classified = classify_files(target, show_hidden=show_hidden, on_file=on_file)
classified = classify_files(target, show_hidden=show_hidden,
exclude=exclude, on_file=on_file)
finish()
report["file_categories"] = summarize_categories(classified)
report["classified_files"] = classified
@ -64,10 +68,11 @@ def scan(target, depth=3, show_hidden=False):
finish()
print(" [scan] Finding recently modified files...", file=sys.stderr)
report["recent_files"] = find_recent_files(target, show_hidden=show_hidden)
report["recent_files"] = find_recent_files(target, show_hidden=show_hidden,
exclude=exclude)
print(" [scan] Calculating disk usage...", file=sys.stderr)
usage = get_disk_usage(target, show_hidden=show_hidden)
usage = get_disk_usage(target, show_hidden=show_hidden, exclude=exclude)
report["disk_usage"] = usage
report["top_directories"] = top_directories(usage, n=5)
@ -101,6 +106,10 @@ def main():
help="Force a new AI investigation (ignore cached results)")
parser.add_argument("--install-extras", action="store_true",
help="Show status of optional AI dependencies")
parser.add_argument("-x", "--exclude", metavar="DIR", action="append",
default=[],
help="Exclude a directory name from scan and analysis "
"(repeatable, e.g. -x .git -x node_modules)")
args = parser.parse_args()
@ -126,17 +135,22 @@ def main():
file=sys.stderr)
sys.exit(1)
if args.exclude:
print(f" [scan] Excluding: {', '.join(args.exclude)}", file=sys.stderr)
if args.watch:
watch_loop(target, depth=args.depth, show_hidden=args.all,
json_output=args.json_output)
return
report = scan(target, depth=args.depth, show_hidden=args.all)
report = scan(target, depth=args.depth, show_hidden=args.all,
exclude=args.exclude)
flags = []
if args.ai:
from luminos_lib.ai import analyze_directory
brief, detailed, flags = analyze_directory(report, target, fresh=args.fresh)
brief, detailed, flags = analyze_directory(
report, target, fresh=args.fresh, exclude=args.exclude)
report["ai_brief"] = brief
report["ai_detailed"] = detailed
report["flags"] = flags

View file

@ -643,14 +643,16 @@ def _call_api_streaming(client, system, messages, tools, tracker):
# Directory discovery
# ---------------------------------------------------------------------------
def _discover_directories(target, show_hidden=False):
def _discover_directories(target, show_hidden=False, exclude=None):
"""Walk the target and return all directories sorted leaves-first."""
extra = set(exclude or [])
dirs = []
target_real = os.path.realpath(target)
for root, subdirs, _files in os.walk(target_real, topdown=True):
subdirs[:] = [
d for d in subdirs
if not _should_skip_dir(d)
and d not in extra
and (show_hidden or not d.startswith("."))
]
dirs.append(root)
@ -1001,7 +1003,7 @@ def _synthesize_from_cache(cache):
# ---------------------------------------------------------------------------
def _run_investigation(client, target, report, show_hidden=False,
fresh=False, verbose=False):
fresh=False, verbose=False, exclude=None):
"""Orchestrate the multi-pass investigation. Returns (brief, detailed, flags)."""
investigation_id, is_new = _get_investigation_id(target, fresh=fresh)
cache = _CacheManager(investigation_id, target)
@ -1014,7 +1016,8 @@ def _run_investigation(client, target, report, show_hidden=False,
f"{'' if is_new else ' (resumed)'}", file=sys.stderr)
print(f" [AI] Cache: {cache.root}/", file=sys.stderr)
all_dirs = _discover_directories(target, show_hidden=show_hidden)
all_dirs = _discover_directories(target, show_hidden=show_hidden,
exclude=exclude)
to_investigate = []
cached_count = 0
@ -1087,7 +1090,8 @@ def _run_investigation(client, target, report, show_hidden=False,
# Public interface
# ---------------------------------------------------------------------------
def analyze_directory(report, target, verbose_tools=False, fresh=False):
def analyze_directory(report, target, verbose_tools=False, fresh=False,
exclude=None):
"""Run AI analysis on the directory. Returns (brief, detailed, flags).
Returns ("", "", []) if the API key is missing or dependencies are not met.
@ -1106,6 +1110,7 @@ def analyze_directory(report, target, verbose_tools=False, fresh=False):
try:
brief, detailed, flags = _run_investigation(
client, target, report, fresh=fresh, verbose=verbose_tools,
exclude=exclude,
)
except Exception as e:
print(f"Warning: AI analysis failed: {e}", file=sys.stderr)

View file

@ -3,12 +3,15 @@
import subprocess
def get_disk_usage(target, show_hidden=False):
def get_disk_usage(target, show_hidden=False, exclude=None):
"""Get per-directory disk usage via du.
Returns a list of dicts: {path, size_bytes, size_human}.
"""
cmd = ["du", "-b", "--max-depth=2", target]
cmd = ["du", "-b", "--max-depth=2"]
for name in (exclude or []):
cmd.append(f"--exclude={name}")
cmd.append(target)
try:
result = subprocess.run(

View file

@ -86,7 +86,8 @@ def _classify_one(filepath):
return "unknown", desc
def classify_files(target, show_hidden=False, on_file=None):
def classify_files(target, show_hidden=False, exclude=None, on_file=None):
exclude = exclude or []
"""Walk the target directory and classify every file.
Returns a list of dicts: {path, name, category, size, description}.
@ -94,8 +95,10 @@ def classify_files(target, show_hidden=False, on_file=None):
"""
results = []
for root, dirs, files in os.walk(target):
dirs[:] = [d for d in dirs
if d not in exclude
and (show_hidden or not d.startswith("."))]
if not show_hidden:
dirs[:] = [d for d in dirs if not d.startswith(".")]
files = [f for f in files if not f.startswith(".")]
for fname in files:
full = os.path.join(root, fname)

View file

@ -5,7 +5,7 @@ import os
from datetime import datetime
def find_recent_files(target, n=10, show_hidden=False):
def find_recent_files(target, n=10, show_hidden=False, exclude=None):
"""Find the n most recently modified files using find and stat.
Returns a list of dicts: {path, name, modified, modified_human}.
@ -14,6 +14,9 @@ def find_recent_files(target, n=10, show_hidden=False):
cmd = ["find", target, "-type", "f"]
if not show_hidden:
cmd.extend(["-not", "-path", "*/.*"])
for name in (exclude or []):
cmd.extend(["-not", "-path", f"*/{name}/*",
"-not", "-path", f"*/{name}"])
cmd.extend(["-printf", "%T@\t%p\n"])
try:

View file

@ -3,7 +3,8 @@
import os
def build_tree(path, max_depth=3, show_hidden=False, _depth=0):
def build_tree(path, max_depth=3, show_hidden=False, exclude=None, _depth=0):
exclude = exclude or []
"""Build a nested dict representing the directory tree with file sizes."""
name = os.path.basename(path) or path
node = {"name": name, "path": path, "type": "directory", "children": []}
@ -17,10 +18,12 @@ def build_tree(path, max_depth=3, show_hidden=False, _depth=0):
for entry in entries:
if not show_hidden and entry.startswith("."):
continue
if entry in exclude:
continue
full = os.path.join(path, entry)
if os.path.isdir(full):
if _depth < max_depth:
child = build_tree(full, max_depth, show_hidden, _depth + 1)
child = build_tree(full, max_depth, show_hidden, exclude, _depth + 1)
node["children"].append(child)
else:
node["children"].append({