merge: add -x/--exclude flag for directory exclusion

This commit is contained in:
Jeff Smith 2026-04-06 14:32:17 -06:00
commit d323190866
6 changed files with 49 additions and 18 deletions

View file

@ -38,17 +38,21 @@ def _progress(label):
return on_file, finish return on_file, finish
def scan(target, depth=3, show_hidden=False): def scan(target, depth=3, show_hidden=False, exclude=None):
"""Run all analyses on the target directory and return a report dict.""" """Run all analyses on the target directory and return a report dict."""
report = {} report = {}
exclude = exclude or []
print(f" [scan] Building directory tree (depth={depth})...", file=sys.stderr) print(f" [scan] Building directory tree (depth={depth})...", file=sys.stderr)
tree = build_tree(target, max_depth=depth, show_hidden=show_hidden) tree = build_tree(target, max_depth=depth, show_hidden=show_hidden,
exclude=exclude)
report["tree"] = tree report["tree"] = tree
report["tree_rendered"] = render_tree(tree) report["tree_rendered"] = render_tree(tree)
on_file, finish = _progress("Classifying files") on_file, finish = _progress("Classifying files")
classified = classify_files(target, show_hidden=show_hidden, on_file=on_file) classified = classify_files(target, show_hidden=show_hidden,
exclude=exclude, on_file=on_file)
finish() finish()
report["file_categories"] = summarize_categories(classified) report["file_categories"] = summarize_categories(classified)
report["classified_files"] = classified report["classified_files"] = classified
@ -64,10 +68,11 @@ def scan(target, depth=3, show_hidden=False):
finish() finish()
print(" [scan] Finding recently modified files...", file=sys.stderr) print(" [scan] Finding recently modified files...", file=sys.stderr)
report["recent_files"] = find_recent_files(target, show_hidden=show_hidden) report["recent_files"] = find_recent_files(target, show_hidden=show_hidden,
exclude=exclude)
print(" [scan] Calculating disk usage...", file=sys.stderr) print(" [scan] Calculating disk usage...", file=sys.stderr)
usage = get_disk_usage(target, show_hidden=show_hidden) usage = get_disk_usage(target, show_hidden=show_hidden, exclude=exclude)
report["disk_usage"] = usage report["disk_usage"] = usage
report["top_directories"] = top_directories(usage, n=5) report["top_directories"] = top_directories(usage, n=5)
@ -101,6 +106,10 @@ def main():
help="Force a new AI investigation (ignore cached results)") help="Force a new AI investigation (ignore cached results)")
parser.add_argument("--install-extras", action="store_true", parser.add_argument("--install-extras", action="store_true",
help="Show status of optional AI dependencies") help="Show status of optional AI dependencies")
parser.add_argument("-x", "--exclude", metavar="DIR", action="append",
default=[],
help="Exclude a directory name from scan and analysis "
"(repeatable, e.g. -x .git -x node_modules)")
args = parser.parse_args() args = parser.parse_args()
@ -126,17 +135,22 @@ def main():
file=sys.stderr) file=sys.stderr)
sys.exit(1) sys.exit(1)
if args.exclude:
print(f" [scan] Excluding: {', '.join(args.exclude)}", file=sys.stderr)
if args.watch: if args.watch:
watch_loop(target, depth=args.depth, show_hidden=args.all, watch_loop(target, depth=args.depth, show_hidden=args.all,
json_output=args.json_output) json_output=args.json_output)
return return
report = scan(target, depth=args.depth, show_hidden=args.all) report = scan(target, depth=args.depth, show_hidden=args.all,
exclude=args.exclude)
flags = [] flags = []
if args.ai: if args.ai:
from luminos_lib.ai import analyze_directory from luminos_lib.ai import analyze_directory
brief, detailed, flags = analyze_directory(report, target, fresh=args.fresh) brief, detailed, flags = analyze_directory(
report, target, fresh=args.fresh, exclude=args.exclude)
report["ai_brief"] = brief report["ai_brief"] = brief
report["ai_detailed"] = detailed report["ai_detailed"] = detailed
report["flags"] = flags report["flags"] = flags

View file

@ -643,14 +643,16 @@ def _call_api_streaming(client, system, messages, tools, tracker):
# Directory discovery # Directory discovery
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
def _discover_directories(target, show_hidden=False): def _discover_directories(target, show_hidden=False, exclude=None):
"""Walk the target and return all directories sorted leaves-first.""" """Walk the target and return all directories sorted leaves-first."""
extra = set(exclude or [])
dirs = [] dirs = []
target_real = os.path.realpath(target) target_real = os.path.realpath(target)
for root, subdirs, _files in os.walk(target_real, topdown=True): for root, subdirs, _files in os.walk(target_real, topdown=True):
subdirs[:] = [ subdirs[:] = [
d for d in subdirs d for d in subdirs
if not _should_skip_dir(d) if not _should_skip_dir(d)
and d not in extra
and (show_hidden or not d.startswith(".")) and (show_hidden or not d.startswith("."))
] ]
dirs.append(root) dirs.append(root)
@ -1001,7 +1003,7 @@ def _synthesize_from_cache(cache):
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
def _run_investigation(client, target, report, show_hidden=False, def _run_investigation(client, target, report, show_hidden=False,
fresh=False, verbose=False): fresh=False, verbose=False, exclude=None):
"""Orchestrate the multi-pass investigation. Returns (brief, detailed, flags).""" """Orchestrate the multi-pass investigation. Returns (brief, detailed, flags)."""
investigation_id, is_new = _get_investigation_id(target, fresh=fresh) investigation_id, is_new = _get_investigation_id(target, fresh=fresh)
cache = _CacheManager(investigation_id, target) cache = _CacheManager(investigation_id, target)
@ -1014,7 +1016,8 @@ def _run_investigation(client, target, report, show_hidden=False,
f"{'' if is_new else ' (resumed)'}", file=sys.stderr) f"{'' if is_new else ' (resumed)'}", file=sys.stderr)
print(f" [AI] Cache: {cache.root}/", file=sys.stderr) print(f" [AI] Cache: {cache.root}/", file=sys.stderr)
all_dirs = _discover_directories(target, show_hidden=show_hidden) all_dirs = _discover_directories(target, show_hidden=show_hidden,
exclude=exclude)
to_investigate = [] to_investigate = []
cached_count = 0 cached_count = 0
@ -1087,7 +1090,8 @@ def _run_investigation(client, target, report, show_hidden=False,
# Public interface # Public interface
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
def analyze_directory(report, target, verbose_tools=False, fresh=False): def analyze_directory(report, target, verbose_tools=False, fresh=False,
exclude=None):
"""Run AI analysis on the directory. Returns (brief, detailed, flags). """Run AI analysis on the directory. Returns (brief, detailed, flags).
Returns ("", "", []) if the API key is missing or dependencies are not met. Returns ("", "", []) if the API key is missing or dependencies are not met.
@ -1106,6 +1110,7 @@ def analyze_directory(report, target, verbose_tools=False, fresh=False):
try: try:
brief, detailed, flags = _run_investigation( brief, detailed, flags = _run_investigation(
client, target, report, fresh=fresh, verbose=verbose_tools, client, target, report, fresh=fresh, verbose=verbose_tools,
exclude=exclude,
) )
except Exception as e: except Exception as e:
print(f"Warning: AI analysis failed: {e}", file=sys.stderr) print(f"Warning: AI analysis failed: {e}", file=sys.stderr)

View file

@ -3,12 +3,15 @@
import subprocess import subprocess
def get_disk_usage(target, show_hidden=False): def get_disk_usage(target, show_hidden=False, exclude=None):
"""Get per-directory disk usage via du. """Get per-directory disk usage via du.
Returns a list of dicts: {path, size_bytes, size_human}. Returns a list of dicts: {path, size_bytes, size_human}.
""" """
cmd = ["du", "-b", "--max-depth=2", target] cmd = ["du", "-b", "--max-depth=2"]
for name in (exclude or []):
cmd.append(f"--exclude={name}")
cmd.append(target)
try: try:
result = subprocess.run( result = subprocess.run(

View file

@ -86,7 +86,8 @@ def _classify_one(filepath):
return "unknown", desc return "unknown", desc
def classify_files(target, show_hidden=False, on_file=None): def classify_files(target, show_hidden=False, exclude=None, on_file=None):
exclude = exclude or []
"""Walk the target directory and classify every file. """Walk the target directory and classify every file.
Returns a list of dicts: {path, name, category, size, description}. Returns a list of dicts: {path, name, category, size, description}.
@ -94,8 +95,10 @@ def classify_files(target, show_hidden=False, on_file=None):
""" """
results = [] results = []
for root, dirs, files in os.walk(target): for root, dirs, files in os.walk(target):
dirs[:] = [d for d in dirs
if d not in exclude
and (show_hidden or not d.startswith("."))]
if not show_hidden: if not show_hidden:
dirs[:] = [d for d in dirs if not d.startswith(".")]
files = [f for f in files if not f.startswith(".")] files = [f for f in files if not f.startswith(".")]
for fname in files: for fname in files:
full = os.path.join(root, fname) full = os.path.join(root, fname)

View file

@ -5,7 +5,7 @@ import os
from datetime import datetime from datetime import datetime
def find_recent_files(target, n=10, show_hidden=False): def find_recent_files(target, n=10, show_hidden=False, exclude=None):
"""Find the n most recently modified files using find and stat. """Find the n most recently modified files using find and stat.
Returns a list of dicts: {path, name, modified, modified_human}. Returns a list of dicts: {path, name, modified, modified_human}.
@ -14,6 +14,9 @@ def find_recent_files(target, n=10, show_hidden=False):
cmd = ["find", target, "-type", "f"] cmd = ["find", target, "-type", "f"]
if not show_hidden: if not show_hidden:
cmd.extend(["-not", "-path", "*/.*"]) cmd.extend(["-not", "-path", "*/.*"])
for name in (exclude or []):
cmd.extend(["-not", "-path", f"*/{name}/*",
"-not", "-path", f"*/{name}"])
cmd.extend(["-printf", "%T@\t%p\n"]) cmd.extend(["-printf", "%T@\t%p\n"])
try: try:

View file

@ -3,7 +3,8 @@
import os import os
def build_tree(path, max_depth=3, show_hidden=False, _depth=0): def build_tree(path, max_depth=3, show_hidden=False, exclude=None, _depth=0):
exclude = exclude or []
"""Build a nested dict representing the directory tree with file sizes.""" """Build a nested dict representing the directory tree with file sizes."""
name = os.path.basename(path) or path name = os.path.basename(path) or path
node = {"name": name, "path": path, "type": "directory", "children": []} node = {"name": name, "path": path, "type": "directory", "children": []}
@ -17,10 +18,12 @@ def build_tree(path, max_depth=3, show_hidden=False, _depth=0):
for entry in entries: for entry in entries:
if not show_hidden and entry.startswith("."): if not show_hidden and entry.startswith("."):
continue continue
if entry in exclude:
continue
full = os.path.join(path, entry) full = os.path.join(path, entry)
if os.path.isdir(full): if os.path.isdir(full):
if _depth < max_depth: if _depth < max_depth:
child = build_tree(full, max_depth, show_hidden, _depth + 1) child = build_tree(full, max_depth, show_hidden, exclude, _depth + 1)
node["children"].append(child) node["children"].append(child)
else: else:
node["children"].append({ node["children"].append({