feat: add -x/--exclude flag to exclude directories from scan and AI analysis
This commit is contained in:
parent
78f80c31ed
commit
78f9a396dd
6 changed files with 49 additions and 18 deletions
28
luminos.py
28
luminos.py
|
|
@ -38,17 +38,21 @@ def _progress(label):
|
||||||
return on_file, finish
|
return on_file, finish
|
||||||
|
|
||||||
|
|
||||||
def scan(target, depth=3, show_hidden=False):
|
def scan(target, depth=3, show_hidden=False, exclude=None):
|
||||||
"""Run all analyses on the target directory and return a report dict."""
|
"""Run all analyses on the target directory and return a report dict."""
|
||||||
report = {}
|
report = {}
|
||||||
|
|
||||||
|
exclude = exclude or []
|
||||||
|
|
||||||
print(f" [scan] Building directory tree (depth={depth})...", file=sys.stderr)
|
print(f" [scan] Building directory tree (depth={depth})...", file=sys.stderr)
|
||||||
tree = build_tree(target, max_depth=depth, show_hidden=show_hidden)
|
tree = build_tree(target, max_depth=depth, show_hidden=show_hidden,
|
||||||
|
exclude=exclude)
|
||||||
report["tree"] = tree
|
report["tree"] = tree
|
||||||
report["tree_rendered"] = render_tree(tree)
|
report["tree_rendered"] = render_tree(tree)
|
||||||
|
|
||||||
on_file, finish = _progress("Classifying files")
|
on_file, finish = _progress("Classifying files")
|
||||||
classified = classify_files(target, show_hidden=show_hidden, on_file=on_file)
|
classified = classify_files(target, show_hidden=show_hidden,
|
||||||
|
exclude=exclude, on_file=on_file)
|
||||||
finish()
|
finish()
|
||||||
report["file_categories"] = summarize_categories(classified)
|
report["file_categories"] = summarize_categories(classified)
|
||||||
report["classified_files"] = classified
|
report["classified_files"] = classified
|
||||||
|
|
@ -64,10 +68,11 @@ def scan(target, depth=3, show_hidden=False):
|
||||||
finish()
|
finish()
|
||||||
|
|
||||||
print(" [scan] Finding recently modified files...", file=sys.stderr)
|
print(" [scan] Finding recently modified files...", file=sys.stderr)
|
||||||
report["recent_files"] = find_recent_files(target, show_hidden=show_hidden)
|
report["recent_files"] = find_recent_files(target, show_hidden=show_hidden,
|
||||||
|
exclude=exclude)
|
||||||
|
|
||||||
print(" [scan] Calculating disk usage...", file=sys.stderr)
|
print(" [scan] Calculating disk usage...", file=sys.stderr)
|
||||||
usage = get_disk_usage(target, show_hidden=show_hidden)
|
usage = get_disk_usage(target, show_hidden=show_hidden, exclude=exclude)
|
||||||
report["disk_usage"] = usage
|
report["disk_usage"] = usage
|
||||||
report["top_directories"] = top_directories(usage, n=5)
|
report["top_directories"] = top_directories(usage, n=5)
|
||||||
|
|
||||||
|
|
@ -101,6 +106,10 @@ def main():
|
||||||
help="Force a new AI investigation (ignore cached results)")
|
help="Force a new AI investigation (ignore cached results)")
|
||||||
parser.add_argument("--install-extras", action="store_true",
|
parser.add_argument("--install-extras", action="store_true",
|
||||||
help="Show status of optional AI dependencies")
|
help="Show status of optional AI dependencies")
|
||||||
|
parser.add_argument("-x", "--exclude", metavar="DIR", action="append",
|
||||||
|
default=[],
|
||||||
|
help="Exclude a directory name from scan and analysis "
|
||||||
|
"(repeatable, e.g. -x .git -x node_modules)")
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
|
@ -126,17 +135,22 @@ def main():
|
||||||
file=sys.stderr)
|
file=sys.stderr)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
|
if args.exclude:
|
||||||
|
print(f" [scan] Excluding: {', '.join(args.exclude)}", file=sys.stderr)
|
||||||
|
|
||||||
if args.watch:
|
if args.watch:
|
||||||
watch_loop(target, depth=args.depth, show_hidden=args.all,
|
watch_loop(target, depth=args.depth, show_hidden=args.all,
|
||||||
json_output=args.json_output)
|
json_output=args.json_output)
|
||||||
return
|
return
|
||||||
|
|
||||||
report = scan(target, depth=args.depth, show_hidden=args.all)
|
report = scan(target, depth=args.depth, show_hidden=args.all,
|
||||||
|
exclude=args.exclude)
|
||||||
|
|
||||||
flags = []
|
flags = []
|
||||||
if args.ai:
|
if args.ai:
|
||||||
from luminos_lib.ai import analyze_directory
|
from luminos_lib.ai import analyze_directory
|
||||||
brief, detailed, flags = analyze_directory(report, target, fresh=args.fresh)
|
brief, detailed, flags = analyze_directory(
|
||||||
|
report, target, fresh=args.fresh, exclude=args.exclude)
|
||||||
report["ai_brief"] = brief
|
report["ai_brief"] = brief
|
||||||
report["ai_detailed"] = detailed
|
report["ai_detailed"] = detailed
|
||||||
report["flags"] = flags
|
report["flags"] = flags
|
||||||
|
|
|
||||||
|
|
@ -643,14 +643,16 @@ def _call_api_streaming(client, system, messages, tools, tracker):
|
||||||
# Directory discovery
|
# Directory discovery
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
def _discover_directories(target, show_hidden=False):
|
def _discover_directories(target, show_hidden=False, exclude=None):
|
||||||
"""Walk the target and return all directories sorted leaves-first."""
|
"""Walk the target and return all directories sorted leaves-first."""
|
||||||
|
extra = set(exclude or [])
|
||||||
dirs = []
|
dirs = []
|
||||||
target_real = os.path.realpath(target)
|
target_real = os.path.realpath(target)
|
||||||
for root, subdirs, _files in os.walk(target_real, topdown=True):
|
for root, subdirs, _files in os.walk(target_real, topdown=True):
|
||||||
subdirs[:] = [
|
subdirs[:] = [
|
||||||
d for d in subdirs
|
d for d in subdirs
|
||||||
if not _should_skip_dir(d)
|
if not _should_skip_dir(d)
|
||||||
|
and d not in extra
|
||||||
and (show_hidden or not d.startswith("."))
|
and (show_hidden or not d.startswith("."))
|
||||||
]
|
]
|
||||||
dirs.append(root)
|
dirs.append(root)
|
||||||
|
|
@ -1001,7 +1003,7 @@ def _synthesize_from_cache(cache):
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
def _run_investigation(client, target, report, show_hidden=False,
|
def _run_investigation(client, target, report, show_hidden=False,
|
||||||
fresh=False, verbose=False):
|
fresh=False, verbose=False, exclude=None):
|
||||||
"""Orchestrate the multi-pass investigation. Returns (brief, detailed, flags)."""
|
"""Orchestrate the multi-pass investigation. Returns (brief, detailed, flags)."""
|
||||||
investigation_id, is_new = _get_investigation_id(target, fresh=fresh)
|
investigation_id, is_new = _get_investigation_id(target, fresh=fresh)
|
||||||
cache = _CacheManager(investigation_id, target)
|
cache = _CacheManager(investigation_id, target)
|
||||||
|
|
@ -1014,7 +1016,8 @@ def _run_investigation(client, target, report, show_hidden=False,
|
||||||
f"{'' if is_new else ' (resumed)'}", file=sys.stderr)
|
f"{'' if is_new else ' (resumed)'}", file=sys.stderr)
|
||||||
print(f" [AI] Cache: {cache.root}/", file=sys.stderr)
|
print(f" [AI] Cache: {cache.root}/", file=sys.stderr)
|
||||||
|
|
||||||
all_dirs = _discover_directories(target, show_hidden=show_hidden)
|
all_dirs = _discover_directories(target, show_hidden=show_hidden,
|
||||||
|
exclude=exclude)
|
||||||
|
|
||||||
to_investigate = []
|
to_investigate = []
|
||||||
cached_count = 0
|
cached_count = 0
|
||||||
|
|
@ -1087,7 +1090,8 @@ def _run_investigation(client, target, report, show_hidden=False,
|
||||||
# Public interface
|
# Public interface
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
def analyze_directory(report, target, verbose_tools=False, fresh=False):
|
def analyze_directory(report, target, verbose_tools=False, fresh=False,
|
||||||
|
exclude=None):
|
||||||
"""Run AI analysis on the directory. Returns (brief, detailed, flags).
|
"""Run AI analysis on the directory. Returns (brief, detailed, flags).
|
||||||
|
|
||||||
Returns ("", "", []) if the API key is missing or dependencies are not met.
|
Returns ("", "", []) if the API key is missing or dependencies are not met.
|
||||||
|
|
@ -1106,6 +1110,7 @@ def analyze_directory(report, target, verbose_tools=False, fresh=False):
|
||||||
try:
|
try:
|
||||||
brief, detailed, flags = _run_investigation(
|
brief, detailed, flags = _run_investigation(
|
||||||
client, target, report, fresh=fresh, verbose=verbose_tools,
|
client, target, report, fresh=fresh, verbose=verbose_tools,
|
||||||
|
exclude=exclude,
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Warning: AI analysis failed: {e}", file=sys.stderr)
|
print(f"Warning: AI analysis failed: {e}", file=sys.stderr)
|
||||||
|
|
|
||||||
|
|
@ -3,12 +3,15 @@
|
||||||
import subprocess
|
import subprocess
|
||||||
|
|
||||||
|
|
||||||
def get_disk_usage(target, show_hidden=False):
|
def get_disk_usage(target, show_hidden=False, exclude=None):
|
||||||
"""Get per-directory disk usage via du.
|
"""Get per-directory disk usage via du.
|
||||||
|
|
||||||
Returns a list of dicts: {path, size_bytes, size_human}.
|
Returns a list of dicts: {path, size_bytes, size_human}.
|
||||||
"""
|
"""
|
||||||
cmd = ["du", "-b", "--max-depth=2", target]
|
cmd = ["du", "-b", "--max-depth=2"]
|
||||||
|
for name in (exclude or []):
|
||||||
|
cmd.append(f"--exclude={name}")
|
||||||
|
cmd.append(target)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
result = subprocess.run(
|
result = subprocess.run(
|
||||||
|
|
|
||||||
|
|
@ -86,7 +86,8 @@ def _classify_one(filepath):
|
||||||
return "unknown", desc
|
return "unknown", desc
|
||||||
|
|
||||||
|
|
||||||
def classify_files(target, show_hidden=False, on_file=None):
|
def classify_files(target, show_hidden=False, exclude=None, on_file=None):
|
||||||
|
exclude = exclude or []
|
||||||
"""Walk the target directory and classify every file.
|
"""Walk the target directory and classify every file.
|
||||||
|
|
||||||
Returns a list of dicts: {path, name, category, size, description}.
|
Returns a list of dicts: {path, name, category, size, description}.
|
||||||
|
|
@ -94,8 +95,10 @@ def classify_files(target, show_hidden=False, on_file=None):
|
||||||
"""
|
"""
|
||||||
results = []
|
results = []
|
||||||
for root, dirs, files in os.walk(target):
|
for root, dirs, files in os.walk(target):
|
||||||
|
dirs[:] = [d for d in dirs
|
||||||
|
if d not in exclude
|
||||||
|
and (show_hidden or not d.startswith("."))]
|
||||||
if not show_hidden:
|
if not show_hidden:
|
||||||
dirs[:] = [d for d in dirs if not d.startswith(".")]
|
|
||||||
files = [f for f in files if not f.startswith(".")]
|
files = [f for f in files if not f.startswith(".")]
|
||||||
for fname in files:
|
for fname in files:
|
||||||
full = os.path.join(root, fname)
|
full = os.path.join(root, fname)
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,7 @@ import os
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
|
|
||||||
def find_recent_files(target, n=10, show_hidden=False):
|
def find_recent_files(target, n=10, show_hidden=False, exclude=None):
|
||||||
"""Find the n most recently modified files using find and stat.
|
"""Find the n most recently modified files using find and stat.
|
||||||
|
|
||||||
Returns a list of dicts: {path, name, modified, modified_human}.
|
Returns a list of dicts: {path, name, modified, modified_human}.
|
||||||
|
|
@ -14,6 +14,9 @@ def find_recent_files(target, n=10, show_hidden=False):
|
||||||
cmd = ["find", target, "-type", "f"]
|
cmd = ["find", target, "-type", "f"]
|
||||||
if not show_hidden:
|
if not show_hidden:
|
||||||
cmd.extend(["-not", "-path", "*/.*"])
|
cmd.extend(["-not", "-path", "*/.*"])
|
||||||
|
for name in (exclude or []):
|
||||||
|
cmd.extend(["-not", "-path", f"*/{name}/*",
|
||||||
|
"-not", "-path", f"*/{name}"])
|
||||||
cmd.extend(["-printf", "%T@\t%p\n"])
|
cmd.extend(["-printf", "%T@\t%p\n"])
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,8 @@
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
|
||||||
def build_tree(path, max_depth=3, show_hidden=False, _depth=0):
|
def build_tree(path, max_depth=3, show_hidden=False, exclude=None, _depth=0):
|
||||||
|
exclude = exclude or []
|
||||||
"""Build a nested dict representing the directory tree with file sizes."""
|
"""Build a nested dict representing the directory tree with file sizes."""
|
||||||
name = os.path.basename(path) or path
|
name = os.path.basename(path) or path
|
||||||
node = {"name": name, "path": path, "type": "directory", "children": []}
|
node = {"name": name, "path": path, "type": "directory", "children": []}
|
||||||
|
|
@ -17,10 +18,12 @@ def build_tree(path, max_depth=3, show_hidden=False, _depth=0):
|
||||||
for entry in entries:
|
for entry in entries:
|
||||||
if not show_hidden and entry.startswith("."):
|
if not show_hidden and entry.startswith("."):
|
||||||
continue
|
continue
|
||||||
|
if entry in exclude:
|
||||||
|
continue
|
||||||
full = os.path.join(path, entry)
|
full = os.path.join(path, entry)
|
||||||
if os.path.isdir(full):
|
if os.path.isdir(full):
|
||||||
if _depth < max_depth:
|
if _depth < max_depth:
|
||||||
child = build_tree(full, max_depth, show_hidden, _depth + 1)
|
child = build_tree(full, max_depth, show_hidden, exclude, _depth + 1)
|
||||||
node["children"].append(child)
|
node["children"].append(child)
|
||||||
else:
|
else:
|
||||||
node["children"].append({
|
node["children"].append({
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue