# Provenance: luminos_lib/code.py as introduced by commit 07d96fa
# ("feat: add code detection and line counting", Jeff Smith, 2026-03-30).
"""Code detection — languages, line counts, large file flagging."""

import os

# Maps a lower-cased file extension to a human-readable language name.
LANG_EXTENSIONS = {
    ".py": "Python", ".js": "JavaScript", ".ts": "TypeScript",
    ".jsx": "JavaScript (JSX)", ".tsx": "TypeScript (TSX)",
    ".java": "Java", ".c": "C", ".cpp": "C++", ".cc": "C++",
    ".h": "C/C++ Header", ".hpp": "C++ Header",
    ".go": "Go", ".rs": "Rust", ".rb": "Ruby", ".php": "PHP",
    ".swift": "Swift", ".kt": "Kotlin", ".scala": "Scala",
    ".sh": "Shell", ".bash": "Bash", ".zsh": "Zsh",
    ".pl": "Perl", ".lua": "Lua", ".r": "R", ".m": "Objective-C",
    ".cs": "C#", ".hs": "Haskell", ".ex": "Elixir", ".exs": "Elixir",
    ".erl": "Erlang", ".clj": "Clojure", ".sql": "SQL",
}

LARGE_LINE_THRESHOLD = 1000
LARGE_SIZE_THRESHOLD = 10 * 1024 * 1024  # 10 MB

# Bytes read per iteration while counting lines; bounds memory use on
# very large files.
_READ_CHUNK_SIZE = 1024 * 1024


def _count_lines(filepath):
    """Count newline characters in a file (same semantics as ``wc -l``).

    The file is read in binary chunks inside the current process, so the
    result does not depend on an external ``wc`` binary and a path that
    begins with ``-`` cannot be mistaken for a command-line option.

    Args:
        filepath: Path of the file to inspect.

    Returns:
        The number of ``\\n`` bytes in the file. A final line without a
        trailing newline is not counted, exactly as with ``wc -l``.
        Returns 0 when the path is not a regular file or cannot be read.
    """
    # Only regular files are read: opening a FIFO or device node could
    # block indefinitely, and a directory has no line count.
    if not os.path.isfile(filepath):
        return 0

    total = 0
    try:
        with open(filepath, "rb") as handle:
            for chunk in iter(lambda: handle.read(_READ_CHUNK_SIZE), b""):
                total += chunk.count(b"\n")
    except OSError:
        # Best-effort: an unreadable file contributes no lines.
        return 0
    return total


def detect_languages(classified_files):
    """Detect languages present and count lines of code per language.

    Only entries whose ``"category"`` is ``"source"`` are considered. The
    language is looked up from the lower-cased extension of ``"name"``;
    extensions missing from ``LANG_EXTENSIONS`` are grouped as ``"Other"``.

    Args:
        classified_files: Iterable of dicts providing at least the keys
            ``"category"``, ``"name"`` and ``"path"``.

    Returns:
        A tuple ``(languages, loc_by_language)`` where ``languages`` is an
        alphabetically sorted list of language names and
        ``loc_by_language`` maps each language name to its total line
        count.
    """
    languages = set()
    loc = {}

    for entry in classified_files:
        if entry["category"] != "source":
            continue
        ext = os.path.splitext(entry["name"])[1].lower()
        lang = LANG_EXTENSIONS.get(ext, "Other")
        languages.add(lang)
        loc[lang] = loc.get(lang, 0) + _count_lines(entry["path"])

    return sorted(languages), loc


def find_large_files(classified_files):
    """Find source files that are unusually large (>1000 lines or >10MB).

    Args:
        classified_files: Iterable of dicts providing at least the keys
            ``"category"``, ``"name"``, ``"path"`` and ``"size"`` (the
            size is compared against a byte threshold).

    Returns:
        A list with one dict per flagged file, in input order, holding
        ``"path"``, ``"name"`` and ``"reasons"``. ``"reasons"`` lists the
        exceeded limits as strings (size first, then line count).
    """
    large = []

    for entry in classified_files:
        if entry["category"] != "source":
            continue

        reasons = []
        if entry["size"] > LARGE_SIZE_THRESHOLD:
            reasons.append(f"size: {entry['size'] / (1024*1024):.1f} MB")
        lines = _count_lines(entry["path"])
        if lines > LARGE_LINE_THRESHOLD:
            reasons.append(f"lines: {lines}")

        if reasons:
            large.append({"path": entry["path"], "name": entry["name"],
                          "reasons": reasons})

    return large