# luminos/tests/test_filetypes.py

"""Tests for luminos_lib/filetypes.py"""

import os
import tempfile
import unittest
from unittest.mock import patch

from luminos_lib.filetypes import (
    EXTENSION_MAP,
    _classify_one,
    classify_files,
    summarize_categories,
    survey_signals,
)


class TestExtensionMap(unittest.TestCase):
    """Spot-check one representative extension per category in EXTENSION_MAP."""

    def _assert_category(self, extension, expected):
        # Index the map directly so a missing extension surfaces as a
        # KeyError instead of a comparison against a default value.
        actual = EXTENSION_MAP[extension]
        self.assertEqual(actual, expected)

    def test_python_is_source(self):
        self._assert_category(".py", "source")

    def test_json_is_config(self):
        self._assert_category(".json", "config")

    def test_csv_is_data(self):
        self._assert_category(".csv", "data")

    def test_png_is_media(self):
        self._assert_category(".png", "media")

    def test_md_is_document(self):
        self._assert_category(".md", "document")

    def test_zip_is_archive(self):
        self._assert_category(".zip", "archive")


class TestClassifyOne(unittest.TestCase):
    """Exercise _classify_one with mapped extensions and a stubbed _file_command."""

    # Dotted path of the function that the fallback tests replace with a stub.
    _FILE_COMMAND = "luminos_lib.filetypes._file_command"

    def _classify_with_file_output(self, filename, output):
        # Call _classify_one while _file_command is patched to return `output`.
        with patch(self._FILE_COMMAND, return_value=output):
            return _classify_one(filename)

    def test_known_extension(self):
        kind, description = _classify_one("script.py")
        self.assertEqual(kind, "source")
        self.assertIsNone(description)

    def test_known_extension_case_insensitive(self):
        # Upper-case extension must resolve the same way as lower-case.
        kind, description = _classify_one("image.PNG")
        self.assertEqual(kind, "media")
        self.assertIsNone(description)

    def test_unknown_extension_falls_back_to_file_command(self):
        kind, description = self._classify_with_file_output(
            "README", "ASCII text"
        )
        self.assertEqual(kind, "source")
        self.assertEqual(description, "ASCII text")

    def test_unknown_extension_unrecognized_file_output(self):
        # Only the category is checked here; the description is ignored.
        kind, _ = self._classify_with_file_output("somefile.xyz", "data")
        self.assertEqual(kind, "unknown")

    def test_file_command_timeout_returns_unknown(self):
        # The stub returns an empty string to stand in for a timed-out call.
        kind, _ = self._classify_with_file_output("oddfile", "")
        self.assertEqual(kind, "unknown")


class TestSummarizeCategories(unittest.TestCase):
    """Check the per-category counts returned by summarize_categories."""

    @staticmethod
    def _records(*categories):
        # Minimal file records: only the "category" key is populated.
        return [{"category": category} for category in categories]

    def test_empty(self):
        counts = summarize_categories([])
        self.assertEqual(counts, {})

    def test_single_category(self):
        counts = summarize_categories(self._records("source", "source"))
        self.assertEqual(counts, {"source": 2})

    def test_multiple_categories(self):
        counts = summarize_categories(
            self._records("source", "config", "source", "media")
        )
        # Checked key by key, in this order, rather than as a whole dict.
        for category, expected in (("source", 2), ("config", 1), ("media", 1)):
            self.assertEqual(counts[category], expected)


class TestClassifyFiles(unittest.TestCase):
    """Run classify_files against a throwaway directory created per test."""

    def setUp(self):
        # A bare mkdtemp() is never removed, so every test used to leak a
        # directory. TemporaryDirectory + addCleanup deletes the tree after
        # each test, including tests that fail or error.
        tmp = tempfile.TemporaryDirectory()
        self.addCleanup(tmp.cleanup)
        self.tmpdir = tmp.name

    def _make_file(self, name, content=""):
        """Create `name` inside the test directory and return its full path."""
        path = os.path.join(self.tmpdir, name)
        # Explicit encoding keeps the fixture independent of the locale.
        with open(path, "w", encoding="utf-8") as f:
            f.write(content)
        return path

    def test_classifies_python_file(self):
        self._make_file("script.py", "print('hello')")
        results = classify_files(self.tmpdir)
        names = [r["name"] for r in results]
        self.assertIn("script.py", names)
        py = next(r for r in results if r["name"] == "script.py")
        self.assertEqual(py["category"], "source")

    def test_excludes_hidden_files_by_default(self):
        self._make_file(".hidden.py")
        self._make_file("visible.py")
        results = classify_files(self.tmpdir)
        names = [r["name"] for r in results]
        self.assertNotIn(".hidden.py", names)
        self.assertIn("visible.py", names)

    def test_includes_hidden_files_when_requested(self):
        self._make_file(".hidden.py")
        results = classify_files(self.tmpdir, show_hidden=True)
        names = [r["name"] for r in results]
        self.assertIn(".hidden.py", names)

    def test_excludes_directories(self):
        # A file inside an excluded directory must not appear in the results.
        excluded_dir = os.path.join(self.tmpdir, "node_modules")
        os.makedirs(excluded_dir)
        with open(
            os.path.join(excluded_dir, "pkg.js"), "w", encoding="utf-8"
        ) as f:
            f.write("")
        self._make_file("main.py")
        results = classify_files(self.tmpdir, exclude=["node_modules"])
        names = [r["name"] for r in results]
        self.assertNotIn("pkg.js", names)
        self.assertIn("main.py", names)

    def test_on_file_callback(self):
        # The callback is expected to fire once per file created here.
        self._make_file("a.py")
        self._make_file("b.py")
        seen = []
        classify_files(self.tmpdir, on_file=seen.append)
        self.assertEqual(len(seen), 2)

    def test_size_is_populated(self):
        self._make_file("data.json", '{"key": "value"}')
        results = classify_files(self.tmpdir)
        item = next(r for r in results if r["name"] == "data.json")
        self.assertGreater(item["size"], 0)


class TestSurveySignals(unittest.TestCase):
    """Check the aggregate signals survey_signals builds from file records."""

    def _f(self, name, description="", category="source"):
        # Build one file record in the shape these tests feed survey_signals.
        return dict(
            name=name,
            path=f"/x/{name}",
            category=category,
            size=10,
            description=description,
        )

    def test_empty_input(self):
        signals = survey_signals([])
        self.assertEqual(signals["total_files"], 0)
        for key in ("extension_histogram", "file_descriptions"):
            self.assertEqual(signals[key], {})
        self.assertEqual(signals["filename_samples"], [])

    def test_extension_histogram_uses_lowercase_and_keeps_none(self):
        filenames = ("a.PY", "b.py", "c.py", "README", "Makefile")
        records = [self._f(filename) for filename in filenames]
        histogram = survey_signals(records)["extension_histogram"]
        # ".PY" is counted under ".py"; extension-less names share "(none)".
        self.assertEqual(histogram[".py"], 3)
        self.assertEqual(histogram["(none)"], 2)

    def test_file_descriptions_aggregated_and_truncated(self):
        script_desc = "Python script, ASCII text"
        records = [
            self._f("a.py", script_desc),
            self._f("b.py", script_desc),
            self._f("c.bin", "x" * 200),
        ]
        descriptions = survey_signals(records)["file_descriptions"]
        self.assertEqual(descriptions[script_desc], 2)
        # The 200-character description must show up once, in shortened form.
        shortened = [
            key
            for key in descriptions
            if key.startswith("xxx") and key.endswith("...")
        ]
        self.assertEqual(len(shortened), 1)
        # Intended limit is 80 characters + "..."; the bound is slightly loose.
        self.assertLessEqual(len(shortened[0]), 84)

    def test_descriptions_skipped_when_empty(self):
        # Both an empty string and None count as "no description".
        records = [self._f("a.py", ""), self._f("b.py", None)]
        signals = survey_signals(records)
        self.assertEqual(signals["file_descriptions"], {})

    def test_top_n_caps_at_20(self):
        # 50 files, each with its own unique extension.
        records = [self._f(f"f{i}.ext{i}") for i in range(50)]
        histogram = survey_signals(records)["extension_histogram"]
        self.assertEqual(len(histogram), 20)

    def test_filename_samples_evenly_drawn(self):
        records = [self._f(f"file_{i:04d}.txt") for i in range(100)]
        samples = survey_signals(records, max_samples=10)["filename_samples"]
        self.assertEqual(len(samples), 10)
        # Sampling starts at the very first file.
        self.assertEqual(samples[0], "file_0000.txt")
        # The final sample falls in the 90s block of the input.
        self.assertTrue(samples[-1].startswith("file_009"))

    def test_filename_samples_returns_all_when_under_cap(self):
        records = [self._f(f"f{i}.txt") for i in range(5)]
        samples = survey_signals(records, max_samples=20)["filename_samples"]
        self.assertEqual(len(samples), 5)

    def test_total_files_matches_input(self):
        records = [self._f(f"f{i}.py") for i in range(7)]
        signals = survey_signals(records)
        self.assertEqual(signals["total_files"], 7)


# Run the tests in this module when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()