luminos/tests/test_cache.py

290 lines
11 KiB
Python
Raw Permalink Normal View History

"""Tests for luminos_lib/cache.py"""
import json
import os
import tempfile
import unittest
from datetime import datetime, timezone
from unittest.mock import patch
from luminos_lib.cache import (
_CacheManager,
_sha256_path,
_get_investigation_id,
CACHE_ROOT,
)
def _now():
return datetime.now(timezone.utc).isoformat()
def _make_manager(root):
    """Build a ``_CacheManager`` for tests whose cache lives under *root*.

    The instance is created with ``__new__`` so ``__init__`` is not run;
    the attributes are assigned by hand and the ``files`` / ``dirs``
    directories are created under ``<root>/cache``.
    """
    cache_root = os.path.join(root, "cache")

    manager = _CacheManager.__new__(_CacheManager)
    manager.investigation_id = "test-id"
    manager.target = root
    manager.root = cache_root
    manager.files_dir = os.path.join(cache_root, "files")
    manager.dirs_dir = os.path.join(cache_root, "dirs")
    manager.log_path = os.path.join(cache_root, "investigation.log")
    manager.meta_path = os.path.join(cache_root, "meta.json")

    # Only the two entry directories are created; the log and meta paths
    # are recorded but no file is written here.
    for directory in (manager.files_dir, manager.dirs_dir):
        os.makedirs(directory, exist_ok=True)
    return manager
def _file_entry(**overrides):
    """Return a sample file cache entry as a dict.

    Keyword arguments replace the default value of an existing key or add
    a new key to the returned dict.
    """
    defaults = {
        "path": "/tmp/foo.py",
        "relative_path": "foo.py",
        "summary": "A Python file.",
        "cached_at": _now(),
        "size_bytes": 128,
        "category": "source",
    }
    return {**defaults, **overrides}
def _dir_entry(**overrides):
    """Return a sample directory cache entry as a dict.

    Keyword arguments replace the default value of an existing key or add
    a new key to the returned dict.
    """
    defaults = {
        "path": "/tmp/mydir",
        "relative_path": "mydir",
        "summary": "A directory.",
        "cached_at": _now(),
        "child_count": 3,
        "dominant_category": "source",
    }
    return {**defaults, **overrides}
class TestSha256Path(unittest.TestCase):
    """Checks on the string returned by ``_sha256_path``."""

    def test_deterministic(self):
        """Hashing the same path twice gives the same result."""
        self.assertEqual(_sha256_path("/foo/bar"), _sha256_path("/foo/bar"))

    def test_different_paths_differ(self):
        """Two distinct paths give distinct results."""
        self.assertNotEqual(_sha256_path("/foo/bar"), _sha256_path("/foo/baz"))

    def test_returns_hex_string(self):
        """The result is a 64-character string of hexadecimal digits."""
        result = _sha256_path("/foo")
        self.assertIsInstance(result, str)
        self.assertEqual(len(result), 64)
        # Type and length alone would accept any 64-character string;
        # also require that every character is a hex digit.
        self.assertRegex(result, r"\A[0-9a-fA-F]{64}\Z")
class TestWriteEntry(unittest.TestCase):
    """Validation and persistence behaviour of ``write_entry``.

    The tests expect ``write_entry`` to return the string ``"ok"`` on
    success and a string containing ``"Error"`` when the entry is rejected.
    """

    def setUp(self):
        # A bare mkdtemp() would leave one directory behind per test;
        # TemporaryDirectory plus addCleanup removes the tree afterwards,
        # even when the test fails.
        tmp = tempfile.TemporaryDirectory()
        self.addCleanup(tmp.cleanup)
        self.tmpdir = tmp.name
        self.cm = _make_manager(self.tmpdir)

    def test_valid_file_entry(self):
        result = self.cm.write_entry("file", "/tmp/foo.py", _file_entry())
        self.assertEqual(result, "ok")

    def test_valid_dir_entry(self):
        result = self.cm.write_entry("dir", "/tmp/mydir", _dir_entry())
        self.assertEqual(result, "ok")

    def test_missing_required_field_file(self):
        """The error message names the missing file field."""
        entry = _file_entry()
        del entry["summary"]
        result = self.cm.write_entry("file", "/tmp/foo.py", entry)
        self.assertIn("Error", result)
        self.assertIn("summary", result)

    def test_missing_required_field_dir(self):
        """The error message names the missing directory field."""
        entry = _dir_entry()
        del entry["child_count"]
        result = self.cm.write_entry("dir", "/tmp/mydir", entry)
        self.assertIn("Error", result)
        self.assertIn("child_count", result)

    def test_raw_content_rejected(self):
        """An entry carrying a ``content`` key is refused."""
        entry = _file_entry(content="raw file data")
        result = self.cm.write_entry("file", "/tmp/foo.py", entry)
        self.assertIn("Error", result)

    def test_valid_confidence(self):
        entry = _file_entry(confidence=0.85, confidence_reason="")
        result = self.cm.write_entry("file", "/tmp/foo.py", entry)
        self.assertEqual(result, "ok")

    def test_confidence_zero(self):
        """The lower bound 0.0 is accepted."""
        entry = _file_entry(confidence=0.0, confidence_reason="completely unknown")
        result = self.cm.write_entry("file", "/tmp/foo.py", entry)
        self.assertEqual(result, "ok")

    def test_confidence_one(self):
        """The upper bound 1.0 is accepted."""
        entry = _file_entry(confidence=1.0)
        result = self.cm.write_entry("file", "/tmp/foo.py", entry)
        self.assertEqual(result, "ok")

    def test_confidence_out_of_range_high(self):
        entry = _file_entry(confidence=1.5)
        result = self.cm.write_entry("file", "/tmp/foo.py", entry)
        self.assertIn("Error", result)
        self.assertIn("confidence", result)

    def test_confidence_out_of_range_low(self):
        entry = _file_entry(confidence=-0.1)
        result = self.cm.write_entry("file", "/tmp/foo.py", entry)
        self.assertIn("Error", result)

    def test_confidence_wrong_type(self):
        entry = _file_entry(confidence="high")
        result = self.cm.write_entry("file", "/tmp/foo.py", entry)
        self.assertIn("Error", result)

    def test_confidence_reason_wrong_type(self):
        entry = _file_entry(confidence=0.5, confidence_reason=42)
        result = self.cm.write_entry("file", "/tmp/foo.py", entry)
        self.assertIn("Error", result)

    def test_confidence_without_reason_is_ok(self):
        entry = _file_entry(confidence=0.9)
        result = self.cm.write_entry("file", "/tmp/foo.py", entry)
        self.assertEqual(result, "ok")

    def test_written_file_is_valid_json(self):
        """A written entry can be read back with its summary intact."""
        entry = _file_entry()
        self.cm.write_entry("file", "/tmp/foo.py", entry)
        stored = self.cm.read_entry("file", "/tmp/foo.py")
        self.assertIsNotNone(stored)
        self.assertEqual(stored["summary"], "A Python file.")
class TestReadEntry(unittest.TestCase):
    """Behaviour of ``read_entry`` and ``has_entry``."""

    def setUp(self):
        # A bare mkdtemp() would leave one directory behind per test;
        # TemporaryDirectory plus addCleanup removes the tree afterwards,
        # even when the test fails.
        tmp = tempfile.TemporaryDirectory()
        self.addCleanup(tmp.cleanup)
        self.tmpdir = tmp.name
        self.cm = _make_manager(self.tmpdir)

    def test_read_after_write(self):
        """A stored entry is returned with the summary that was written."""
        entry = _file_entry(summary="Hello world")
        self.cm.write_entry("file", "/tmp/foo.py", entry)
        result = self.cm.read_entry("file", "/tmp/foo.py")
        self.assertEqual(result["summary"], "Hello world")

    def test_read_missing_returns_none(self):
        result = self.cm.read_entry("file", "/tmp/nonexistent.py")
        self.assertIsNone(result)

    def test_has_entry_true(self):
        self.cm.write_entry("file", "/tmp/foo.py", _file_entry())
        self.assertTrue(self.cm.has_entry("file", "/tmp/foo.py"))

    def test_has_entry_false(self):
        self.assertFalse(self.cm.has_entry("file", "/tmp/missing.py"))
class TestListEntries(unittest.TestCase):
    """Behaviour of ``list_entries`` and ``read_all_entries``."""

    def setUp(self):
        # A bare mkdtemp() would leave one directory behind per test;
        # TemporaryDirectory plus addCleanup removes the tree afterwards,
        # even when the test fails.
        tmp = tempfile.TemporaryDirectory()
        self.addCleanup(tmp.cleanup)
        self.tmpdir = tmp.name
        self.cm = _make_manager(self.tmpdir)

    def test_empty(self):
        """A cache with no entries lists as an empty list."""
        self.assertEqual(self.cm.list_entries("file"), [])

    def test_lists_relative_paths(self):
        """Each stored entry shows up under its ``relative_path``."""
        self.cm.write_entry(
            "file", "/tmp/a.py", _file_entry(path="/tmp/a.py", relative_path="a.py")
        )
        self.cm.write_entry(
            "file", "/tmp/b.py", _file_entry(path="/tmp/b.py", relative_path="b.py")
        )
        entries = self.cm.list_entries("file")
        self.assertIn("a.py", entries)
        self.assertIn("b.py", entries)

    def test_read_all_entries_returns_dicts(self):
        self.cm.write_entry(
            "file", "/tmp/a.py", _file_entry(path="/tmp/a.py", relative_path="a.py")
        )
        result = self.cm.read_all_entries("file")
        self.assertEqual(len(result), 1)
        self.assertIsInstance(result[0], dict)
class TestLowConfidenceEntries(unittest.TestCase):
    """Behaviour of ``low_confidence_entries``."""

    def setUp(self):
        # A bare mkdtemp() would leave one directory behind per test;
        # TemporaryDirectory plus addCleanup removes the tree afterwards,
        # even when the test fails.
        tmp = tempfile.TemporaryDirectory()
        self.addCleanup(tmp.cleanup)
        self.tmpdir = tmp.name
        self.cm = _make_manager(self.tmpdir)

    def _add_file(self, name, **fields):
        """Store a file entry for ``/tmp/<name>`` with the given extra fields."""
        path = "/tmp/" + name
        self.cm.write_entry(
            "file", path, _file_entry(path=path, relative_path=name, **fields)
        )

    def _low_confidence_paths(self, **kwargs):
        """Return the ``relative_path`` of every entry reported as low-confidence."""
        return [e["relative_path"] for e in self.cm.low_confidence_entries(**kwargs)]

    def test_returns_entries_below_threshold(self):
        self._add_file("a.py", confidence=0.4)
        self._add_file("b.py", confidence=0.9)
        paths = self._low_confidence_paths()
        self.assertIn("a.py", paths)
        self.assertNotIn("b.py", paths)

    def test_excludes_entries_at_threshold(self):
        # NOTE(review): 0.7 is presumably the default threshold - confirm
        # against low_confidence_entries().
        self._add_file("c.py", confidence=0.7)
        result = self.cm.low_confidence_entries()
        self.assertEqual(result, [])

    def test_includes_entries_missing_confidence(self):
        """An entry written without a ``confidence`` field is reported."""
        self._add_file("d.py")
        self.assertIn("d.py", self._low_confidence_paths())

    def test_includes_dir_entries(self):
        """Directory entries are reported as well as file entries."""
        self.cm.write_entry(
            "dir",
            "/tmp/mydir",
            _dir_entry(path="/tmp/mydir", relative_path="mydir", confidence=0.3),
        )
        self.assertIn("mydir", self._low_confidence_paths())

    def test_sorted_ascending_by_confidence(self):
        self._add_file("e.py", confidence=0.6)
        self._add_file("f.py", confidence=0.2)
        self._add_file("g.py", confidence=0.4)
        result = self.cm.low_confidence_entries()
        scores = [e["confidence"] for e in result]
        self.assertEqual(scores, sorted(scores))

    def test_custom_threshold(self):
        self._add_file("h.py", confidence=0.5)
        self._add_file("i.py", confidence=0.8)
        paths = self._low_confidence_paths(threshold=0.6)
        self.assertIn("h.py", paths)
        self.assertNotIn("i.py", paths)

    def test_empty_cache_returns_empty_list(self):
        self.assertEqual(self.cm.low_confidence_entries(), [])
class TestGetInvestigationId(unittest.TestCase):
    """Behaviour of ``_get_investigation_id`` for repeated and fresh lookups.

    Each test points the cache module's ``CACHE_ROOT`` and
    ``INVESTIGATIONS_PATH`` globals at a temporary directory. The overrides
    use ``patch.object`` so the original values are restored on exit, even
    when an assertion fails.
    """

    def test_same_target_same_id(self):
        """A second lookup of the same target reuses the id and is not new."""
        from luminos_lib import cache as c

        with tempfile.TemporaryDirectory() as d:
            index_path = os.path.join(d, "investigations.json")
            with patch.object(c, "CACHE_ROOT", d), \
                    patch.object(c, "INVESTIGATIONS_PATH", index_path):
                id1, _ = _get_investigation_id(d)
                # _get_investigation_id checks the cache dir exists before reusing
                os.makedirs(os.path.join(d, id1), exist_ok=True)
                id2, new = _get_investigation_id(d)
                self.assertEqual(id1, id2)
                self.assertFalse(new)

    def test_fresh_flag_creates_new_id(self):
        """``fresh=True`` yields a different id and reports it as new."""
        from luminos_lib import cache as c

        with tempfile.TemporaryDirectory() as d:
            index_path = os.path.join(d, "investigations.json")
            with patch.object(c, "CACHE_ROOT", d), \
                    patch.object(c, "INVESTIGATIONS_PATH", index_path):
                # Unrelated directory under the cache root, kept from the
                # original test setup.
                os.makedirs(os.path.join(d, "someid"), exist_ok=True)
                id1, _ = _get_investigation_id(d)
                os.makedirs(os.path.join(d, id1), exist_ok=True)
                id2, new = _get_investigation_id(d, fresh=True)
                self.assertNotEqual(id1, id2)
                self.assertTrue(new)
if __name__ == "__main__":
    # Run the test cases when this file is executed directly.
    unittest.main()