From 1d681c8bc193101a4db0893a5770f1a3fc51401a Mon Sep 17 00:00:00 2001
From: Jeff Smith
Date: Mon, 6 Apr 2026 21:13:58 -0600
Subject: [PATCH] feat(cache): add low_confidence_entries() query to CacheManager (#3)

Returns all file and dir cache entries with confidence below a given
threshold (default 0.7). Entries missing a confidence field are included
as unrated/untrusted; a stored null confidence is treated the same way.
Results sorted ascending by confidence so least-confident entries come
first.

Co-Authored-By: Claude Sonnet 4.6
---
Review notes (this section is ignored by git am):
- low_confidence_entries() scores a null confidence as 0.0 instead of
  raising TypeError on the "<" comparison.
- The test setUp registers cleanup for its temporary directory.
- The ordering test pins exact values so an empty result cannot pass.
- Line breaks and indentation were reconstructed from a
  whitespace-collapsed copy of this patch. Verify the context-line
  indentation against the target files and regenerate the index blob ids
  before applying.

 luminos_lib/cache.py | 26 ++++++++++++++++
 tests/test_cache.py  | 70 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 96 insertions(+)

diff --git a/luminos_lib/cache.py b/luminos_lib/cache.py
index b95f981..dbd873e 100644
--- a/luminos_lib/cache.py
+++ b/luminos_lib/cache.py
@@ -187,3 +187,29 @@ class _CacheManager:
             except OSError:
                 pass
         return result
+
+    def low_confidence_entries(self, threshold=0.7):
+        """Return all file and dir cache entries with confidence below threshold.
+
+        Entries with no confidence field, or with a stored null, are unrated
+        and therefore untrusted: they are scored as 0.0, so they are
+        included for any positive threshold and sort first.
+
+        Args:
+            threshold: exclusive upper bound; an entry whose confidence
+                equals the threshold is not returned. Defaults to 0.7.
+
+        Returns:
+            A new list of entry dicts sorted ascending by confidence, so
+            the least-confident entries come first.
+        """
+        def _score(entry):
+            # A null confidence must not reach the "<" comparison below:
+            # None is not orderable against a float in Python 3.
+            value = entry.get("confidence")
+            return 0.0 if value is None else value
+
+        entries = self.read_all_entries("file") + self.read_all_entries("dir")
+        low = [e for e in entries if _score(e) < threshold]
+        low.sort(key=_score)
+        return low
diff --git a/tests/test_cache.py b/tests/test_cache.py
index 365a58b..e4791c5 100644
--- a/tests/test_cache.py
+++ b/tests/test_cache.py
@@ -198,6 +198,76 @@ class TestListEntries(unittest.TestCase):
         self.assertIsInstance(result[0], dict)
 
 
+class TestLowConfidenceEntries(unittest.TestCase):
+    """Tests for the low_confidence_entries() cache query."""
+
+    def setUp(self):
+        # Register cleanup so the temp directory is removed after each
+        # test; a bare mkdtemp() would leave it behind on disk.
+        tmp = tempfile.TemporaryDirectory()
+        self.addCleanup(tmp.cleanup)
+        self.tmpdir = tmp.name
+        self.cm = _make_manager(self.tmpdir)
+
+    def test_returns_entries_below_threshold(self):
+        self.cm.write_entry("file", "/tmp/a.py", _file_entry(path="/tmp/a.py", relative_path="a.py", confidence=0.4))
+        self.cm.write_entry("file", "/tmp/b.py", _file_entry(path="/tmp/b.py", relative_path="b.py", confidence=0.9))
+        result = self.cm.low_confidence_entries()
+        paths = [e["relative_path"] for e in result]
+        self.assertIn("a.py", paths)
+        self.assertNotIn("b.py", paths)
+
+    def test_excludes_entries_at_threshold(self):
+        self.cm.write_entry("file", "/tmp/c.py", _file_entry(path="/tmp/c.py", relative_path="c.py", confidence=0.7))
+        result = self.cm.low_confidence_entries()
+        self.assertEqual(result, [])
+
+    def test_includes_entries_missing_confidence(self):
+        self.cm.write_entry("file", "/tmp/d.py", _file_entry(path="/tmp/d.py", relative_path="d.py"))
+        result = self.cm.low_confidence_entries()
+        paths = [e["relative_path"] for e in result]
+        self.assertIn("d.py", paths)
+
+    def test_null_confidence_is_treated_as_unrated(self):
+        # Stub the reader so the query sees a stored null directly,
+        # independent of what write_entry() accepts.
+        stored = [
+            {"relative_path": "o.py", "confidence": 0.3},
+            {"relative_path": "n.py", "confidence": None},
+        ]
+        self.cm.read_all_entries = lambda kind: list(stored) if kind == "file" else []
+        result = self.cm.low_confidence_entries()
+        paths = [e["relative_path"] for e in result]
+        self.assertEqual(paths, ["n.py", "o.py"])
+
+    def test_includes_dir_entries(self):
+        self.cm.write_entry("dir", "/tmp/mydir", _dir_entry(path="/tmp/mydir", relative_path="mydir", confidence=0.3))
+        result = self.cm.low_confidence_entries()
+        paths = [e["relative_path"] for e in result]
+        self.assertIn("mydir", paths)
+
+    def test_sorted_ascending_by_confidence(self):
+        self.cm.write_entry("file", "/tmp/e.py", _file_entry(path="/tmp/e.py", relative_path="e.py", confidence=0.6))
+        self.cm.write_entry("file", "/tmp/f.py", _file_entry(path="/tmp/f.py", relative_path="f.py", confidence=0.2))
+        self.cm.write_entry("file", "/tmp/g.py", _file_entry(path="/tmp/g.py", relative_path="g.py", confidence=0.4))
+        result = self.cm.low_confidence_entries()
+        scores = [e["confidence"] for e in result]
+        # Pin the exact values: comparing against sorted(scores) alone
+        # would also pass for an empty result.
+        self.assertEqual(scores, [0.2, 0.4, 0.6])
+
+    def test_custom_threshold(self):
+        self.cm.write_entry("file", "/tmp/h.py", _file_entry(path="/tmp/h.py", relative_path="h.py", confidence=0.5))
+        self.cm.write_entry("file", "/tmp/i.py", _file_entry(path="/tmp/i.py", relative_path="i.py", confidence=0.8))
+        result = self.cm.low_confidence_entries(threshold=0.6)
+        paths = [e["relative_path"] for e in result]
+        self.assertIn("h.py", paths)
+        self.assertNotIn("i.py", paths)
+
+    def test_empty_cache_returns_empty_list(self):
+        self.assertEqual(self.cm.low_confidence_entries(), [])
+
+
 class TestGetInvestigationId(unittest.TestCase):
     def test_same_target_same_id(self):
         with tempfile.TemporaryDirectory() as d: