import json
import os
from typing import Any, Callable, Dict, List, Optional
[docs]
class GeneCacheManager:
    """
    Manages the caching and retrieval of gene lists.

    This class handles the execution of gene generation recipes and stores
    the results as text files (one gene per line) to avoid redundant
    computations. It maintains a JSON registry to track the mapping between
    keys and filenames.

    Attributes
    ----------
    cache_dir : str
        The absolute path to the directory where cache files are stored.
    registry_path : str
        The full path to the registry JSON file.
    recipes : dict
        A dictionary storing the registered functions and their arguments.
    registry : dict
        A dictionary mapping cache keys to their corresponding filenames.
    """

    def __init__(
        self,
        cache_dir: str = "gene_cache",
        registry_file: str = "registry.json",
        base_path: Optional[str] = None,
    ):
        """
        Initialize the GeneCacheManager.

        The cache directory is created if it does not exist, and an existing
        registry file is loaded from it. A registry file that cannot be
        parsed is reported and replaced by an empty registry.

        Parameters
        ----------
        cache_dir : str, optional
            Name or path of the cache directory. By default "gene_cache".
        registry_file : str, optional
            Name of the registry file. By default "registry.json".
        base_path : str, optional
            Base path for resolving relative cache directories. If None,
            uses the current file's location or the working directory.
        """
        if base_path is None:
            try:
                base_path = __file__
            except NameError:
                # __file__ is undefined in interactive sessions.
                base_path = os.getcwd()

        # A file path (e.g. __file__) resolves to its containing directory.
        if os.path.isfile(base_path):
            base_dir = os.path.dirname(os.path.abspath(base_path))
        else:
            base_dir = os.path.abspath(base_path)

        if os.path.isabs(cache_dir):
            self.cache_dir = cache_dir
        else:
            self.cache_dir = os.path.join(base_dir, cache_dir)

        self.registry_path = os.path.join(self.cache_dir, registry_file)
        self.recipes: Dict[str, Any] = {}
        os.makedirs(self.cache_dir, exist_ok=True)
        self.registry: Dict[str, str] = self._load_registry()

    def load(
        self, key: str, func: Callable, update: bool = False, **kwargs
    ) -> List[str]:
        """
        Register a recipe and retrieve the gene list in one step.

        This is a convenience wrapper that calls `register_recipe` followed
        by `get`.

        Parameters
        ----------
        key : str
            Unique identifier for the cache item.
        func : callable
            The function used to generate the gene list. Must return an
            iterable of strings.
        update : bool, optional
            If True, forces regeneration of the cache even if it exists.
            By default False.
        **kwargs
            Keyword arguments passed to `func`.

        Returns
        -------
        list of str
            The list of genes loaded from cache or generated by the function.
        """
        self.register_recipe(key=key, func=func, **kwargs)
        return self.get(key=key, update=update)

    def register_recipe(self, key: str, func: Callable, **kwargs):
        """
        Register a function and its arguments for lazy generation.

        Registering the same key again replaces the previous recipe.

        Parameters
        ----------
        key : str
            Unique identifier for the cache item.
        func : callable
            The function to execute when generation is triggered.
        **kwargs
            Keyword arguments to be passed to `func` upon execution.
        """
        self.recipes[key] = {"func": func, "kwargs": kwargs}

    def get(self, key: str, update: bool = False) -> List[str]:
        """
        Retrieve the gene list for the given key.

        If the cache exists and `update` is False, data is read from the file.
        Otherwise, the registered recipe is executed to generate the data,
        which is then saved to a file named ``<key>.txt`` in the cache
        directory and recorded in the registry.

        Parameters
        ----------
        key : str
            The identifier of the gene list to retrieve.
        update : bool, optional
            If True, ignores existing cache and regenerates the file.
            By default False.

        Returns
        -------
        list of str
            The list of genes. Returns an empty list if generation fails
            or no recipe is found.

        Raises
        ------
        OSError
            If the cache file or registry cannot be written.
        """
        if not update and key in self.registry:
            file_path = os.path.join(self.cache_dir, self.registry[key])
            # Open directly instead of checking existence first, so a file
            # removed between the check and the read cannot raise here.
            try:
                with open(file_path, "r", encoding="utf-8") as f:
                    cached = [line.strip() for line in f]
            except FileNotFoundError:
                print(f"[Cache Broken] File missing for '{key}'. Regenerating...")
            else:
                print(f"[Cache Hit] Loading '{key}'...")
                return cached

        if key not in self.recipes:
            print(f"[NotFound] No cache and no recipe for '{key}'.")
            return []

        print(f"[Generating] Running recipe for '{key}'... (Update={update})")
        recipe = self.recipes[key]
        try:
            # Materialize first so a generator result is not exhausted by the
            # join below, and build the file content before any file is
            # touched so an invalid result cannot clobber an existing cache.
            genes = list(recipe["func"](**recipe["kwargs"]))
            payload = "\n".join(genes)
        except Exception as e:
            print(f"[Error] Failed to generate '{key}': {e}")
            return []

        filename = f"{key}.txt"
        save_path = os.path.join(self.cache_dir, filename)
        self._atomic_write(save_path, payload)
        self.registry[key] = filename
        self._save_registry()
        print(f" -> Saved to {save_path}")
        return genes

    def _load_registry(self) -> Dict[str, str]:
        """
        Read the registry JSON file, tolerating a missing or corrupt file.

        Returns
        -------
        dict
            The stored key-to-filename mapping, or an empty dict when the
            file is absent, is not valid JSON, or does not hold an object.
        """
        try:
            with open(self.registry_path, "r", encoding="utf-8") as f:
                data = json.load(f)
        except FileNotFoundError:
            return {}
        except ValueError as e:
            # Covers json.JSONDecodeError and UnicodeDecodeError. The registry
            # is only a cache index, so it is rebuilt rather than being fatal.
            print(f"[Registry Broken] Could not parse '{self.registry_path}': {e}")
            return {}
        if not isinstance(data, dict):
            print(f"[Registry Broken] Unexpected content in '{self.registry_path}'.")
            return {}
        return data

    @staticmethod
    def _atomic_write(path: str, text: str) -> None:
        """
        Write `text` to `path` through a temporary file and `os.replace`.

        An interrupted write therefore leaves the previous file intact
        instead of a truncated one.
        """
        tmp_path = f"{path}.tmp"
        try:
            with open(tmp_path, "w", encoding="utf-8") as f:
                f.write(text)
            os.replace(tmp_path, path)
        finally:
            # Only present if the write or the replace failed.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

    def _save_registry(self):
        """
        Save the current registry to the JSON file.
        """
        self._atomic_write(self.registry_path, json.dumps(self.registry, indent=4))

    def clear_cache(self, key: str):
        """
        Remove the cache file and registry entry for a specific key.

        Does nothing if the key is not present in the registry.

        Parameters
        ----------
        key : str
            The identifier of the cache item to remove.
        """
        if key not in self.registry:
            return

        path = os.path.join(self.cache_dir, self.registry[key])
        try:
            os.remove(path)
        except FileNotFoundError:
            # The file is already gone; only the registry entry remains.
            pass
        del self.registry[key]
        self._save_registry()
        print(f"Cache cleared for '{key}'")