Source code for scanpex.sq._gene_cache_mgr

import json
import os
from typing import Any, Callable, Dict, List, Optional


class GeneCacheManager:
    """
    Manages the caching and retrieval of gene lists.

    This class handles the execution of gene generation recipes and stores
    the results as text files (one gene per line) to avoid redundant
    computations. It maintains a JSON registry to track the mapping between
    keys and filenames.

    Attributes
    ----------
    cache_dir : str
        The absolute path to the directory where cache files are stored.
    registry_path : str
        The full path to the registry JSON file.
    recipes : dict
        A dictionary storing the registered functions and their arguments.
    registry : dict
        A dictionary mapping cache keys to their corresponding filenames.
    """

    def __init__(
        self,
        cache_dir: str = "gene_cache",
        registry_file: str = "registry.json",
        base_path: Optional[str] = None,
    ):
        """
        Initialize the GeneCacheManager.

        The cache directory is created if it does not exist, and an existing
        registry file is loaded. An unreadable registry is treated as empty
        so that the cache can simply be regenerated.

        Parameters
        ----------
        cache_dir : str, optional
            Name or path of the cache directory. By default "gene_cache".
        registry_file : str, optional
            Name of the registry file. By default "registry.json".
        base_path : str, optional
            Base path for resolving relative cache directories.
            If None, uses the current file's location or the working
            directory.
        """
        if base_path is None:
            try:
                base_path = __file__
            except NameError:
                # __file__ is undefined in some interactive environments.
                base_path = os.getcwd()

        # A file path means "the directory containing that file".
        if os.path.isfile(base_path):
            base_dir = os.path.dirname(os.path.abspath(base_path))
        else:
            base_dir = os.path.abspath(base_path)

        if os.path.isabs(cache_dir):
            self.cache_dir = cache_dir
        else:
            self.cache_dir = os.path.join(base_dir, cache_dir)

        self.registry_path = os.path.join(self.cache_dir, registry_file)
        self.recipes: Dict[str, Any] = {}

        os.makedirs(self.cache_dir, exist_ok=True)
        self.registry: Dict[str, str] = self._load_registry()

    def _load_registry(self) -> Dict[str, str]:
        """
        Read the registry JSON file.

        Returns
        -------
        dict
            The stored key-to-filename mapping, or an empty dict when the
            file is missing, is not valid JSON, or does not hold an object.
        """
        try:
            with open(self.registry_path, "r", encoding="utf-8") as f:
                data = json.load(f)
        except FileNotFoundError:
            return {}
        except (json.JSONDecodeError, UnicodeDecodeError) as e:
            # A damaged registry only costs regeneration time, so recover
            # instead of making the whole cache unusable.
            print(f"[Cache Broken] Registry unreadable ({e}). Starting empty.")
            return {}

        if not isinstance(data, dict):
            print("[Cache Broken] Registry is not a JSON object. Starting empty.")
            return {}
        return data

    def load(
        self, key: str, func: Callable, update: bool = False, **kwargs
    ) -> List[str]:
        """
        Register a recipe and retrieve the gene list in one step.

        This is a convenience wrapper that calls `register_recipe` followed
        by `get`.

        Parameters
        ----------
        key : str
            Unique identifier for the cache item.
        func : callable
            The function used to generate the gene list. Must return an
            iterable of strings.
        update : bool, optional
            If True, forces regeneration of the cache even if it exists.
            By default False.
        **kwargs
            Keyword arguments passed to `func`.

        Returns
        -------
        list of str
            The list of genes loaded from cache or generated by the function.
        """
        self.register_recipe(key=key, func=func, **kwargs)
        return self.get(key=key, update=update)

    def register_recipe(self, key: str, func: Callable, **kwargs):
        """
        Register a function and its arguments for lazy generation.

        Registering the same key again replaces the previous recipe.

        Parameters
        ----------
        key : str
            Unique identifier for the cache item.
        func : callable
            The function to execute when generation is triggered.
        **kwargs
            Keyword arguments to be passed to `func` upon execution.
        """
        self.recipes[key] = {"func": func, "kwargs": kwargs}

    def get(self, key: str, update: bool = False) -> List[str]:
        """
        Retrieve the gene list for the given key.

        If the cache exists and `update` is False, data is read from the
        file. Otherwise, the registered recipe is executed to generate the
        data, which is then saved to a file.

        Parameters
        ----------
        key : str
            The identifier of the gene list to retrieve.
        update : bool, optional
            If True, ignores existing cache and regenerates the file.
            By default False.

        Returns
        -------
        list of str
            The list of genes. Returns an empty list if the recipe raises
            or no recipe is found.

        Raises
        ------
        TypeError
            If the recipe result is not an iterable of strings. The existing
            cache file, if any, is left untouched in that case.
        """
        if not update and key in self.registry:
            file_path = os.path.join(self.cache_dir, self.registry[key])
            try:
                with open(file_path, "r", encoding="utf-8") as f:
                    cached = [line.strip() for line in f]
            except FileNotFoundError:
                print(f"[Cache Broken] File missing for '{key}'. Regenerating...")
            else:
                print(f"[Cache Hit] Loading '{key}'...")
                return cached

        if key not in self.recipes:
            print(f"[NotFound] No cache and no recipe for '{key}'.")
            return []

        print(f"[Generating] Running recipe for '{key}'... (Update={update})")
        recipe = self.recipes[key]
        try:
            genes = recipe["func"](**recipe["kwargs"])
        except Exception as e:
            # Deliberate best-effort: a failing recipe yields an empty result
            # rather than propagating an arbitrary user-function error.
            print(f"[Error] Failed to generate '{key}': {e}")
            return []

        # Materialize once so one-shot iterables (e.g. generators) are not
        # handed back exhausted, and so the return type matches a cache hit.
        genes = list(genes)
        # Build the text BEFORE opening the file: opening with "w" truncates,
        # so a join failure afterwards would wipe a previously valid cache.
        content = "\n".join(genes)

        filename = f"{key}.txt"
        save_path = os.path.join(self.cache_dir, filename)
        with open(save_path, "w", encoding="utf-8") as f:
            f.write(content)

        self.registry[key] = filename
        self._save_registry()
        print(f" -> Saved to {save_path}")
        return genes

    def _save_registry(self):
        """
        Save the current registry to the JSON file.

        The data is written to a sibling temporary file and then moved over
        the registry, so an interrupted write cannot leave a truncated
        registry behind.
        """
        tmp_path = f"{self.registry_path}.tmp"
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(self.registry, f, indent=4)
        os.replace(tmp_path, self.registry_path)

    def clear_cache(self, key: str):
        """
        Remove the cache file and registry entry for a specific key.

        Does nothing when the key is not present in the registry.

        Parameters
        ----------
        key : str
            The identifier of the cache item to remove.
        """
        if key not in self.registry:
            return

        path = os.path.join(self.cache_dir, self.registry[key])
        try:
            os.remove(path)
        except FileNotFoundError:
            # Already gone; only the registry entry is left to drop.
            pass

        del self.registry[key]
        self._save_registry()
        print(f"Cache cleared for '{key}'")