Coverage for python/lsst/daf/butler/core/datastoreCacheManager.py: 26%
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24"""Cache management for a datastore."""
26__all__ = (
27 "AbstractDatastoreCacheManager",
28 "DatastoreDisabledCacheManager",
29 "DatastoreCacheManager",
30 "DatastoreCacheManagerConfig",
31)
33import atexit
34import contextlib
35import datetime
36import itertools
37import logging
38import os
39import shutil
40import tempfile
41from abc import ABC, abstractmethod
42from collections import defaultdict
43from typing import (
44 TYPE_CHECKING,
45 Dict,
46 ItemsView,
47 Iterable,
48 Iterator,
49 KeysView,
50 List,
51 Optional,
52 Union,
53 ValuesView,
54)
56from lsst.resources import ResourcePath
57from pydantic import BaseModel, PrivateAttr
59from .config import ConfigSubset
60from .configSupport import processLookupConfigs
61from .datasets import DatasetId, DatasetRef
63if TYPE_CHECKING:
64 from .configSupport import LookupKey
65 from .datasets import DatasetType
66 from .dimensions import DimensionUniverse
67 from .storageClass import StorageClass
69log = logging.getLogger(__name__)
72def remove_cache_directory(directory: str) -> None:
73 """Remove the specified directory and all its contents."""
74 log.debug("Removing temporary cache directory %s", directory)
75 shutil.rmtree(directory, ignore_errors=True)
78def _construct_cache_path(root: ResourcePath, ref: DatasetRef, extension: str) -> ResourcePath:
79 """Construct the full path to use for this dataset in the cache.
81 Parameters
 82 ----------
 root : `lsst.resources.ResourcePath`
 Root directory of the cache.
 83 ref : `DatasetRef`
84 The dataset to look up in or write to the cache.
85 extension : `str`
86 File extension to use for this file. Should include the
87 leading "``.``".
89 Returns
90 -------
91 uri : `lsst.resources.ResourcePath`
92 URI to use for this dataset in the cache.
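 Examples
 --------
 A sketch of the resulting names, with an illustrative dataset ID::

     <root>/1234.fits      # no component
     <root>/1234_wcs.fits  # disassembled "wcs" component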
93 """
94 # Dataset type component is needed in the name if composite
95 # disassembly is happening since the ID is shared for all components.
96 component = ref.datasetType.component()
97 component = f"_{component}" if component else ""
98 return root.join(f"{ref.id}{component}{extension}")
101def _parse_cache_name(cached_location: str) -> Dict[str, Optional[str]]:
102 """For a given cache name, return its component parts.
104 Changes to ``_construct_cache_path()`` should be reflected here.
106 Parameters
107 ----------
108 cached_location : `str`
109 The name of the file within the cache.
111 Returns
112 -------
113 parsed : `dict` of `str`, `str`
114 Parsed components of the file. These include:
115 - "id": The dataset ID,
116 - "component": The name of the component (can be `None`),
117 - "extension": File extension (can be `None`).
118 """
119 # Assume first dot is the extension and so allow .fits.gz
120 root_ext = cached_location.split(".", maxsplit=1)
121 root = root_ext.pop(0)
122 ext = "." + root_ext.pop(0) if root_ext else None
124 parts = root.split("_")
125 id_ = parts.pop(0)
126 component = parts.pop(0) if parts else None
127 return {"id": id_, "component": component, "extension": ext}
130class CacheEntry(BaseModel):
131 """Represent an entry in the cache."""
133 name: str
134 """Name of the file."""
136 size: int
137 """Size of the file in bytes."""
139 ctime: datetime.datetime
140 """Creation time of the file."""
142 ref: DatasetId
143 """ID of this dataset."""
145 component: Optional[str]
146 """Component for this disassembled composite (optional)."""
148 @classmethod
149 def from_file(cls, file: ResourcePath, root: ResourcePath) -> CacheEntry:
150 """Construct an object from a file name.
152 Parameters
153 ----------
154 file : `lsst.resources.ResourcePath`
155 Path to the file.
156 root : `lsst.resources.ResourcePath`
157 Cache root directory.
158 """
159 file_in_cache = file.relative_to(root)
160 if file_in_cache is None:
161 raise ValueError(f"Supplied file {file} is not inside root {root}")
162 parts = _parse_cache_name(file_in_cache)
164 stat = os.stat(file.ospath)
165 return cls(
166 name=file_in_cache,
167 size=stat.st_size,
168 ref=parts["id"],
169 component=parts["component"],
170 ctime=datetime.datetime.utcfromtimestamp(stat.st_ctime),
171 )
174class CacheRegistry(BaseModel):
175 """Collection of cache entries."""
177 _size: int = PrivateAttr(0)
178 """Size of the cache."""
180 _entries: Dict[str, CacheEntry] = PrivateAttr({})
181 """Internal collection of cache entries."""
183 @property
184 def cache_size(self) -> int:
185 return self._size
187 def __getitem__(self, key: str) -> CacheEntry:
188 return self._entries[key]
190 def __setitem__(self, key: str, entry: CacheEntry) -> None:
191 self._size += entry.size
192 self._entries[key] = entry
194 def __delitem__(self, key: str) -> None:
195 entry = self._entries.pop(key)
196 self._decrement(entry)
198 def _decrement(self, entry: Optional[CacheEntry]) -> None:
199 if entry:
200 self._size -= entry.size
201 if self._size < 0:
202 log.warning("Cache size has gone negative. Inconsistent cache records...")
203 self._size = 0
205 def __contains__(self, key: str) -> bool:
206 return key in self._entries
208 def __len__(self) -> int:
209 return len(self._entries)
211 def __iter__(self) -> Iterator[str]: # type: ignore
212 return iter(self._entries)
214 def keys(self) -> KeysView[str]:
215 return self._entries.keys()
217 def values(self) -> ValuesView[CacheEntry]:
218 return self._entries.values()
220 def items(self) -> ItemsView[str, CacheEntry]:
221 return self._entries.items()
223 def pop(self, key: str, default: Optional[CacheEntry] = None) -> Optional[CacheEntry]:
224 entry = self._entries.pop(key, default)
225 self._decrement(entry)
226 return entry
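# A minimal sketch of the size bookkeeping above (entry construction elided;
# a real `CacheEntry` also needs ``ref`` and ``ctime``):
#
#     registry = CacheRegistry()
#     registry["1234.fits"] = entry_100_bytes   # cache_size -> 100
#     registry.pop("1234.fits")                 # cache_size -> 0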
229class DatastoreCacheManagerConfig(ConfigSubset):
230 """Configuration information for `DatastoreCacheManager`."""
232 component = "cached"
233 requiredKeys = ("cacheable",)
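# Keys read from the ``cached`` configuration section by `DatastoreCacheManager`
# below (only ``cacheable`` is required; the rest have defaults):
#
#     root             -- cache directory; if omitted a temporary directory is used
#     default          -- fallback caching decision (boolean)
#     cacheable        -- mapping of lookup keys to booleans
#     expiry.mode      -- cache expiration mode
#     expiry.threshold -- threshold for the chosen expiration mode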
236class AbstractDatastoreCacheManager(ABC):
237 """An abstract base class for managing caching in a Datastore.
239 Parameters
240 ----------
241 config : `str` or `DatastoreCacheManagerConfig`
242 Configuration to control caching.
243 universe : `DimensionUniverse`
244 Set of all known dimensions, used to expand and validate any used
245 in lookup keys.
246 """
248 @property
249 def cache_size(self) -> int:
250 """Size of the cache in bytes."""
251 return 0
253 @property
254 def file_count(self) -> int:
255 """Return number of cached files tracked by registry."""
256 return 0
258 def __init__(self, config: Union[str, DatastoreCacheManagerConfig], universe: DimensionUniverse):
259 if not isinstance(config, DatastoreCacheManagerConfig):
260 config = DatastoreCacheManagerConfig(config)
261 assert isinstance(config, DatastoreCacheManagerConfig)
262 self.config = config
264 @abstractmethod
265 def should_be_cached(self, entity: Union[DatasetRef, DatasetType, StorageClass]) -> bool:
266 """Indicate whether the entity should be added to the cache.
268 This is relevant when reading or writing.
270 Parameters
271 ----------
272 entity : `StorageClass` or `DatasetType` or `DatasetRef`
273 Thing to test against the configuration. The ``name`` property
274 is used to determine a match. A `DatasetType` will first check
275 its name, before checking its `StorageClass`. If there are no
276 matches the default will be returned.
278 Returns
279 -------
280 should_cache : `bool`
281 Returns `True` if the dataset should be cached; `False` otherwise.
282 """
283 raise NotImplementedError()
285 @abstractmethod
286 def move_to_cache(self, uri: ResourcePath, ref: DatasetRef) -> Optional[ResourcePath]:
287 """Move a file to the cache.
 289 Move the given file into the cache, using the supplied `DatasetRef`
 290 for naming. A call is made to `should_be_cached()`; if the dataset
 291 is not accepted by the cache, `None` is returned.
 293 Cache expiry may be triggered during this call.
295 Parameters
296 ----------
297 uri : `lsst.resources.ResourcePath`
298 Location of the file to be relocated to the cache. Will be moved.
299 ref : `DatasetRef`
300 Ref associated with this file. Will be used to determine the name
301 of the file within the cache.
303 Returns
304 -------
305 new : `lsst.resources.ResourcePath` or `None`
306 URI to the file within the cache, or `None` if the dataset
307 was not accepted by the cache.
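 Examples
 --------
 A minimal sketch, assuming a configured cache manager, a resolved
 ``ref`` and a local ``uri`` (all hypothetical names)::

     cached = cache_manager.move_to_cache(uri, ref)
     if cached is None:
         # Not accepted by the cache; the original file was not moved.
         ...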
308 """
309 raise NotImplementedError()
311 @abstractmethod
312 @contextlib.contextmanager
313 def find_in_cache(self, ref: DatasetRef, extension: str) -> Iterator[Optional[ResourcePath]]:
314 """Look for a dataset in the cache and return its location.
316 Parameters
317 ----------
318 ref : `DatasetRef`
319 Dataset to locate in the cache.
320 extension : `str`
321 File extension expected. Should include the leading "``.``".
323 Yields
324 ------
325 uri : `lsst.resources.ResourcePath` or `None`
326 The URI to the cached file, or `None` if the file has not been
327 cached.
329 Notes
330 -----
331 Should be used as a context manager in order to prevent this
332 file from being removed from the cache for that context.
333 """
334 raise NotImplementedError()
336 @abstractmethod
337 def remove_from_cache(self, ref: Union[DatasetRef, Iterable[DatasetRef]]) -> None:
338 """Remove the specified datasets from the cache.
340 It is not an error for these datasets to be missing from the cache.
342 Parameters
343 ----------
344 ref : `DatasetRef` or iterable of `DatasetRef`
345 The datasets to remove from the cache.
346 """
347 raise NotImplementedError()
349 @abstractmethod
350 def __str__(self) -> str:
351 raise NotImplementedError()
354class DatastoreCacheManager(AbstractDatastoreCacheManager):
355 """A class for managing caching in a Datastore using local files.
357 Parameters
358 ----------
359 config : `str` or `DatastoreCacheManagerConfig`
360 Configuration to control caching.
361 universe : `DimensionUniverse`
362 Set of all known dimensions, used to expand and validate any used
363 in lookup keys.
365 Notes
366 -----
367 Two environment variables can be used to override the cache directory
368 and expiration configuration:
370 * ``$DAF_BUTLER_CACHE_DIRECTORY``
371 * ``$DAF_BUTLER_CACHE_EXPIRATION_MODE``
 373 The expiration mode should take the form ``mode=threshold``; for
 374 example, to limit the cache directory to 5 datasets the value would
 375 be ``datasets=5``.
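 A minimal sketch of setting both overrides from Python, with an
 illustrative directory path::

     import os

     os.environ["DAF_BUTLER_CACHE_DIRECTORY"] = "/scratch/butler-cache"
     os.environ["DAF_BUTLER_CACHE_EXPIRATION_MODE"] = "datasets=5"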
376 """
378 _temp_exemption_prefix = "exempt/"
380 def __init__(self, config: Union[str, DatastoreCacheManagerConfig], universe: DimensionUniverse):
381 super().__init__(config, universe)
 383 # Set the cache directory now if one was configured, else defer
 384 # creation until first requested. Allow override from the environment.
385 root = os.environ.get("DAF_BUTLER_CACHE_DIRECTORY") or self.config.get("root")
386 self._cache_directory = (
387 ResourcePath(root, forceAbsolute=True, forceDirectory=True) if root is not None else None
388 )
390 if self._cache_directory:
391 if not self._cache_directory.isLocal:
392 raise ValueError(
393 f"Cache directory must be on a local file system. Got: {self._cache_directory}"
394 )
395 # Ensure that the cache directory is created. We assume that
396 # someone specifying a permanent cache directory will be expecting
397 # it to always be there. This will also trigger an error
398 # early rather than waiting until the cache is needed.
399 self._cache_directory.mkdir()
401 # Calculate the caching lookup table.
402 self._lut = processLookupConfigs(self.config["cacheable"], universe=universe)
 404 # Default decision for whether a dataset should be cached.
405 self._caching_default = self.config.get("default", False)
407 # Expiration mode. Read from config but allow override from
408 # the environment.
409 expiration_mode = self.config.get(("expiry", "mode"))
410 threshold = self.config.get(("expiry", "threshold"))
412 external_mode = os.environ.get("DAF_BUTLER_CACHE_EXPIRATION_MODE")
413 if external_mode and "=" in external_mode:
414 expiration_mode, expiration_threshold = external_mode.split("=", 1)
415 threshold = int(expiration_threshold)
416 if expiration_mode is None:
417 # Force to None to avoid confusion.
418 threshold = None
420 self._expiration_mode: Optional[str] = expiration_mode
421 self._expiration_threshold: Optional[int] = threshold
422 if self._expiration_threshold is None and self._expiration_mode is not None:
423 raise ValueError(
424 f"Cache expiration threshold must be set for expiration mode {self._expiration_mode}"
425 )
427 log.debug(
428 "Cache configuration:\n- root: %s\n- expiration mode: %s",
429 self._cache_directory if self._cache_directory else "tmpdir",
430 f"{self._expiration_mode}={self._expiration_threshold}" if self._expiration_mode else "disabled",
431 )
433 # Files in cache, indexed by path within the cache directory.
434 self._cache_entries = CacheRegistry()
436 @property
437 def cache_directory(self) -> ResourcePath:
438 if self._cache_directory is None:
439 # Create on demand.
440 self._cache_directory = ResourcePath(
441 tempfile.mkdtemp(prefix="butler-"), forceDirectory=True, isTemporary=True
442 )
443 log.debug("Creating temporary cache directory at %s", self._cache_directory)
444 # Remove when we no longer need it.
445 atexit.register(remove_cache_directory, self._cache_directory.ospath)
446 return self._cache_directory
448 @property
449 def _temp_exempt_directory(self) -> ResourcePath:
450 """Return the directory in which to store temporary cache files that
451 should not be expired.
452 """
453 return self.cache_directory.join(self._temp_exemption_prefix)
455 @property
456 def cache_size(self) -> int:
457 return self._cache_entries.cache_size
459 @property
460 def file_count(self) -> int:
461 return len(self._cache_entries)
463 def should_be_cached(self, entity: Union[DatasetRef, DatasetType, StorageClass]) -> bool:
464 # Docstring inherited
465 matchName: Union[LookupKey, str] = "{} (via default)".format(entity)
466 should_cache = self._caching_default
468 for key in entity._lookupNames():
469 if key in self._lut:
470 should_cache = bool(self._lut[key])
471 matchName = key
472 break
474 if not isinstance(should_cache, bool):
475 raise TypeError(
476 f"Got cache value {should_cache!r} for config entry {matchName!r}; expected bool."
477 )
479 log.debug("%s (match: %s) should%s be cached", entity, matchName, "" if should_cache else " not")
480 return should_cache
482 def _construct_cache_name(self, ref: DatasetRef, extension: str) -> ResourcePath:
483 """Construct the name to use for this dataset in the cache.
485 Parameters
486 ----------
487 ref : `DatasetRef`
488 The dataset to look up in or write to the cache.
489 extension : `str`
490 File extension to use for this file. Should include the
491 leading "``.``".
493 Returns
494 -------
495 uri : `lsst.resources.ResourcePath`
496 URI to use for this dataset in the cache.
497 """
498 return _construct_cache_path(self.cache_directory, ref, extension)
500 def move_to_cache(self, uri: ResourcePath, ref: DatasetRef) -> Optional[ResourcePath]:
501 # Docstring inherited
502 if ref.id is None:
503 raise ValueError(f"Can not cache a file associated with an unresolved reference ({ref})")
505 if not self.should_be_cached(ref):
506 return None
508 # Write the file using the id of the dataset ref and the file
509 # extension.
510 cached_location = self._construct_cache_name(ref, uri.getExtension())
512 # Run cache expiry to ensure that we have room for this
513 # item.
514 self._expire_cache()
516 # Move into the cache. Given that multiple processes might be
517 # sharing a single cache directory, and the file we need might have
518 # been copied in whilst we were checking, allow overwrite without
519 # complaint. Even for a private cache directory it is possible that
520 # a second butler in a subprocess could be writing to it.
521 cached_location.transfer_from(uri, transfer="move", overwrite=True)
522 log.debug("Cached dataset %s to %s", ref, cached_location)
524 self._register_cache_entry(cached_location)
526 return cached_location
528 @contextlib.contextmanager
529 def find_in_cache(self, ref: DatasetRef, extension: str) -> Iterator[Optional[ResourcePath]]:
530 # Docstring inherited
531 # Short circuit this if the cache directory has not been created yet.
532 if self._cache_directory is None:
533 yield None
534 return
536 cached_location = self._construct_cache_name(ref, extension)
537 if cached_location.exists():
538 log.debug("Found cached file %s for dataset %s.", cached_location, ref)
540 # The cached file could be removed by another process doing
541 # cache expiration so we need to protect against that by making
542 # a copy in a different tree. Use hardlinks to ensure that
543 # we either have the cached file or we don't. This is robust
544 # against race conditions that can be caused by using soft links
545 # and the other end of the link being deleted just after it
546 # is created.
547 path_in_cache = cached_location.relative_to(self.cache_directory)
548 assert path_in_cache is not None, f"Somehow {cached_location} not in cache directory"
549 temp_location: Optional[ResourcePath] = self._temp_exempt_directory.join(path_in_cache)
550 try:
551 if temp_location is not None:
552 temp_location.transfer_from(cached_location, transfer="hardlink")
553 except Exception:
554 # Any failure will be treated as if the file was not
555 # in the cache. Yielding the original cache location
556 # is too dangerous.
557 temp_location = None
559 try:
560 log.debug("Yielding temporary cache location %s for dataset %s", temp_location, ref)
561 yield temp_location
562 finally:
563 try:
564 if temp_location:
565 temp_location.remove()
566 except FileNotFoundError:
567 pass
568 return
570 log.debug("Dataset %s not found in cache.", ref)
571 yield None
572 return
574 def remove_from_cache(self, refs: Union[DatasetRef, Iterable[DatasetRef]]) -> None:
575 # Docstring inherited.
577 # Stop early if there are no cache entries anyhow.
578 if len(self._cache_entries) == 0:
579 return
581 if isinstance(refs, DatasetRef):
582 refs = [refs]
584 # Create a set of all the IDs
585 all_ids = {ref.getCheckedId() for ref in refs}
587 keys_to_remove = []
588 for key, entry in self._cache_entries.items():
589 if entry.ref in all_ids:
590 keys_to_remove.append(key)
591 self._remove_from_cache(keys_to_remove)
593 def _register_cache_entry(self, cached_location: ResourcePath, can_exist: bool = False) -> str:
594 """Record the file in the cache registry.
596 Parameters
597 ----------
598 cached_location : `lsst.resources.ResourcePath`
599 Location of the file to be registered.
600 can_exist : `bool`, optional
 601 If `True` the item being registered may already be in the registry.
602 This can allow a cache refresh to run without checking the
603 file again. If `False` it is an error for the registry to
604 already know about this file.
606 Returns
607 -------
608 cache_key : `str`
609 The key used in the registry for this file.
610 """
611 path_in_cache = cached_location.relative_to(self.cache_directory)
612 if path_in_cache is None:
613 raise ValueError(
614 f"Can not register cached file {cached_location} that is not within"
615 f" the cache directory at {self.cache_directory}."
616 )
617 if path_in_cache in self._cache_entries:
618 if can_exist:
619 return path_in_cache
620 else:
621 raise ValueError(
622 f"Cached file {cached_location} is already known to the registry"
623 " but this was expected to be a new file."
624 )
625 details = CacheEntry.from_file(cached_location, root=self.cache_directory)
626 self._cache_entries[path_in_cache] = details
627 return path_in_cache
629 def scan_cache(self) -> None:
630 """Scan the cache directory and record information about files."""
631 found = set()
632 for file in ResourcePath.findFileResources([self.cache_directory]):
633 assert isinstance(file, ResourcePath), "Unexpectedly did not get ResourcePath from iterator"
635 # Skip any that are found in an exempt part of the hierarchy
636 # since they should not be part of the registry.
637 if file.relative_to(self._temp_exempt_directory) is not None:
638 continue
640 path_in_cache = self._register_cache_entry(file, can_exist=True)
641 found.add(path_in_cache)
643 # Find any files that were recorded in the cache but are no longer
644 # on disk. (something else cleared them out?)
645 known_to_cache = set(self._cache_entries)
646 missing = known_to_cache - found
648 if missing:
649 log.debug(
650 "Entries no longer on disk but thought to be in cache and so removed: %s", ",".join(missing)
651 )
652 for path_in_cache in missing:
653 self._cache_entries.pop(path_in_cache)
655 def _remove_from_cache(self, cache_entries: Iterable[str]) -> None:
656 """Remove the specified cache entries from cache.
658 Parameters
659 ----------
660 cache_entries : iterable of `str`
661 The entries to remove from the cache. The values are the path
662 within the cache.
663 """
664 for entry in cache_entries:
665 path = self.cache_directory.join(entry)
667 self._cache_entries.pop(entry)
668 log.debug("Removing file from cache: %s", path)
669 try:
670 path.remove()
671 except FileNotFoundError:
672 pass
674 def _expire_cache(self) -> None:
675 """Expire the files in the cache.
677 Notes
678 -----
679 The expiration modes are defined by the config or can be overridden.
680 Available options:
 682 * ``files``: Maximum number of files.
 683 * ``datasets``: Maximum number of datasets.
 684 * ``size``: Maximum total size of files, in bytes.
 685 * ``age``: Maximum age of files, in seconds.
 687 The first three remove the oldest entries first.
688 Number of files is complicated by the possibility of disassembled
689 composites where 10 small files can be created for each dataset.
691 Additionally there is a use case for an external user to explicitly
692 state the dataset refs that should be cached and then when to
 693 remove them, overriding any global configuration.
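 Illustrative threshold values (not defaults): ``files=100`` keeps at
 most 100 files, ``datasets=5`` keeps at most 5 datasets, ``size=1000000``
 keeps the total size at or below one million bytes, and ``age=86400``
 expires files older than one day.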
694 """
695 if self._expiration_mode is None:
696 # Expiration has been disabled.
697 return
699 # mypy can't be sure we have set a threshold properly
700 if self._expiration_threshold is None:
701 log.warning(
702 "Requesting cache expiry of mode %s but no threshold set in config.", self._expiration_mode
703 )
704 return
706 # Sync up cache. There is no file locking involved so for a shared
707 # cache multiple processes may be racing to delete files. Deleting
708 # a file that no longer exists is not an error.
709 self.scan_cache()
711 if self._expiration_mode == "files":
712 n_files = len(self._cache_entries)
713 n_over = n_files - self._expiration_threshold
714 if n_over > 0:
715 sorted_keys = self._sort_cache()
716 keys_to_remove = sorted_keys[:n_over]
717 self._remove_from_cache(keys_to_remove)
718 return
720 if self._expiration_mode == "datasets":
721 # Count the datasets, in ascending timestamp order,
722 # so that oldest turn up first.
723 datasets = defaultdict(list)
724 for key in self._sort_cache():
725 entry = self._cache_entries[key]
726 datasets[entry.ref].append(key)
728 n_datasets = len(datasets)
729 n_over = n_datasets - self._expiration_threshold
730 if n_over > 0:
731 # Keys will be read out in insert order which
732 # will be date order so oldest ones are removed.
733 ref_ids = list(datasets.keys())[:n_over]
734 keys_to_remove = list(itertools.chain.from_iterable(datasets[ref_id] for ref_id in ref_ids))
735 self._remove_from_cache(keys_to_remove)
736 return
738 if self._expiration_mode == "size":
739 if self.cache_size > self._expiration_threshold:
740 for key in self._sort_cache():
741 self._remove_from_cache([key])
742 if self.cache_size <= self._expiration_threshold:
743 break
744 return
746 if self._expiration_mode == "age":
747 now = datetime.datetime.utcnow()
748 for key in self._sort_cache():
749 delta = now - self._cache_entries[key].ctime
 750 if delta.total_seconds() > self._expiration_threshold:
751 self._remove_from_cache([key])
752 else:
753 # We're already in date order.
754 break
755 return
757 raise ValueError(f"Unrecognized cache expiration mode of {self._expiration_mode}")
759 def _sort_cache(self) -> List[str]:
760 """Sort the cache entries by time and return the sorted keys.
762 Returns
763 -------
764 sorted : `list` of `str`
765 Keys into the cache, sorted by time with oldest first.
766 """
768 def sort_by_time(key: str) -> datetime.datetime:
769 """Sorter key function using cache entry details."""
770 return self._cache_entries[key].ctime
772 return sorted(self._cache_entries, key=sort_by_time)
774 def __str__(self) -> str:
775 cachedir = self._cache_directory if self._cache_directory else "<tempdir>"
776 return (
777 f"{type(self).__name__}@{cachedir} ({self._expiration_mode}={self._expiration_threshold},"
778 f"default={self._caching_default}) "
779 f"n_files={self.file_count}, n_bytes={self.cache_size}"
780 )
783class DatastoreDisabledCacheManager(AbstractDatastoreCacheManager):
784 """A variant of the datastore cache where no cache is enabled.
786 Parameters
787 ----------
788 config : `str` or `DatastoreCacheManagerConfig`
789 Configuration to control caching.
790 universe : `DimensionUniverse`
791 Set of all known dimensions, used to expand and validate any used
792 in lookup keys.
793 """
795 def __init__(self, config: Union[str, DatastoreCacheManagerConfig], universe: DimensionUniverse):
796 return
798 def should_be_cached(self, entity: Union[DatasetRef, DatasetType, StorageClass]) -> bool:
799 """Indicate whether the entity should be added to the cache.
801 Always returns `False`.
802 """
803 return False
805 def move_to_cache(self, uri: ResourcePath, ref: DatasetRef) -> Optional[ResourcePath]:
806 """Move dataset to cache but always refuse and returns `None`."""
807 return None
809 @contextlib.contextmanager
810 def find_in_cache(self, ref: DatasetRef, extension: str) -> Iterator[Optional[ResourcePath]]:
811 """Look for a dataset in the cache and return its location.
813 Never finds a file.
814 """
815 yield None
817 def remove_from_cache(self, ref: Union[DatasetRef, Iterable[DatasetRef]]) -> None:
818 """Remove datasets from cache.
820 Always does nothing.
821 """
822 return
824 def __str__(self) -> str:
825 return f"{type(self).__name__}()"