Coverage for python/lsst/daf/butler/core/datastoreCacheManager.py: 26%
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

"""Cache management for a datastore."""

__all__ = (
    "AbstractDatastoreCacheManager",
    "DatastoreDisabledCacheManager",
    "DatastoreCacheManager",
    "DatastoreCacheManagerConfig",
)

import atexit
import contextlib
import datetime
import itertools
import logging
import os
import shutil
import tempfile
from abc import ABC, abstractmethod
from collections import defaultdict
from typing import (
    TYPE_CHECKING,
    Dict,
    ItemsView,
    Iterable,
    Iterator,
    KeysView,
    List,
    Optional,
    Union,
    ValuesView,
)

from lsst.resources import ResourcePath
from pydantic import BaseModel, PrivateAttr

from .config import ConfigSubset
from .configSupport import processLookupConfigs
from .datasets import DatasetId, DatasetRef

if TYPE_CHECKING:
    from .configSupport import LookupKey
    from .datasets import DatasetType
    from .dimensions import DimensionUniverse
    from .storageClass import StorageClass

log = logging.getLogger(__name__)


def remove_cache_directory(directory: str) -> None:
    """Remove the specified directory and all its contents."""
    log.debug("Removing temporary cache directory %s", directory)
    shutil.rmtree(directory, ignore_errors=True)


def _construct_cache_path(root: ResourcePath, ref: DatasetRef, extension: str) -> ResourcePath:
    """Construct the full path to use for this dataset in the cache.

    Parameters
    ----------
    root : `lsst.resources.ResourcePath`
        Root of the cache directory.
    ref : `DatasetRef`
        The dataset to look up in or write to the cache.
    extension : `str`
        File extension to use for this file. Should include the
        leading "``.``".

    Returns
    -------
    uri : `lsst.resources.ResourcePath`
        URI to use for this dataset in the cache.
    """
    # Dataset type component is needed in the name if composite
    # disassembly is happening since the ID is shared for all components.
    component = ref.datasetType.component()
    component = f"_{component}" if component else ""
    return root.join(f"{ref.id}{component}{extension}")
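
# Illustrative example of the naming convention above (the ID, component and
# extension are hypothetical): a ref with ID "1234" whose "wcs" component is
# cached with a ".fits" extension maps to <cache root>/1234_wcs.fits.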


def _parse_cache_name(cached_location: str) -> Dict[str, Optional[str]]:
    """For a given cache name, return its component parts.

    Changes to ``_construct_cache_path()`` should be reflected here.

    Parameters
    ----------
    cached_location : `str`
        The name of the file within the cache.

    Returns
    -------
    parsed : `dict` of `str`, `str`
        Parsed components of the file. These include:
        - "id": The dataset ID,
        - "component": The name of the component (can be `None`),
        - "extension": File extension (can be `None`).
    """
    # Assume first dot is the extension and so allow .fits.gz
    root_ext = cached_location.split(".", maxsplit=1)
    root = root_ext.pop(0)
    ext = "." + root_ext.pop(0) if root_ext else None

    parts = root.split("_")
    id_ = parts.pop(0)
    component = parts.pop(0) if parts else None
    return {"id": id_, "component": component, "extension": ext}
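
# Worked example of the parsing above (names are hypothetical):
# "1234_wcs.fits.gz" parses to {"id": "1234", "component": "wcs",
# "extension": ".fits.gz"}, while "1234.fits" parses to
# {"id": "1234", "component": None, "extension": ".fits"}.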


class CacheEntry(BaseModel):
    """Represent an entry in the cache."""

    name: str
    """Name of the file."""

    size: int
    """Size of the file in bytes."""

    ctime: datetime.datetime
    """Creation time of the file."""

    ref: DatasetId
    """ID of this dataset."""

    component: Optional[str]
    """Component for this disassembled composite (optional)."""

    @classmethod
    def from_file(cls, file: ResourcePath, root: ResourcePath) -> CacheEntry:
        """Construct an object from a file name.

        Parameters
        ----------
        file : `lsst.resources.ResourcePath`
            Path to the file.
        root : `lsst.resources.ResourcePath`
            Cache root directory.
        """
        file_in_cache = file.relative_to(root)
        if file_in_cache is None:
            raise ValueError(f"Supplied file {file} is not inside root {root}")
        parts = _parse_cache_name(file_in_cache)

        stat = os.stat(file.ospath)
        return cls(
            name=file_in_cache,
            size=stat.st_size,
            ref=parts["id"],
            component=parts["component"],
            ctime=datetime.datetime.utcfromtimestamp(stat.st_ctime),
        )


class CacheRegistry(BaseModel):
    """Collection of cache entries."""

    _size: int = PrivateAttr(0)
    """Size of the cache."""

    _entries: Dict[str, CacheEntry] = PrivateAttr({})
    """Internal collection of cache entries."""

    @property
    def cache_size(self) -> int:
        return self._size

    def __getitem__(self, key: str) -> CacheEntry:
        return self._entries[key]

    def __setitem__(self, key: str, entry: CacheEntry) -> None:
        self._size += entry.size
        self._entries[key] = entry

    def __delitem__(self, key: str) -> None:
        entry = self._entries.pop(key)
        self._decrement(entry)

    def _decrement(self, entry: Optional[CacheEntry]) -> None:
        if entry:
            self._size -= entry.size
            if self._size < 0:
                log.warning("Cache size has gone negative. Inconsistent cache records...")
                self._size = 0

    def __contains__(self, key: str) -> bool:
        return key in self._entries

    def __len__(self) -> int:
        return len(self._entries)

    def __iter__(self) -> Iterator[str]:  # type: ignore
        return iter(self._entries)

    def keys(self) -> KeysView[str]:
        return self._entries.keys()

    def values(self) -> ValuesView[CacheEntry]:
        return self._entries.values()

    def items(self) -> ItemsView[str, CacheEntry]:
        return self._entries.items()

    def pop(self, key: str, default: Optional[CacheEntry] = None) -> Optional[CacheEntry]:
        entry = self._entries.pop(key, default)
        self._decrement(entry)
        return entry


class DatastoreCacheManagerConfig(ConfigSubset):
    """Configuration information for `DatastoreCacheManager`."""

    component = "cached"
    requiredKeys = ("cacheable",)


class AbstractDatastoreCacheManager(ABC):
    """An abstract base class for managing caching in a Datastore.

    Parameters
    ----------
    config : `str` or `DatastoreCacheManagerConfig`
        Configuration to control caching.
    universe : `DimensionUniverse`
        Set of all known dimensions, used to expand and validate any used
        in lookup keys.
    """

    @property
    def cache_size(self) -> int:
        """Size of the cache in bytes."""
        return 0

    @property
    def file_count(self) -> int:
        """Return number of cached files tracked by registry."""
        return 0

    def __init__(self, config: Union[str, DatastoreCacheManagerConfig], universe: DimensionUniverse):
        if not isinstance(config, DatastoreCacheManagerConfig):
            config = DatastoreCacheManagerConfig(config)
        assert isinstance(config, DatastoreCacheManagerConfig)
        self.config = config

    @abstractmethod
    def should_be_cached(self, entity: Union[DatasetRef, DatasetType, StorageClass]) -> bool:
        """Indicate whether the entity should be added to the cache.

        This is relevant when reading or writing.

        Parameters
        ----------
        entity : `StorageClass` or `DatasetType` or `DatasetRef`
            Thing to test against the configuration. The ``name`` property
            is used to determine a match. A `DatasetType` will first check
            its name, before checking its `StorageClass`. If there are no
            matches the default will be returned.

        Returns
        -------
        should_cache : `bool`
            Returns `True` if the dataset should be cached; `False` otherwise.
        """
        raise NotImplementedError()

    @abstractmethod
    def move_to_cache(self, uri: ResourcePath, ref: DatasetRef) -> Optional[ResourcePath]:
        """Move a file to the cache.

        Move the given file into the cache, using the supplied DatasetRef
        for naming. A call is made to `should_be_cached()` and if the
        DatasetRef should not be accepted `None` will be returned.

        Cache expiry can occur during this.

        Parameters
        ----------
        uri : `lsst.resources.ResourcePath`
            Location of the file to be relocated to the cache. Will be moved.
        ref : `DatasetRef`
            Ref associated with this file. Will be used to determine the name
            of the file within the cache.

        Returns
        -------
        new : `lsst.resources.ResourcePath` or `None`
            URI to the file within the cache, or `None` if the dataset
            was not accepted by the cache.
        """
        raise NotImplementedError()

    @abstractmethod
    @contextlib.contextmanager
    def find_in_cache(self, ref: DatasetRef, extension: str) -> Iterator[Optional[ResourcePath]]:
        """Look for a dataset in the cache and return its location.

        Parameters
        ----------
        ref : `DatasetRef`
            Dataset to locate in the cache.
        extension : `str`
            File extension expected. Should include the leading "``.``".

        Yields
        ------
        uri : `lsst.resources.ResourcePath` or `None`
            The URI to the cached file, or `None` if the file has not been
            cached.

        Notes
        -----
        Should be used as a context manager in order to prevent this
        file from being removed from the cache for that context.
        """
        raise NotImplementedError()

    @abstractmethod
    def remove_from_cache(self, ref: Union[DatasetRef, Iterable[DatasetRef]]) -> None:
        """Remove the specified datasets from the cache.

        It is not an error for these datasets to be missing from the cache.

        Parameters
        ----------
        ref : `DatasetRef` or iterable of `DatasetRef`
            The datasets to remove from the cache.
        """
        raise NotImplementedError()

    @abstractmethod
    def __str__(self) -> str:
        raise NotImplementedError()


class DatastoreCacheManager(AbstractDatastoreCacheManager):
    """A class for managing caching in a Datastore using local files.

    Parameters
    ----------
    config : `str` or `DatastoreCacheManagerConfig`
        Configuration to control caching.
    universe : `DimensionUniverse`
        Set of all known dimensions, used to expand and validate any used
        in lookup keys.

    Notes
    -----
    Two environment variables can be used to override the cache directory
    and expiration configuration:

    * ``$DAF_BUTLER_CACHE_DIRECTORY``
    * ``$DAF_BUTLER_CACHE_EXPIRATION_MODE``

    The expiration mode should take the form ``mode=threshold``; for example,
    to limit the cache directory to 5 datasets the value would be
    ``datasets=5``.
    """

    _temp_exemption_prefix = "exempt/"

    def __init__(self, config: Union[str, DatastoreCacheManagerConfig], universe: DimensionUniverse):
        super().__init__(config, universe)

        # Set cache directory if it pre-exists, else defer creation until
        # requested. Allow external override from environment.
        root = os.environ.get("DAF_BUTLER_CACHE_DIRECTORY") or self.config.get("root")
        self._cache_directory = (
            ResourcePath(root, forceAbsolute=True, forceDirectory=True) if root is not None else None
        )

        if self._cache_directory:
            if not self._cache_directory.isLocal:
                raise ValueError(
                    f"Cache directory must be on a local file system. Got: {self._cache_directory}"
                )
            # Ensure that the cache directory is created. We assume that
            # someone specifying a permanent cache directory will be expecting
            # it to always be there. This will also trigger an error
            # early rather than waiting until the cache is needed.
            self._cache_directory.mkdir()

        # Calculate the caching lookup table.
        self._lut = processLookupConfigs(self.config["cacheable"], universe=universe)

        # Default decision for whether a dataset should be cached.
        self._caching_default = self.config.get("default", False)

        # Expiration mode. Read from config but allow override from
        # the environment.
        expiration_mode = self.config.get(("expiry", "mode"))
        threshold = self.config.get(("expiry", "threshold"))

        external_mode = os.environ.get("DAF_BUTLER_CACHE_EXPIRATION_MODE")
        if external_mode and "=" in external_mode:
            expiration_mode, expiration_threshold = external_mode.split("=", 1)
            threshold = int(expiration_threshold)
        if expiration_mode is None:
            # Force to None to avoid confusion.
            threshold = None

        self._expiration_mode: Optional[str] = expiration_mode
        self._expiration_threshold: Optional[int] = threshold
        if self._expiration_threshold is None and self._expiration_mode is not None:
            raise ValueError(
                f"Cache expiration threshold must be set for expiration mode {self._expiration_mode}"
            )

        log.debug(
            "Cache configuration:\n- root: %s\n- expiration mode: %s",
            self._cache_directory if self._cache_directory else "tmpdir",
            f"{self._expiration_mode}={self._expiration_threshold}" if self._expiration_mode else "disabled",
        )

        # Files in cache, indexed by path within the cache directory.
        self._cache_entries = CacheRegistry()

    @property
    def cache_directory(self) -> ResourcePath:
        if self._cache_directory is None:
            # Create on demand.
            self._cache_directory = ResourcePath(
                tempfile.mkdtemp(prefix="butler-"), forceDirectory=True, isTemporary=True
            )
            log.debug("Creating temporary cache directory at %s", self._cache_directory)
            # Remove when we no longer need it.
            atexit.register(remove_cache_directory, self._cache_directory.ospath)
        return self._cache_directory

    @property
    def _temp_exempt_directory(self) -> ResourcePath:
        """Return the directory in which to store temporary cache files that
        should not be expired.
        """
        return self.cache_directory.join(self._temp_exemption_prefix)

    @property
    def cache_size(self) -> int:
        return self._cache_entries.cache_size

    @property
    def file_count(self) -> int:
        return len(self._cache_entries)

    def should_be_cached(self, entity: Union[DatasetRef, DatasetType, StorageClass]) -> bool:
        # Docstring inherited
        matchName: Union[LookupKey, str] = "{} (via default)".format(entity)
        should_cache = self._caching_default

        for key in entity._lookupNames():
            if key in self._lut:
                should_cache = bool(self._lut[key])
                matchName = key
                break

        if not isinstance(should_cache, bool):
            raise TypeError(
                f"Got cache value {should_cache!r} for config entry {matchName!r}; expected bool."
            )

        log.debug("%s (match: %s) should%s be cached", entity, matchName, "" if should_cache else " not")
        return should_cache
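
    # Illustrative "cacheable" configuration for the lookup above (the dataset
    # type and storage class names, and the exact YAML layout, are assumptions);
    # keys are matched against dataset type or storage class names and values
    # must be booleans:
    #
    #   cached:
    #     default: false
    #     cacheable:
    #       raw: true
    #       ExposureF: false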

    def _construct_cache_name(self, ref: DatasetRef, extension: str) -> ResourcePath:
        """Construct the name to use for this dataset in the cache.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset to look up in or write to the cache.
        extension : `str`
            File extension to use for this file. Should include the
            leading "``.``".

        Returns
        -------
        uri : `lsst.resources.ResourcePath`
            URI to use for this dataset in the cache.
        """
        return _construct_cache_path(self.cache_directory, ref, extension)

    def move_to_cache(self, uri: ResourcePath, ref: DatasetRef) -> Optional[ResourcePath]:
        # Docstring inherited
        if ref.id is None:
            raise ValueError(f"Can not cache a file associated with an unresolved reference ({ref})")

        if not self.should_be_cached(ref):
            return None

        # Write the file using the id of the dataset ref and the file
        # extension.
        cached_location = self._construct_cache_name(ref, uri.getExtension())

        # Run cache expiry to ensure that we have room for this
        # item.
        self._expire_cache()

        # Move into the cache. Given that multiple processes might be
        # sharing a single cache directory, and the file we need might have
        # been copied in whilst we were checking, allow overwrite without
        # complaint. Even for a private cache directory it is possible that
        # a second butler in a subprocess could be writing to it.
        cached_location.transfer_from(uri, transfer="move", overwrite=True)
        log.debug("Cached dataset %s to %s", ref, cached_location)

        self._register_cache_entry(cached_location)

        return cached_location

    @contextlib.contextmanager
    def find_in_cache(self, ref: DatasetRef, extension: str) -> Iterator[Optional[ResourcePath]]:
        # Docstring inherited
        # Short circuit this if the cache directory has not been created yet.
        if self._cache_directory is None:
            yield None
            return

        cached_location = self._construct_cache_name(ref, extension)
        if cached_location.exists():
            log.debug("Found cached file %s for dataset %s.", cached_location, ref)

            # The cached file could be removed by another process doing
            # cache expiration so we need to protect against that by making
            # a copy in a different tree. Use hardlinks to ensure that
            # we either have the cached file or we don't. This is robust
            # against race conditions that can be caused by using soft links
            # and the other end of the link being deleted just after it
            # is created.
            path_in_cache = cached_location.relative_to(self.cache_directory)
            assert path_in_cache is not None, f"Somehow {cached_location} not in cache directory"
            temp_location: Optional[ResourcePath] = self._temp_exempt_directory.join(path_in_cache)
            try:
                if temp_location is not None:
                    temp_location.transfer_from(cached_location, transfer="hardlink")
            except Exception as e:
                log.debug("Detected error creating hardlink for dataset %s: %s", ref, e)
                # Any failure will be treated as if the file was not
                # in the cache. Yielding the original cache location
                # is too dangerous.
                temp_location = None

            try:
                log.debug("Yielding temporary cache location %s for dataset %s", temp_location, ref)
                yield temp_location
            finally:
                try:
                    if temp_location:
                        temp_location.remove()
                except FileNotFoundError:
                    pass
            return

        log.debug("Dataset %s not found in cache.", ref)
        yield None
        return
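
    # Illustrative usage of the two calls above (``cache_manager``, ``ref`` and
    # ``local_uri`` are assumed to exist elsewhere):
    #
    #   cached = cache_manager.move_to_cache(local_uri, ref)
    #   with cache_manager.find_in_cache(ref, ".fits") as cached_uri:
    #       if cached_uri is not None:
    #           ...  # read cached_uri; it is protected from expiry here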

    def remove_from_cache(self, refs: Union[DatasetRef, Iterable[DatasetRef]]) -> None:
        # Docstring inherited.

        # Stop early if there are no cache entries anyhow.
        if len(self._cache_entries) == 0:
            return

        if isinstance(refs, DatasetRef):
            refs = [refs]

        # Create a set of all the IDs.
        all_ids = {ref.getCheckedId() for ref in refs}

        keys_to_remove = []
        for key, entry in self._cache_entries.items():
            if entry.ref in all_ids:
                keys_to_remove.append(key)
        self._remove_from_cache(keys_to_remove)

    def _register_cache_entry(self, cached_location: ResourcePath, can_exist: bool = False) -> str:
        """Record the file in the cache registry.

        Parameters
        ----------
        cached_location : `lsst.resources.ResourcePath`
            Location of the file to be registered.
        can_exist : `bool`, optional
            If `True` the item being registered can already be listed.
            This can allow a cache refresh to run without checking the
            file again. If `False` it is an error for the registry to
            already know about this file.

        Returns
        -------
        cache_key : `str`
            The key used in the registry for this file.
        """
        path_in_cache = cached_location.relative_to(self.cache_directory)
        if path_in_cache is None:
            raise ValueError(
                f"Can not register cached file {cached_location} that is not within"
                f" the cache directory at {self.cache_directory}."
            )
        if path_in_cache in self._cache_entries:
            if can_exist:
                return path_in_cache
            else:
                raise ValueError(
                    f"Cached file {cached_location} is already known to the registry"
                    " but this was expected to be a new file."
                )
        details = CacheEntry.from_file(cached_location, root=self.cache_directory)
        self._cache_entries[path_in_cache] = details
        return path_in_cache

    def scan_cache(self) -> None:
        """Scan the cache directory and record information about files."""
        found = set()
        for file in ResourcePath.findFileResources([self.cache_directory]):
            assert isinstance(file, ResourcePath), "Unexpectedly did not get ResourcePath from iterator"

            # Skip any that are found in an exempt part of the hierarchy
            # since they should not be part of the registry.
            if file.relative_to(self._temp_exempt_directory) is not None:
                continue

            path_in_cache = self._register_cache_entry(file, can_exist=True)
            found.add(path_in_cache)

        # Find any files that were recorded in the cache but are no longer
        # on disk (something else may have cleared them out).
        known_to_cache = set(self._cache_entries)
        missing = known_to_cache - found

        if missing:
            log.debug(
                "Entries no longer on disk but thought to be in cache and so removed: %s", ",".join(missing)
            )
            for path_in_cache in missing:
                self._cache_entries.pop(path_in_cache)

    def _remove_from_cache(self, cache_entries: Iterable[str]) -> None:
        """Remove the specified cache entries from cache.

        Parameters
        ----------
        cache_entries : iterable of `str`
            The entries to remove from the cache. The values are the path
            within the cache.
        """
        for entry in cache_entries:
            path = self.cache_directory.join(entry)

            self._cache_entries.pop(entry)
            log.debug("Removing file from cache: %s", path)
            try:
                path.remove()
            except FileNotFoundError:
                pass

    def _expire_cache(self) -> None:
        """Expire the files in the cache.

        Notes
        -----
        The expiration modes are defined by the config or can be overridden.
        Available options:

        * ``files``: Number of files.
        * ``datasets``: Number of datasets.
        * ``size``: Total size of files.
        * ``age``: Age of files.

        The first three remove the oldest entries first.
        Number of files is complicated by the possibility of disassembled
        composites where 10 small files can be created for each dataset.

        Additionally there is a use case for an external user to explicitly
        state the dataset refs that should be cached and then when to
        remove them, overriding any global configuration.
        """
        if self._expiration_mode is None:
            # Expiration has been disabled.
            return

        # mypy can't be sure we have set a threshold properly
        if self._expiration_threshold is None:
            log.warning(
                "Requesting cache expiry of mode %s but no threshold set in config.", self._expiration_mode
            )
            return

        # Sync up cache. There is no file locking involved so for a shared
        # cache multiple processes may be racing to delete files. Deleting
        # a file that no longer exists is not an error.
        self.scan_cache()

        if self._expiration_mode == "files":
            n_files = len(self._cache_entries)
            n_over = n_files - self._expiration_threshold
            if n_over > 0:
                sorted_keys = self._sort_cache()
                keys_to_remove = sorted_keys[:n_over]
                self._remove_from_cache(keys_to_remove)
            return

        if self._expiration_mode == "datasets":
            # Count the datasets, in ascending timestamp order,
            # so that oldest turn up first.
            datasets = defaultdict(list)
            for key in self._sort_cache():
                entry = self._cache_entries[key]
                datasets[entry.ref].append(key)

            n_datasets = len(datasets)
            n_over = n_datasets - self._expiration_threshold
            if n_over > 0:
                # Keys will be read out in insert order which
                # will be date order so oldest ones are removed.
                ref_ids = list(datasets.keys())[:n_over]
                keys_to_remove = list(itertools.chain.from_iterable(datasets[ref_id] for ref_id in ref_ids))
                self._remove_from_cache(keys_to_remove)
            return

        if self._expiration_mode == "size":
            if self.cache_size > self._expiration_threshold:
                for key in self._sort_cache():
                    self._remove_from_cache([key])
                    if self.cache_size <= self._expiration_threshold:
                        break
            return

        if self._expiration_mode == "age":
            now = datetime.datetime.utcnow()
            for key in self._sort_cache():
                delta = now - self._cache_entries[key].ctime
                # Use total_seconds() so that ages longer than a day are
                # compared correctly against the threshold in seconds.
                if delta.total_seconds() > self._expiration_threshold:
                    self._remove_from_cache([key])
                else:
                    # We're already in date order.
                    break
            return

        raise ValueError(f"Unrecognized cache expiration mode of {self._expiration_mode}")

    def _sort_cache(self) -> List[str]:
        """Sort the cache entries by time and return the sorted keys.

        Returns
        -------
        sorted : `list` of `str`
            Keys into the cache, sorted by time with oldest first.
        """

        def sort_by_time(key: str) -> datetime.datetime:
            """Sorter key function using cache entry details."""
            return self._cache_entries[key].ctime

        return sorted(self._cache_entries, key=sort_by_time)

    def __str__(self) -> str:
        cachedir = self._cache_directory if self._cache_directory else "<tempdir>"
        return (
            f"{type(self).__name__}@{cachedir} ({self._expiration_mode}={self._expiration_threshold},"
            f"default={self._caching_default}) "
            f"n_files={self.file_count}, n_bytes={self.cache_size}"
        )


class DatastoreDisabledCacheManager(AbstractDatastoreCacheManager):
    """A variant of the datastore cache where no cache is enabled.

    Parameters
    ----------
    config : `str` or `DatastoreCacheManagerConfig`
        Configuration to control caching.
    universe : `DimensionUniverse`
        Set of all known dimensions, used to expand and validate any used
        in lookup keys.
    """

    def __init__(self, config: Union[str, DatastoreCacheManagerConfig], universe: DimensionUniverse):
        return

    def should_be_cached(self, entity: Union[DatasetRef, DatasetType, StorageClass]) -> bool:
        """Indicate whether the entity should be added to the cache.

        Always returns `False`.
        """
        return False

    def move_to_cache(self, uri: ResourcePath, ref: DatasetRef) -> Optional[ResourcePath]:
        """Move dataset to cache but always refuse and return `None`."""
        return None

    @contextlib.contextmanager
    def find_in_cache(self, ref: DatasetRef, extension: str) -> Iterator[Optional[ResourcePath]]:
        """Look for a dataset in the cache and return its location.

        Never finds a file.
        """
        yield None

    def remove_from_cache(self, ref: Union[DatasetRef, Iterable[DatasetRef]]) -> None:
        """Remove datasets from cache.

        Always does nothing.
        """
        return

    def __str__(self) -> str:
        return f"{type(self).__name__}()"