Coverage for python/lsst/pipe/base/caching_limited_butler.py: 22%
68 statements
# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["CachingLimitedButler"]

import logging
from collections.abc import Iterable, Set
from typing import Any
from lsst.daf.butler import (
    DatasetId,
    DatasetRef,
    DeferredDatasetHandle,
    DimensionUniverse,
    LimitedButler,
    StorageClass,
)
from ._dataset_handle import InMemoryDatasetHandle

_LOG = logging.getLogger(__name__)
class CachingLimitedButler(LimitedButler):
    """A `LimitedButler` that caches datasets.

    A `CachingLimitedButler` caches on both `.put()` and `.get()`, and for
    each dataset type holds a single instance: the most recently put or
    retrieved dataset of that type.

    The dataset types that will be cached on put/get are controlled via the
    `cache_on_put` and `cache_on_get` attributes, respectively.

    By default, copies of the cached items are returned on `get`, so that code
    is free to operate on data in-place. A `no_copy_on_cache` attribute also
    exists to tell the `CachingLimitedButler` not to return copies when it is
    known that the calling code can be trusted not to change values, e.g. when
    passing calibs to `IsrTask`.

    Parameters
    ----------
    wrapped : `LimitedButler`
        The butler to wrap.
    cache_on_put : `set` [`str`], optional
        The dataset types to cache on put.
    cache_on_get : `set` [`str`], optional
        The dataset types to cache on get.
    no_copy_on_cache : `set` [`str`], optional
        The dataset types for which to not return copies when cached.
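
    Examples
    --------
    A minimal usage sketch, assuming an existing `LimitedButler` instance
    ``butler`` and a `DatasetRef` ``ref`` for a dataset type named
    ``"bias"`` (both names are placeholders):

    >>> cached_butler = CachingLimitedButler(
    ...     butler,
    ...     cache_on_get={"bias"},
    ...     no_copy_on_cache={"bias"},
    ... )
    >>> bias = cached_butler.get(ref)  # read through the wrapped butler
    >>> bias = cached_butler.get(ref)  # served from the in-memory cache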
75 """

    def __init__(
        self,
        wrapped: LimitedButler,
        cache_on_put: Set[str] = frozenset(),
        cache_on_get: Set[str] = frozenset(),
        no_copy_on_cache: Set[str] = frozenset(),
    ):
        self._wrapped = wrapped
        self._datastore = self._wrapped._datastore
        self.storageClasses = self._wrapped.storageClasses
        self._cache_on_put = cache_on_put
        self._cache_on_get = cache_on_get
        # Maps dataset type name to the most recently cached dataset of that
        # type, together with its ID so stale entries can be detected.
        self._cache: dict[str, tuple[DatasetId, InMemoryDatasetHandle]] = {}
        self._no_copy_on_cache = no_copy_on_cache

    def get(
        self,
        ref: DatasetRef,
        /,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: StorageClass | str | None = None,
    ) -> Any:
        if storageClass is None:
            storageClass = ref.datasetType.storageClass
        elif isinstance(storageClass, str):
            storageClass = self.storageClasses.getStorageClass(storageClass)

        # Check if we have this dataset type in the cache.
        if cached := self._cache.get(ref.datasetType.name):
            dataset_id, handle = cached
            if dataset_id == ref.id:  # If we do, check it's the right object.
                _LOG.debug("Returning cached dataset %s", ref)
                return handle.get(parameters=parameters, storageClass=storageClass)

        obj = self._wrapped.get(ref, parameters=parameters, storageClass=storageClass)
        # The `not parameters` check makes sure we don't cache sub-images etc.
        if ref.datasetType.name in self._cache_on_get and not parameters:
            handle = InMemoryDatasetHandle(
                obj,
                storageClass=storageClass,
                dataId=ref.dataId,
                copy=ref.datasetType.name not in self._no_copy_on_cache,
            )
            self._cache[ref.datasetType.name] = (ref.id, handle)
            _LOG.debug("Cached dataset %s", ref)
            # Re-get via the handle so that the copy fires if needed.
            return handle.get()
        return obj

    def getDeferred(
        self,
        ref: DatasetRef,
        /,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: str | StorageClass | None = None,
    ) -> DeferredDatasetHandle:
        # Note that this does not use the cache at all.
        return self._wrapped.getDeferred(ref, parameters=parameters, storageClass=storageClass)

    def stored(self, ref: DatasetRef) -> bool:
        return self.stored_many([ref])[ref]  # TODO: remove this once DM-43086 is done.

    def stored_many(self, refs: Iterable[DatasetRef]) -> dict[DatasetRef, bool]:
        result = {}
        unknown_refs = []
        for ref in refs:
            # A cache hit means the dataset is certainly stored.
            if cached := self._cache.get(ref.datasetType.name):
                dataset_id, _ = cached
                if dataset_id == ref.id:
                    result[ref] = True
                    continue
            unknown_refs.append(ref)

        result.update(self._wrapped.stored_many(unknown_refs))
        return result

    def isWriteable(self) -> bool:
        return self._wrapped.isWriteable()

    def put(self, obj: Any, ref: DatasetRef) -> DatasetRef:
        if ref.datasetType.name in self._cache_on_put:
            self._cache[ref.datasetType.name] = (
                ref.id,
                InMemoryDatasetHandle(
                    obj,
                    storageClass=ref.datasetType.storageClass,
                    dataId=ref.dataId,
                    copy=ref.datasetType.name not in self._no_copy_on_cache,
                ),
            )
            _LOG.debug("Cached dataset %s on put", ref)
        return self._wrapped.put(obj, ref)

    def pruneDatasets(
        self,
        refs: Iterable[DatasetRef],
        *,
        disassociate: bool = True,
        unstore: bool = False,
        tags: Iterable[str] = (),
        purge: bool = False,
    ) -> None:
        refs = list(refs)
        # Evict any cache entries that refer to the pruned datasets.
        for ref in refs:
            if cached := self._cache.get(ref.datasetType.name):
                dataset_id, _ = cached
                if dataset_id == ref.id:
                    del self._cache[ref.datasetType.name]

        return self._wrapped.pruneDatasets(
            refs, disassociate=disassociate, unstore=unstore, tags=tags, purge=purge
        )

    @property
    def dimensions(self) -> DimensionUniverse:
        return self._wrapped.dimensions