Coverage for python / lsst / daf / butler / registry / datasets / byDimensions / _dataset_type_cache.py: 0%
52 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-18 08:43 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-18 08:43 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30__all__ = ("DatasetTypeCache",)
32from collections.abc import Iterable, Iterator
34from ...._dataset_type import DatasetType
35from ....dimensions import DimensionGroup
36from .tables import DynamicTables, TableCache
class DatasetTypeCache:
    """Cache for dataset types.

    Notes
    -----
    This cache is a set of mappings with different kinds of keys:

    - Dataset type name -> (`DatasetType`, database integer primary key)
    - Database integer primary key -> `DatasetType`
    - `DimensionGroup` -> database table information

    In some contexts (e.g. ``resolve_wildcard``) a full list of dataset types
    is needed. To signify that cache content can be used in such contexts,
    cache defines a special ``full`` flag that needs to be set by client.
    """

    def __init__(self) -> None:
        # Cache of per-dimension-group table schemas; shared across clones.
        self.tables = TableCache()
        # Dataset type name -> (dataset type, database primary key).
        self._by_name_cache: dict[str, tuple[DatasetType, int]] = {}
        # Database primary key -> dataset type; must stay consistent with
        # the by-name cache (every entry there has a mirror entry here).
        self._by_id_cache: dict[int, DatasetType] = {}
        # Dimension group -> dynamic table information.
        self._by_dimensions_cache: dict[DimensionGroup, DynamicTables] = {}
        # Whether the by-name/by-id caches hold *all* known dataset types.
        self._full = False

    def clone(self) -> DatasetTypeCache:
        """Make a copy of the caches that are safe to use in another thread.

        Notes
        -----
        After cloning, the ``tables`` cache will be shared between the new
        instance and the current instance. It is safe to read and update
        ``tables`` from multiple threads simultaneously -- the cached values
        are immutable table schemas, and they are looked up one at a time by
        name.

        The other caches are copied, because their access patterns are more
        complex.

        ``full`` will initially return `False` in the new instance. This
        preserves the invariant that a Butler is able to see any changes to
        the database made before the Butler is instantiated. The downside is
        that the cloned cache will have to be re-fetched before it can be
        used for glob searches.
        """
        clone = DatasetTypeCache()
        # Share the table cache between instances (values are immutable).
        clone.tables = self.tables
        # The inner key/value objects are immutable in all of these caches,
        # so we can shallow-copy the dicts.
        clone._by_name_cache = self._by_name_cache.copy()
        # Copy the by-id cache as well so that lookups by primary key remain
        # consistent with lookups by name in the clone.
        clone._by_id_cache = self._by_id_cache.copy()
        clone._by_dimensions_cache = self._by_dimensions_cache.copy()
        return clone

    @property
    def full(self) -> bool:
        """`True` if cache holds all known dataset types (`bool`)."""
        return self._full

    def add(self, dataset_type: DatasetType, id: int) -> None:
        """Add one record to the cache.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Dataset type, replaces any existing dataset type with the same
            name.
        id : `int`
            The dataset type primary key.
        """
        # Note: ``id`` shadows the builtin, but the name is part of the
        # public keyword interface and is kept for compatibility.
        self._by_name_cache[dataset_type.name] = (dataset_type, id)
        self._by_id_cache[id] = dataset_type

    def set(
        self,
        data: Iterable[tuple[DatasetType, int]],
        *,
        full: bool = False,
        dimensions_data: Iterable[tuple[DimensionGroup, DynamicTables]] | None = None,
        dimensions_full: bool = False,
    ) -> None:
        """Replace cache contents with the new set of dataset types.

        Parameters
        ----------
        data : `~collections.abc.Iterable`
            Sequence of tuples of `DatasetType` and its database primary key.
        full : `bool`, optional
            If `True` then ``data`` contains all known dataset types.
        dimensions_data : `~collections.abc.Iterable`, optional
            Sequence of tuples of `DimensionGroup` and its table information.
        dimensions_full : `bool`, optional
            If `True` then ``dimensions_data`` contains all known dataset
            type dimensions.  Currently unused; accepted for interface
            compatibility with callers.
        """
        self.clear()
        for dataset_type, key in data:
            self.add(dataset_type, key)
        self._full = full
        if dimensions_data is not None:
            self._by_dimensions_cache.update(dimensions_data)

    def clear(self) -> None:
        """Remove everything from the cache."""
        self._by_name_cache = {}
        self._by_dimensions_cache = {}
        self._by_id_cache = {}
        self._full = False

    def get(self, name: str) -> tuple[DatasetType | None, int | None]:
        """Return cached info given dataset type name.

        Parameters
        ----------
        name : `str`
            Dataset type name.

        Returns
        -------
        dataset_type : `DatasetType` or `None`
            Cached dataset type, `None` is returned if the name is not in the
            cache.
        id : `int` or `None`
            Cached dataset type primary key, `None` is returned if the name
            is not in the cache.
        """
        item = self._by_name_cache.get(name)
        if item is None:
            return (None, None)
        return item

    def get_by_id(self, id: int) -> DatasetType | None:
        """Return cached dataset type given the dataset type ID.

        Parameters
        ----------
        id : `int`
            Dataset type ID (database key).

        Returns
        -------
        dataset_type : `DatasetType` or `None`
            The `DatasetType` information associated with the given ID, or
            `None` if the ID is not in the cache.
        """
        return self._by_id_cache.get(id, None)

    def items(self) -> Iterator[tuple[DatasetType, int]]:
        """Return iterator for the set of items in the cache, can only be
        used if `full` is true.

        Returns
        -------
        iter : `~collections.abc.Iterator`
            Iterator over tuples of `DatasetType` and its database primary
            key.

        Raises
        ------
        RuntimeError
            Raised if ``self.full`` is `False`.
        """
        if not self._full:
            raise RuntimeError("cannot call items() if cache is not full")
        return iter(self._by_name_cache.values())

    def add_by_dimensions(self, dimensions: DimensionGroup, tables: DynamicTables) -> None:
        """Add information about a set of dataset type dimensions to the cache.

        Parameters
        ----------
        dimensions : `DimensionGroup`
            Dimensions of one or more dataset types.
        tables : `DynamicTables`
            Additional opaque object stored with these dimensions.
        """
        self._by_dimensions_cache[dimensions] = tables

    def get_by_dimensions(self, dimensions: DimensionGroup) -> DynamicTables | None:
        """Get information about a set of dataset type dimensions.

        Parameters
        ----------
        dimensions : `DimensionGroup`
            Dimensions of one or more dataset types.

        Returns
        -------
        tables : `DynamicTables` or `None`
            Additional opaque object stored with these dimensions, or `None` if
            these dimensions are not present in the cache.
        """
        return self._by_dimensions_cache.get(dimensions)