Coverage for python/lsst/daf/butler/tests/_testRepo.py : 14%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
__all__ = ["makeTestRepo", "makeTestCollection", "addDatasetType", "expandUniqueId", "DatastoreMock"]

import random
from typing import (
    Any,
    Iterable,
    Mapping,
    Optional,
    Tuple,
)
from unittest.mock import MagicMock

from lsst.daf.butler import (
    Butler,
    Config,
    DatasetRef,
    DatasetType,
    FileDataset,
)


def makeTestRepo(root, dataIds, *, config=None, **kwargs):
    """Create an empty repository with dummy data IDs.

    Parameters
    ----------
    root : `str`
        The location of the root directory for the repository.
    dataIds : `~collections.abc.Mapping` [`str`, `iterable`]
        A mapping keyed by the dimensions used in the test. Each value
        is an iterable of names for that dimension (e.g., detector IDs for
        `"detector"`). Related dimensions (e.g., instruments and detectors)
        are linked arbitrarily.
    config : `lsst.daf.butler.Config`, optional
        A configuration for the repository (for details, see
        `lsst.daf.butler.Butler.makeRepo`). If omitted, creates a repository
        with default dataset and storage types, but optimized for speed.
        The defaults set ``.datastore.cls``, ``.datastore.checksum`` and
        ``.registry.db``. If a supplied config does not specify these values
        the internal defaults will be used to ensure that we have a usable
        configuration.
    **kwargs
        Extra arguments to `lsst.daf.butler.Butler.makeRepo`.

    Returns
    -------
    butler : `lsst.daf.butler.Butler`
        A Butler referring to the new repository. This Butler is provided only
        for additional setup; to keep test cases isolated, it is highly
        recommended that each test create its own Butler with a
        unique run/collection. See `makeTestCollection`.

    Notes
    -----
    This function provides a "quick and dirty" repository for simple unit
    tests that don't depend on complex data relationships. Because it assigns
    dimension relationships and other metadata arbitrarily, it is ill-suited
    for tests where the structure of the data matters. If you need such a
    dataset, create it directly or use a saved test dataset.

    Since the values in ``dataIds`` uniquely determine the repository's
    data IDs, the fully linked IDs can be recovered by calling
    `expandUniqueId`, so long as no other code has inserted dimensions into
    the repository registry.
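
    Examples
    --------
    A minimal sketch of typical use (not a doctest because it performs
    filesystem I/O; the directory and instrument names are illustrative):

    .. code-block:: py

       >>> butler = makeTestRepo(
               "testdir", {"instrument": ["notACam"], "detector": [1]})
       >>> collectionButler = makeTestCollection(butler)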
    """
    defaults = Config()
    defaults["datastore", "cls"] = "lsst.daf.butler.datastores.inMemoryDatastore.InMemoryDatastore"
    defaults["datastore", "checksum"] = False  # In case of future changes
    defaults["registry", "db"] = "sqlite:///<butlerRoot>/gen3.sqlite3"

    if config:
        defaults.update(config)

    # Disable config root by default so that our registry override will
    # not be ignored.
    # newConfig guards against location-related keywords like outfile
    newConfig = Butler.makeRepo(root, config=defaults, forceConfigRoot=False, **kwargs)
    butler = Butler(newConfig, writeable=True)
    dimensionRecords = _makeRecords(dataIds, butler.registry.dimensions)
    for dimension, records in dimensionRecords.items():
        butler.registry.insertDimensionData(dimension, *records)
    return butler


def makeTestCollection(repo):
    """Create a read/write Butler to a fresh collection.

    Parameters
    ----------
    repo : `lsst.daf.butler.Butler`
        A previously existing Butler to a repository, such as that returned by
        `~lsst.daf.butler.Butler.makeRepo` or `makeTestRepo`.

    Returns
    -------
    butler : `lsst.daf.butler.Butler`
        A Butler referring to a new collection in the repository backing
        ``repo``. The collection is (almost) guaranteed to be new.
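
    Examples
    --------
    A sketch of intended use (not a doctest; it requires an existing test
    repository, here created with `makeTestRepo`):

    .. code-block:: py

       >>> repo = makeTestRepo(
               "testdir", {"instrument": ["notACam"], "detector": [1]})
       >>> butler = makeTestCollection(repo)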
    """
    # Create a "random" collection name
    # Speed matters more than cryptographic guarantees
    collection = "test" + str(random.randrange(1_000_000_000))
    return Butler(butler=repo, run=collection)


def _makeRecords(dataIds, universe):
    """Create cross-linked dimension records from a collection of
    data ID values.

    Parameters
    ----------
    dataIds : `~collections.abc.Mapping` [`str`, `iterable`]
        A mapping keyed by the dimensions of interest. Each value is an
        iterable of names for that dimension (e.g., detector IDs for
        `"detector"`).
    universe : `lsst.daf.butler.DimensionUniverse`
        Set of all known dimensions and their relationships.

    Returns
    -------
    dataIds : `~collections.abc.Mapping` [`str`, `iterable`]
        A mapping keyed by the dimensions of interest, giving one
        `~lsst.daf.butler.DimensionRecord` for each input name. Related
        dimensions (e.g., instruments and detectors) are linked arbitrarily.
    """
    expandedIds = {}
    # Provide alternate keys like detector names
    for name, values in dataIds.items():
        expandedIds[name] = []
        dimension = universe[name]
        for value in values:
            expandedValue = {}
            for key in dimension.uniqueKeys:
                if key.nbytes:
                    castType = bytes
                else:
                    castType = key.dtype().python_type
                try:
                    castValue = castType(value)
                except TypeError:
                    castValue = castType()
                expandedValue[key.name] = castValue
            for key in dimension.metadata:
                if not key.nullable:
                    expandedValue[key.name] = key.dtype().python_type(value)
            expandedIds[name].append(expandedValue)

    # Pick cross-relationships arbitrarily
    for name, values in expandedIds.items():
        dimension = universe[name]
        for value in values:
            for other in dimension.required:
                if other != dimension:
                    relation = expandedIds[other.name][0]
                    value[other.name] = relation[other.primaryKey.name]
            # Do not recurse, to keep the user from having to provide
            # irrelevant dimensions
            for other in dimension.implied:
                if other != dimension and other.name in expandedIds and other.viewOf is None:
                    relation = expandedIds[other.name][0]
                    value[other.name] = relation[other.primaryKey.name]

    return {dimension: [universe[dimension].RecordClass(**value) for value in values]
            for dimension, values in expandedIds.items()}


def expandUniqueId(butler, partialId):
    """Return a complete data ID matching some criterion.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The repository to query.
    partialId : `~collections.abc.Mapping` [`str`, any]
        A mapping of known dimensions and values.

    Returns
    -------
    dataId : `lsst.daf.butler.DataCoordinate`
        The unique data ID that matches ``partialId``.

    Raises
    ------
    ValueError
        Raised if ``partialId`` does not uniquely identify a data ID.

    Notes
    -----
    This method will only work correctly if all dimensions attached to the
    target dimension (e.g., "physical_filter" for "visit") are known to the
    repository, even if they're not needed to identify a dataset.

    Examples
    --------
    .. code-block:: py

       >>> butler = makeTestRepo(
               "testdir", {"instrument": ["notACam"], "detector": [1]})
       >>> expandUniqueId(butler, {"detector": 1})
       DataCoordinate({instrument, detector}, ('notACam', 1))
    """
    # The example is *not* a doctest because it requires dangerous I/O
    registry = butler.registry
    dimensions = registry.dimensions.extract(partialId.keys()).required

    query = " AND ".join(f"{dimension} = {value!r}" for dimension, value in partialId.items())

    # Much of the purpose of this function is to do something we explicitly
    # reject most of the time: query for a governor dimension (e.g. instrument)
    # given something that depends on it (e.g. visit), hence check=False.
    dataId = list(registry.queryDataIds(dimensions, where=query, check=False))
    if len(dataId) == 1:
        return dataId[0]
    else:
        raise ValueError(f"Found {len(dataId)} matches for {partialId}, expected 1.")


def addDatasetType(butler, name, dimensions, storageClass):
    """Add a new dataset type to a repository.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The repository to update.
    name : `str`
        The name of the dataset type.
    dimensions : `set` [`str`]
        The dimensions of the new dataset type.
    storageClass : `str`
        The storage class the dataset will use.

    Returns
    -------
    datasetType : `lsst.daf.butler.DatasetType`
        The new type.

    Raises
    ------
    ValueError
        Raised if the dimensions or storage class is invalid.

    Notes
    -----
    Dataset types are shared across all collections in a repository, so this
    function does not need to be run for each collection.
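
    Examples
    --------
    A sketch of typical use (not a doctest; it requires an existing
    repository, and the dataset type and storage class names are purely
    illustrative):

    .. code-block:: py

       >>> butler = makeTestRepo(
               "testdir", {"instrument": ["notACam"], "detector": [1]})
       >>> addDatasetType(butler, "dummyType", {"instrument", "detector"},
                          "StructuredDataDict")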
    """
    try:
        datasetType = DatasetType(name, dimensions, storageClass,
                                  universe=butler.registry.dimensions)
        butler.registry.registerDatasetType(datasetType)
        return datasetType
    except KeyError as e:
        raise ValueError from e


class DatastoreMock:
    """Mocks a butler datastore.

    Has functions that mock the datastore in a butler. Provides an `apply`
    function to replace the relevant butler datastore functions with the mock
    functions.
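
    A sketch of intended use (assuming a repository and collection created
    with `makeTestRepo` and `makeTestCollection`; after mocking,
    ``butler.datastore.get`` returns the dataset ID and parameters,
    ``butler.datastore.ingest`` is a `~unittest.mock.MagicMock`, and
    ``butler.datastore.export`` yields placeholder `FileDataset` objects)::

        butler = makeTestCollection(makeTestRepo(
            "testdir", {"instrument": ["notACam"], "detector": [1]}))
        DatastoreMock.apply(butler)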
    """

    @staticmethod
    def apply(butler):
        """Apply datastore mocks to a butler."""
        butler.datastore.export = DatastoreMock._mock_export
        butler.datastore.get = DatastoreMock._mock_get
        butler.datastore.ingest = MagicMock()

    @staticmethod
    def _mock_export(refs: Iterable[DatasetRef], *,
                     directory: Optional[str] = None,
                     transfer: Optional[str] = None) -> Iterable[FileDataset]:
        """A mock of `Datastore.export` that satisfies the requirement that
        the refs passed in are included in the `FileDataset` objects
        returned.

        This can be used to construct a `Datastore` mock that can be used
        in repository export via::

            datastore = unittest.mock.Mock(spec=Datastore)
            datastore.export = DatastoreMock._mock_export

        """
        for ref in refs:
            yield FileDataset(refs=[ref],
                              path="mock/path",
                              formatter="lsst.daf.butler.formatters.json.JsonFormatter")

    @staticmethod
    def _mock_get(ref: DatasetRef, parameters: Optional[Mapping[str, Any]] = None
                  ) -> Tuple[int, Optional[Mapping[str, Any]]]:
        """A mock of `Datastore.get` that just returns the integer dataset ID
        value and parameters it was given.
        """
        return (ref.id, parameters)