Coverage for python/lsst/daf/butler/tests/_testRepo.py : 13%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__all__ = ["makeTestRepo", "makeTestCollection", "addDatasetType", "expandUniqueId", "DatastoreMock",
           "addDataIdValue",
           ]

import random
from typing import (
    Any,
    Iterable,
    Mapping,
    Optional,
    Set,
    Tuple,
)
from unittest.mock import MagicMock

import sqlalchemy

from lsst.daf.butler import (
    Butler,
    Config,
    DataCoordinate,
    DatasetRef,
    DatasetType,
    Dimension,
    DimensionUniverse,
    FileDataset,
    Registry,
)


def makeTestRepo(root: str,
                 dataIds: Optional[Mapping[str, Iterable]] = None, *,
                 config: Optional[Config] = None,
                 **kwargs) -> Butler:
    """Create an empty test repository.

    Parameters
    ----------
    root : `str`
        The location of the root directory for the repository.
    dataIds : `~collections.abc.Mapping` [`str`, `iterable`], optional
        A mapping keyed by the dimensions used in the test. Each value
        is an iterable of names for that dimension (e.g., detector IDs for
        `"detector"`). Related dimensions (e.g., instruments and detectors)
        are linked arbitrarily. This parameter is provided for compatibility
        with old code; newer code should make the repository, then call
        `~lsst.daf.butler.tests.addDataIdValue`.
    config : `lsst.daf.butler.Config`, optional
        A configuration for the repository (for details, see
        `lsst.daf.butler.Butler.makeRepo`). If omitted, creates a repository
        with default dataset and storage types, but optimized for speed.
        The defaults set ``.datastore.cls``, ``.datastore.checksum`` and
        ``.registry.db``. If a supplied config does not specify these values,
        the internal defaults are used to ensure a usable configuration.
    **kwargs
        Extra arguments to `lsst.daf.butler.Butler.makeRepo`.

    Returns
    -------
    butler : `lsst.daf.butler.Butler`
        A Butler referring to the new repository. This Butler is provided
        only for additional setup; to keep test cases isolated, it is highly
        recommended that each test create its own Butler with a unique
        run/collection. See `makeTestCollection`.

    Notes
    -----
    This function provides a "quick and dirty" repository for simple unit
    tests that don't depend on complex data relationships. It is ill-suited
    for tests where the structure of the data matters. If you need such a
    dataset, create it directly or use a saved test dataset.
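
    Examples
    --------
    A minimal sketch of typical use (not run as a doctest because it writes
    to disk; the directory name and dimension values are illustrative):

    .. code-block:: py

       >>> import tempfile
       >>> root = tempfile.mkdtemp()
       >>> butler = makeTestRepo(root)
       >>> addDataIdValue(butler, "instrument", "notACam")
       >>> addDataIdValue(butler, "detector", 1, instrument="notACam")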
95 """
96 defaults = Config()
97 defaults["datastore", "cls"] = "lsst.daf.butler.datastores.inMemoryDatastore.InMemoryDatastore"
98 defaults["datastore", "checksum"] = False # In case of future changes
99 defaults["registry", "db"] = "sqlite:///<butlerRoot>/gen3.sqlite3"
101 if config:
102 defaults.update(config)
104 if not dataIds:
105 dataIds = {}
107 # Disable config root by default so that our registry override will
108 # not be ignored.
109 # newConfig guards against location-related keywords like outfile
110 newConfig = Butler.makeRepo(root, config=defaults, forceConfigRoot=False, **kwargs)
111 butler = Butler(newConfig, writeable=True)
112 dimensionRecords = _makeRecords(dataIds, butler.registry.dimensions)
113 for dimension, records in dimensionRecords.items():
114 butler.registry.insertDimensionData(dimension, *records)
115 return butler


def makeTestCollection(repo: Butler, uniqueId: Optional[str] = None) -> Butler:
    """Create a read/write Butler to a fresh collection.

    Parameters
    ----------
    repo : `lsst.daf.butler.Butler`
        A previously existing Butler to a repository, such as that returned
        by `~lsst.daf.butler.Butler.makeRepo` or `makeTestRepo`.
    uniqueId : `str`, optional
        A collection ID guaranteed by external code to be unique across all
        calls to ``makeTestCollection`` for the same repository.

    Returns
    -------
    butler : `lsst.daf.butler.Butler`
        A Butler referring to a new collection in the repository.
        The collection is (almost) guaranteed to be new.

    Notes
    -----
    This function creates a single run collection that does not necessarily
    conform to any repository conventions. It is only suitable for creating
    an isolated test area, and not for repositories intended for real data
    processing or analysis.
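
    Examples
    --------
    A minimal sketch of per-test use, assuming ``repo`` was returned by
    `makeTestRepo` (not a doctest because it requires a repository on disk):

    .. code-block:: py

       >>> butler = makeTestCollection(repo, uniqueId="test1")
       >>> # Datasets put through this Butler land in the run "test_test1".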
142 """
143 if not uniqueId:
144 # Create a "random" collection name
145 # Speed matters more than cryptographic guarantees
146 uniqueId = str(random.randrange(1_000_000_000))
147 collection = "test_" + uniqueId
148 return Butler(butler=repo, run=collection)


def _makeRecords(dataIds: Mapping[str, Iterable],
                 universe: DimensionUniverse) -> Mapping[str, Iterable]:
    """Create cross-linked dimension records from a collection of
    data ID values.

    Parameters
    ----------
    dataIds : `~collections.abc.Mapping` [`str`, `iterable`]
        A mapping keyed by the dimensions of interest. Each value is an
        iterable of names for that dimension (e.g., detector IDs for
        `"detector"`).
    universe : `lsst.daf.butler.DimensionUniverse`
        Set of all known dimensions and their relationships.

    Returns
    -------
    dataIds : `~collections.abc.Mapping` [`str`, `iterable`]
        A mapping keyed by the dimensions of interest, giving one
        `~lsst.daf.butler.DimensionRecord` for each input name. Related
        dimensions (e.g., instruments and detectors) are linked arbitrarily.
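
    Examples
    --------
    An illustrative call, assuming ``butler`` refers to a test repository:

    .. code-block:: py

       >>> universe = butler.registry.dimensions
       >>> records = _makeRecords(
       ...     {"instrument": ["notACam"], "detector": [1, 2]}, universe)
       >>> # records["detector"] holds two DimensionRecords, each linked to
       >>> # the "notACam" instrument record.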
171 """
172 expandedIds = {}
173 # Provide alternate keys like detector names
174 for name, values in dataIds.items():
175 expandedIds[name] = []
176 dimension = universe[name]
177 for value in values:
178 expandedIds[name].append(_fillAllKeys(dimension, value))
180 # Pick cross-relationships arbitrarily
181 for name, values in expandedIds.items():
182 dimension = universe[name]
183 for value in values:
184 for other in dimension.required:
185 if other != dimension:
186 relation = expandedIds[other.name][0]
187 value[other.name] = relation[other.primaryKey.name]
188 # Do not recurse, to keep the user from having to provide
189 # irrelevant dimensions
190 for other in dimension.implied:
191 if other != dimension and other.name in expandedIds and other.viewOf is None:
192 relation = expandedIds[other.name][0]
193 value[other.name] = relation[other.primaryKey.name]
195 return {dimension: [universe[dimension].RecordClass(**value) for value in values]
196 for dimension, values in expandedIds.items()}


def _fillAllKeys(dimension: Dimension, value: Any) -> Mapping[str, Any]:
    """Create an arbitrary mapping of all required keys for a given dimension
    that do not refer to other dimensions.

    Parameters
    ----------
    dimension : `lsst.daf.butler.Dimension`
        The dimension for which to generate a set of keys (e.g., detector).
    value
        The value assigned to ``dimension`` (e.g., detector ID).

    Returns
    -------
    expandedValue : `dict` [`str`]
        A mapping of dimension keys to values. The primary key of
        ``dimension`` maps to ``value``; all other entries (e.g., detector
        name) are arbitrary.
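
    Examples
    --------
    An illustrative call; the exact keys returned depend on the configured
    dimension universe, so the output is shown schematically:

    .. code-block:: py

       >>> universe = butler.registry.dimensions
       >>> _fillAllKeys(universe["detector"], 101)
       {'id': 101, 'full_name': '101'}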
216 """
217 expandedValue = {}
218 for key in dimension.uniqueKeys:
219 if key.nbytes:
220 castType = bytes
221 else:
222 castType = key.dtype().python_type
223 try:
224 castValue = castType(value)
225 except TypeError:
226 castValue = castType()
227 expandedValue[key.name] = castValue
228 for key in dimension.metadata:
229 if not key.nullable:
230 expandedValue[key.name] = key.dtype().python_type(value)
231 return expandedValue


def _matchAnyDataId(record: Mapping[str, Any], registry: Registry, dimension: Dimension):
    """Match a partial dimension record to an existing record along a
    specific dimension, modifying the record in place.

    Parameters
    ----------
    record : `dict` [`str`]
        A mapping representing the record to be matched.
    registry : `lsst.daf.butler.Registry`
        The registry with all known dimension records.
    dimension : `lsst.daf.butler.Dimension`
        The dimension on which to find a match for ``record``.

    Raises
    ------
    RuntimeError
        Raised if there are no existing records for ``dimension``.
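
    Examples
    --------
    An illustrative call, assuming an instrument record has already been
    registered in ``butler.registry``:

    .. code-block:: py

       >>> universe = butler.registry.dimensions
       >>> record = {"id": 42, "full_name": "42"}
       >>> _matchAnyDataId(record, butler.registry, universe["instrument"])
       >>> # record["instrument"] now names some previously inserted
       >>> # instrument.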
251 """
252 matches = list(registry.queryDimensionRecords(dimension.name))
253 if matches:
254 record[dimension.name] = matches[0].dataId[dimension.name]
255 else:
256 raise RuntimeError(f"No matching values for {dimension.name} found.")


def _fillRelationships(dimension: Dimension,
                       dimensionInfo: Mapping[str, Any],
                       existing: Registry) -> Mapping[str, Any]:
    """Create arbitrary mappings from one dimension to all dimensions it
    depends on.

    Parameters
    ----------
    dimension : `lsst.daf.butler.Dimension`
        The dimension for which to generate relationships.
    dimensionInfo : `dict` [`str`]
        A mapping of dimension keys to values.
    existing : `lsst.daf.butler.Registry`
        The registry with all previously registered dimensions.

    Returns
    -------
    filledInfo : `dict` [`str`]
        A version of ``dimensionInfo`` with extra mappings for any
        relationships required by ``dimension``. Any relationships already
        defined in ``dimensionInfo`` are preserved.

    Raises
    ------
    RuntimeError
        Raised if ``dimension`` depends on a dimension for which no values
        exist yet.
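
    Examples
    --------
    An illustrative call, assuming an instrument record already exists in
    ``butler.registry``:

    .. code-block:: py

       >>> universe = butler.registry.dimensions
       >>> detectorInfo = {"id": 42, "full_name": "42"}
       >>> filled = _fillRelationships(
       ...     universe["detector"], detectorInfo, butler.registry)
       >>> # filled now also contains an "instrument" entry copied from the
       >>> # existing instrument record.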
286 """
287 filledInfo = dimensionInfo.copy()
288 for other in dimension.required:
289 if other != dimension and other.name not in filledInfo:
290 _matchAnyDataId(filledInfo, existing, other)
291 # Do not recurse, to keep the user from having to provide
292 # irrelevant dimensions.
293 for other in dimension.implied:
294 toUpdate = other != dimension and other.name not in filledInfo
295 updatable = other.viewOf is None
296 # Do not run query if either toUpdate or updatable is false
297 if toUpdate and updatable and list(existing.queryDimensionRecords(other)):
298 _matchAnyDataId(filledInfo, existing, other)
299 return filledInfo


def expandUniqueId(butler: Butler, partialId: Mapping[str, Any]) -> DataCoordinate:
    """Return a complete data ID matching some criterion.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The repository to query.
    partialId : `~collections.abc.Mapping` [`str`]
        A mapping of known dimensions and values.

    Returns
    -------
    dataId : `lsst.daf.butler.DataCoordinate`
        The unique data ID that matches ``partialId``.

    Raises
    ------
    ValueError
        Raised if ``partialId`` does not uniquely identify a data ID.

    Notes
    -----
    This method will only work correctly if all dimensions attached to the
    target dimension (e.g., "physical_filter" for "visit") are known to the
    repository, even if they're not needed to identify a dataset. This
    function is only suitable for certain kinds of test repositories, and not
    for repositories intended for real data processing or analysis.

    Examples
    --------
    .. code-block:: py

       >>> butler = makeTestRepo(
       ...     "testdir", {"instrument": ["notACam"], "detector": [1]})
       >>> expandUniqueId(butler, {"detector": 1})
       DataCoordinate({instrument, detector}, ('notACam', 1))
    """
    # The example is *not* a doctest because it requires dangerous I/O
    registry = butler.registry
    dimensions = registry.dimensions.extract(partialId.keys()).required

    query = " AND ".join(f"{dimension} = {value!r}" for dimension, value in partialId.items())

    # Much of the purpose of this function is to do something we explicitly
    # reject most of the time: query for a governor dimension (e.g. instrument)
    # given something that depends on it (e.g. visit), hence check=False.
    dataId = list(registry.queryDataIds(dimensions, where=query, check=False))
    if len(dataId) == 1:
        return dataId[0]
    else:
        raise ValueError(f"Found {len(dataId)} matches for {partialId}, expected 1.")


def addDataIdValue(butler: Butler, dimension: str, value: Any, **related: Any):
    """Add a new data ID to a repository.

    Related dimensions (e.g., the instrument associated with a detector) may
    be specified using ``related``. While these keywords are sometimes needed
    to get self-consistent repositories, you do not need to define
    relationships you do not use. Any unspecified dimensions will be
    linked arbitrarily.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The repository to update.
    dimension : `str`
        The name of the dimension to gain a new value.
    value
        The value to register for the dimension.
    **related
        Any existing dimensions to be linked to ``value``.

    Notes
    -----
    Because this function creates filler data, it is only suitable for test
    repositories. It should not be used for repositories intended for real
    data processing or analysis, which have known dimension values.

    Examples
    --------
    See the guide on :ref:`using-butler-in-tests-make-repo` for usage
    examples.
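
    A minimal inline sketch, assuming ``butler`` was created with
    `makeTestRepo` (the instrument name is illustrative; not a doctest
    because it writes to a repository):

    .. code-block:: py

       >>> addDataIdValue(butler, "instrument", "notACam")
       >>> addDataIdValue(butler, "detector", 1, instrument="notACam")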
385 """
386 # Example is not doctest, because it's probably unsafe to create even an
387 # in-memory butler in that environment.
388 try:
389 fullDimension = butler.registry.dimensions[dimension]
390 except KeyError as e:
391 raise ValueError from e
392 # Bad keys ignored by registry code
393 extraKeys = related.keys() - (fullDimension.required | fullDimension.implied)
394 if extraKeys:
395 raise ValueError(f"Unexpected keywords {extraKeys} not found "
396 f"in {fullDimension.required | fullDimension.implied}")
398 # Define secondary keys (e.g., detector name given detector id)
399 expandedValue = _fillAllKeys(fullDimension, value)
400 expandedValue.update(**related)
401 completeValue = _fillRelationships(fullDimension, expandedValue, butler.registry)
403 dimensionRecord = fullDimension.RecordClass(**completeValue)
404 try:
405 butler.registry.syncDimensionData(dimension, dimensionRecord)
406 except sqlalchemy.exc.IntegrityError as e:
407 raise RuntimeError("Could not create data ID value. Automatic relationship generation "
408 "may have failed; try adding keywords to assign a specific instrument, "
409 "physical_filter, etc. based on the nested exception message.") from e


def addDatasetType(butler: Butler, name: str, dimensions: Set[str], storageClass: str) -> DatasetType:
    """Add a new dataset type to a repository.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The repository to update.
    name : `str`
        The name of the dataset type.
    dimensions : `set` [`str`]
        The dimensions of the new dataset type.
    storageClass : `str`
        The storage class the dataset will use.

    Returns
    -------
    datasetType : `lsst.daf.butler.DatasetType`
        The new type.

    Raises
    ------
    ValueError
        Raised if the dimensions or storage class is invalid.

    Notes
    -----
    Dataset types are shared across all collections in a repository, so this
    function does not need to be run for each collection.
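
    Examples
    --------
    A minimal sketch; the dataset type name, dimensions, and storage class
    shown are illustrative and must already be known to the repository:

    .. code-block:: py

       >>> datasetType = addDatasetType(
       ...     butler, "calexp", {"instrument", "visit", "detector"},
       ...     "ExposureF")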
440 """
441 try:
442 datasetType = DatasetType(name, dimensions, storageClass,
443 universe=butler.registry.dimensions)
444 butler.registry.registerDatasetType(datasetType)
445 return datasetType
446 except KeyError as e:
447 raise ValueError from e


class DatastoreMock:
    """Mocks a butler datastore.

    Has functions that mock the datastore in a butler. Provides an `apply`
    function to replace the relevant butler datastore functions with the mock
    functions.
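
    A typical use in a test, assuming ``butler`` is a test Butler and ``ref``
    is a `DatasetRef` already stored through it:

    .. code-block:: py

       >>> DatastoreMock.apply(butler)
       >>> butler.datastore.get(ref)  # returns (ref.id, None)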
456 """
458 @staticmethod
459 def apply(butler):
460 """Apply datastore mocks to a butler."""
461 butler.datastore.export = DatastoreMock._mock_export
462 butler.datastore.get = DatastoreMock._mock_get
463 butler.datastore.ingest = MagicMock()
465 @staticmethod
466 def _mock_export(refs: Iterable[DatasetRef], *,
467 directory: Optional[str] = None,
468 transfer: Optional[str] = None) -> Iterable[FileDataset]:
469 """A mock of `Datastore.export` that satisfies the requirement that
470 the refs passed in are included in the `FileDataset` objects
471 returned.
473 This can be used to construct a `Datastore` mock that can be used
474 in repository export via::
476 datastore = unittest.mock.Mock(spec=Datastore)
477 datastore.export = DatastoreMock._mock_export
479 """
480 for ref in refs:
481 yield FileDataset(refs=[ref],
482 path="mock/path",
483 formatter="lsst.daf.butler.formatters.json.JsonFormatter")
485 @staticmethod
486 def _mock_get(ref: DatasetRef, parameters: Optional[Mapping[str, Any]] = None
487 ) -> Tuple[int, Optional[Mapping[str, Any]]]:
488 """A mock of `Datastore.get` that just returns the integer dataset ID
489 value and parameters it was given.
490 """
491 return (ref.id, parameters)