Coverage for python/lsst/daf/butler/tests/_testRepo.py: 14%
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__all__ = [
    "makeTestRepo",
    "makeTestCollection",
    "addDatasetType",
    "expandUniqueId",
    "DatastoreMock",
    "addDataIdValue",
]

import random
from typing import Any, Iterable, Mapping, Optional, Set, Tuple
from unittest.mock import MagicMock

import sqlalchemy
from lsst.daf.butler import (
    Butler,
    Config,
    DataCoordinate,
    DatasetRef,
    DatasetType,
    Dimension,
    DimensionUniverse,
    FileDataset,
    Registry,
)


def makeTestRepo(
    root: str, dataIds: Optional[Mapping[str, Iterable]] = None, *, config: Config = None, **kwargs
) -> Butler:
    """Create an empty test repository.

    Parameters
    ----------
    root : `str`
        The location of the root directory for the repository.
    dataIds : `~collections.abc.Mapping` [`str`, `iterable`], optional
        A mapping keyed by the dimensions used in the test. Each value
        is an iterable of names for that dimension (e.g., detector IDs for
        `"detector"`). Related dimensions (e.g., instruments and detectors)
        are linked arbitrarily. This parameter is provided for compatibility
        with old code; newer code should make the repository, then call
        `~lsst.daf.butler.tests.addDataIdValue`.
    config : `lsst.daf.butler.Config`, optional
        A configuration for the repository (for details, see
        `lsst.daf.butler.Butler.makeRepo`). If omitted, creates a repository
        with default dataset and storage types, but optimized for speed.
        The defaults set ``.datastore.cls``, ``.datastore.checksum`` and
        ``.registry.db``. If a supplied config does not specify these values
        the internal defaults will be used to ensure that we have a usable
        configuration.
    **kwargs
        Extra arguments to `lsst.daf.butler.Butler.makeRepo`.

    Returns
    -------
    butler : `lsst.daf.butler.Butler`
        A Butler referring to the new repository. This Butler is provided only
        for additional setup; to keep test cases isolated, it is highly
        recommended that each test create its own Butler with a
        unique run/collection. See `makeTestCollection`.

    Notes
    -----
    This function provides a "quick and dirty" repository for simple unit
    tests that don't depend on complex data relationships. It is ill-suited
    for tests where the structure of the data matters. If you need such a
    dataset, create it directly or use a saved test dataset.
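
    Examples
    --------
    A minimal sketch of typical use in a test suite; the directory name,
    dimension values, and collection ID below are arbitrary placeholders:

    .. code-block:: py

        >>> repo = makeTestRepo("/tmp/butler_test_repo")
        >>> addDataIdValue(repo, "instrument", "notACam")
        >>> addDataIdValue(repo, "detector", 1)
        >>> butler = makeTestCollection(repo, uniqueId="test1")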
91 """
92 defaults = Config()
93 defaults["datastore", "cls"] = "lsst.daf.butler.datastores.inMemoryDatastore.InMemoryDatastore"
94 defaults["datastore", "checksum"] = False # In case of future changes
95 defaults["registry", "db"] = "sqlite:///<butlerRoot>/gen3.sqlite3"
97 if config:
98 defaults.update(config)
100 if not dataIds:
101 dataIds = {}
103 # Disable config root by default so that our registry override will
104 # not be ignored.
105 # newConfig guards against location-related keywords like outfile
106 newConfig = Butler.makeRepo(root, config=defaults, forceConfigRoot=False, **kwargs)
107 butler = Butler(newConfig, writeable=True)
108 dimensionRecords = _makeRecords(dataIds, butler.registry.dimensions)
109 for dimension, records in dimensionRecords.items():
110 butler.registry.insertDimensionData(dimension, *records)
111 return butler


def makeTestCollection(repo: Butler, uniqueId: Optional[str] = None) -> Butler:
    """Create a read/write Butler to a fresh collection.

    Parameters
    ----------
    repo : `lsst.daf.butler.Butler`
        A previously existing Butler to a repository, such as that returned by
        `~lsst.daf.butler.Butler.makeRepo` or `makeTestRepo`.
    uniqueId : `str`, optional
        A collection ID guaranteed by external code to be unique across all
        calls to ``makeTestCollection`` for the same repository.

    Returns
    -------
    butler : `lsst.daf.butler.Butler`
        A Butler referring to a new collection in ``repo``'s repository.
        The collection is (almost) guaranteed to be new.

    Notes
    -----
    This function creates a single run collection that does not necessarily
    conform to any repository conventions. It is only suitable for creating an
    isolated test area, and not for repositories intended for real data
    processing or analysis.
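
    Examples
    --------
    A sketch of creating isolated collections for two tests; it assumes a
    repository created elsewhere (e.g., with `makeTestRepo`), and the IDs are
    arbitrary as long as they are unique within the repository:

    .. code-block:: py

        >>> butler1 = makeTestCollection(repo, uniqueId="test1")
        >>> butler2 = makeTestCollection(repo, uniqueId="test2")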
138 """
139 if not uniqueId:
140 # Create a "random" collection name
141 # Speed matters more than cryptographic guarantees
142 uniqueId = str(random.randrange(1_000_000_000))
143 collection = "test_" + uniqueId
144 return Butler(butler=repo, run=collection)


def _makeRecords(dataIds: Mapping[str, Iterable], universe: DimensionUniverse) -> Mapping[str, Iterable]:
    """Create cross-linked dimension records from a collection of
    data ID values.

    Parameters
    ----------
    dataIds : `~collections.abc.Mapping` [`str`, `iterable`]
        A mapping keyed by the dimensions of interest. Each value is an
        iterable of names for that dimension (e.g., detector IDs for
        `"detector"`).
    universe : `lsst.daf.butler.DimensionUniverse`
        Set of all known dimensions and their relationships.

    Returns
    -------
    dataIds : `~collections.abc.Mapping` [`str`, `iterable`]
        A mapping keyed by the dimensions of interest, giving one
        `~lsst.daf.butler.DimensionRecord` for each input name. Related
        dimensions (e.g., instruments and detectors) are linked arbitrarily.
    """
    expandedIds = {}
    # Provide alternate keys like detector names
    for name, values in dataIds.items():
        expandedIds[name] = []
        dimension = universe[name]
        for value in values:
            expandedIds[name].append(_fillAllKeys(dimension, value))

    # Pick cross-relationships arbitrarily
    for name, values in expandedIds.items():
        dimension = universe[name]
        for value in values:
            for other in dimension.required:
                if other != dimension:
                    relation = expandedIds[other.name][0]
                    value[other.name] = relation[other.primaryKey.name]
            # Do not recurse, to keep the user from having to provide
            # irrelevant dimensions
            for other in dimension.implied:
                if other != dimension and other.name in expandedIds and other.viewOf is None:
                    relation = expandedIds[other.name][0]
                    value[other.name] = relation[other.primaryKey.name]

    return {
        dimension: [universe[dimension].RecordClass(**value) for value in values]
        for dimension, values in expandedIds.items()
    }


def _fillAllKeys(dimension: Dimension, value: Any) -> Mapping[str, Any]:
    """Create an arbitrary mapping of all required keys for a given dimension
    that do not refer to other dimensions.

    Parameters
    ----------
    dimension : `lsst.daf.butler.Dimension`
        The dimension for which to generate a set of keys (e.g., detector).
    value
        The value assigned to ``dimension`` (e.g., detector ID).

    Returns
    -------
    expandedValue : `dict` [`str`]
        A mapping of dimension keys to values. The primary key of
        ``dimension`` maps to ``value``, but all other mappings (e.g.,
        detector name) are arbitrary.
    """
    expandedValue = {}
    for key in dimension.uniqueKeys:
        if key.nbytes:
            castType = bytes
        else:
            castType = key.dtype().python_type
        try:
            castValue = castType(value)
        except TypeError:
            castValue = castType()
        expandedValue[key.name] = castValue
    for key in dimension.metadata:
        if not key.nullable:
            expandedValue[key.name] = key.dtype().python_type(value)
    return expandedValue


def _matchAnyDataId(record: Mapping[str, Any], registry: Registry, dimension: Dimension):
    """Match a partial dimension record to an existing record along a
    specific dimension.

    The record is updated in place with the matching value.

    Parameters
    ----------
    record : `dict` [`str`]
        A mapping representing the record to be matched.
    registry : `lsst.daf.butler.Registry`
        The registry with all known dimension records.
    dimension : `lsst.daf.butler.Dimension`
        The dimension on which to find a match for ``record``.

    Raises
    ------
    RuntimeError
        Raised if there are no existing records for ``dimension``.
    """
    matches = list(registry.queryDimensionRecords(dimension.name))
    if matches:
        record[dimension.name] = matches[0].dataId[dimension.name]
    else:
        raise RuntimeError(f"No matching values for {dimension.name} found.")


def _fillRelationships(
    dimension: Dimension, dimensionInfo: Mapping[str, Any], existing: Registry
) -> Mapping[str, Any]:
    """Create arbitrary mappings from one dimension to all dimensions it
    depends on.

    Parameters
    ----------
    dimension : `lsst.daf.butler.Dimension`
        The dimension for which to generate relationships.
    dimensionInfo : `dict` [`str`]
        A mapping of dimension keys to values.
    existing : `lsst.daf.butler.Registry`
        The registry with all previously registered dimensions.

    Returns
    -------
    filledInfo : `dict` [`str`]
        A version of ``dimensionInfo`` with extra mappings for any
        relationships required by ``dimension``. Any relationships already
        defined in ``dimensionInfo`` are preserved.

    Raises
    ------
    RuntimeError
        Raised if ``dimension`` depends on a dimension for which no values
        exist yet.
    """
    filledInfo = dimensionInfo.copy()
    for other in dimension.required:
        if other != dimension and other.name not in filledInfo:
            _matchAnyDataId(filledInfo, existing, other)
    # Do not recurse, to keep the user from having to provide
    # irrelevant dimensions.
    for other in dimension.implied:
        toUpdate = other != dimension and other.name not in filledInfo
        updatable = other.viewOf is None
        # Do not run the query if either toUpdate or updatable is false
        if toUpdate and updatable and list(existing.queryDimensionRecords(other)):
            _matchAnyDataId(filledInfo, existing, other)
    return filledInfo


def expandUniqueId(butler: Butler, partialId: Mapping[str, Any]) -> DataCoordinate:
    """Return a complete data ID matching some criterion.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The repository to query.
    partialId : `~collections.abc.Mapping` [`str`]
        A mapping of known dimensions and values.

    Returns
    -------
    dataId : `lsst.daf.butler.DataCoordinate`
        The unique data ID that matches ``partialId``.

    Raises
    ------
    ValueError
        Raised if ``partialId`` does not uniquely identify a data ID.

    Notes
    -----
    This method will only work correctly if all dimensions attached to the
    target dimension (e.g., "physical_filter" for "visit") are known to the
    repository, even if they're not needed to identify a dataset. This function
    is only suitable for certain kinds of test repositories, and not for
    repositories intended for real data processing or analysis.

    Examples
    --------
    .. code-block:: py

        >>> butler = makeTestRepo(
        ...     "testdir", {"instrument": ["notACam"], "detector": [1]})
        >>> expandUniqueId(butler, {"detector": 1})
        DataCoordinate({instrument, detector}, ('notACam', 1))
    """
    # The example is *not* a doctest because it requires dangerous I/O
    registry = butler.registry
    dimensions = registry.dimensions.extract(partialId.keys()).required

    query = " AND ".join(f"{dimension} = {value!r}" for dimension, value in partialId.items())

    # Much of the purpose of this function is to do something we explicitly
    # reject most of the time: query for a governor dimension (e.g. instrument)
    # given something that depends on it (e.g. visit), hence check=False.
    dataId = list(registry.queryDataIds(dimensions, where=query, check=False))
    if len(dataId) == 1:
        return dataId[0]
    else:
        raise ValueError(f"Found {len(dataId)} matches for {partialId}, expected 1.")


def addDataIdValue(butler: Butler, dimension: str, value: Any, **related: Any):
    """Add a new data ID to a repository.

    Related dimensions (e.g., the instrument associated with a detector) may
    be specified using ``related``. While these keywords are sometimes needed
    to get self-consistent repositories, you do not need to define
    relationships you do not use. Any unspecified dimensions will be
    linked arbitrarily.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The repository to update.
    dimension : `str`
        The name of the dimension to gain a new value.
    value
        The value to register for the dimension.
    **related
        Any existing dimensions to be linked to ``value``.

    Notes
    -----
    Because this function creates filler data, it is only suitable for test
    repositories. It should not be used for repositories intended for real data
    processing or analysis, which have known dimension values.

    Examples
    --------
    See the guide on :ref:`using-butler-in-tests-make-repo` for usage examples.
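
    A brief sketch; the instrument, filter, and exposure values below are
    arbitrary placeholders, and ``butler`` is assumed to come from
    `makeTestRepo` or `makeTestCollection`:

    .. code-block:: py

        >>> addDataIdValue(butler, "instrument", "notACam")
        >>> addDataIdValue(butler, "physical_filter", "k2022", band="k")
        >>> addDataIdValue(butler, "exposure", 101, physical_filter="k2022")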
382 """
383 # Example is not doctest, because it's probably unsafe to create even an
384 # in-memory butler in that environment.
385 try:
386 fullDimension = butler.registry.dimensions[dimension]
387 except KeyError as e:
388 raise ValueError from e
389 # Bad keys ignored by registry code
390 extraKeys = related.keys() - (fullDimension.required | fullDimension.implied)
391 if extraKeys:
392 raise ValueError(
393 f"Unexpected keywords {extraKeys} not found "
394 f"in {fullDimension.required | fullDimension.implied}"
395 )
397 # Define secondary keys (e.g., detector name given detector id)
398 expandedValue = _fillAllKeys(fullDimension, value)
399 expandedValue.update(**related)
400 completeValue = _fillRelationships(fullDimension, expandedValue, butler.registry)
402 dimensionRecord = fullDimension.RecordClass(**completeValue)
403 try:
404 butler.registry.syncDimensionData(dimension, dimensionRecord)
405 except sqlalchemy.exc.IntegrityError as e:
406 raise RuntimeError(
407 "Could not create data ID value. Automatic relationship generation "
408 "may have failed; try adding keywords to assign a specific instrument, "
409 "physical_filter, etc. based on the nested exception message."
410 ) from e


def addDatasetType(butler: Butler, name: str, dimensions: Set[str], storageClass: str) -> DatasetType:
    """Add a new dataset type to a repository.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The repository to update.
    name : `str`
        The name of the dataset type.
    dimensions : `set` [`str`]
        The dimensions of the new dataset type.
    storageClass : `str`
        The storage class the dataset will use.

    Returns
    -------
    datasetType : `lsst.daf.butler.DatasetType`
        The new type.

    Raises
    ------
    ValueError
        Raised if the dimensions or storage class is invalid.

    Notes
    -----
    Dataset types are shared across all collections in a repository, so this
    function does not need to be run for each collection.
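
    Examples
    --------
    An illustrative call; the dataset type name, dimensions, and storage
    class are arbitrary choices, and ``butler`` is assumed to be a test
    repository Butler:

    .. code-block:: py

        >>> datasetType = addDatasetType(
        ...     butler, "DataType1", {"instrument", "visit"}, "NumpyArray")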
441 """
442 try:
443 datasetType = DatasetType(name, dimensions, storageClass, universe=butler.registry.dimensions)
444 butler.registry.registerDatasetType(datasetType)
445 return datasetType
446 except KeyError as e:
447 raise ValueError from e


class DatastoreMock:
    """Mocks a butler datastore.

    Has functions that mock the datastore in a butler. Provides an `apply`
    function to replace the relevant butler datastore functions with the mock
    functions.
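
    Examples
    --------
    A sketch of typical use, assuming ``butler`` is a test Butler whose
    datastore contents do not matter:

    .. code-block:: py

        >>> DatastoreMock.apply(butler)
        >>> # Subsequent datastore reads return (dataset ID, parameters)
        >>> # tuples instead of loading real datasets.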
456 """
458 @staticmethod
459 def apply(butler):
460 """Apply datastore mocks to a butler."""
461 butler.datastore.export = DatastoreMock._mock_export
462 butler.datastore.get = DatastoreMock._mock_get
463 butler.datastore.ingest = MagicMock()
465 @staticmethod
466 def _mock_export(
467 refs: Iterable[DatasetRef], *, directory: Optional[str] = None, transfer: Optional[str] = None
468 ) -> Iterable[FileDataset]:
469 """A mock of `Datastore.export` that satisfies the requirement that
470 the refs passed in are included in the `FileDataset` objects
471 returned.
473 This can be used to construct a `Datastore` mock that can be used
474 in repository export via::
476 datastore = unittest.mock.Mock(spec=Datastore)
477 datastore.export = DatastoreMock._mock_export
479 """
480 for ref in refs:
481 yield FileDataset(
482 refs=[ref], path="mock/path", formatter="lsst.daf.butler.formatters.json.JsonFormatter"
483 )
485 @staticmethod
486 def _mock_get(
487 ref: DatasetRef, parameters: Optional[Mapping[str, Any]] = None
488 ) -> Tuple[int, Optional[Mapping[str, Any]]]:
489 """A mock of `Datastore.get` that just returns the integer dataset ID
490 value and parameters it was given.
491 """
492 return (ref.id, parameters)