# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.


__all__ = ["makeTestRepo", "makeTestCollection", "addDatasetType", "expandUniqueId", "DatastoreMock",
           "addDataIdValue",
           ]

import random
from typing import (
    Any,
    Iterable,
    Mapping,
    Optional,
    Set,
    Tuple,
)
from unittest.mock import MagicMock

import sqlalchemy

from lsst.daf.butler import (
    Butler,
    Config,
    DataCoordinate,
    DatasetRef,
    DatasetType,
    Dimension,
    DimensionUniverse,
    FileDataset,
    Registry,
)


def makeTestRepo(root: str,
                 dataIds: Optional[Mapping[str, Iterable]] = None, *,
                 config: Config = None,
                 **kwargs) -> Butler:
    """Create an empty test repository.

    Parameters
    ----------
    root : `str`
        The location of the root directory for the repository.
    dataIds : `~collections.abc.Mapping` [`str`, `iterable`], optional
        A mapping keyed by the dimensions used in the test. Each value
        is an iterable of names for that dimension (e.g., detector IDs for
        `"detector"`). Related dimensions (e.g., instruments and detectors)
        are linked arbitrarily. This parameter is provided for compatibility
        with old code; newer code should make the repository, then call
        `~lsst.daf.butler.tests.addDataIdValue`.
    config : `lsst.daf.butler.Config`, optional
        A configuration for the repository (for details, see
        `lsst.daf.butler.Butler.makeRepo`). If omitted, creates a repository
        with default dataset and storage types, but optimized for speed.
        The defaults set ``.datastore.cls``, ``.datastore.checksum`` and
        ``.registry.db``. If a supplied config does not specify these values,
        the internal defaults are used to ensure a usable configuration.
    **kwargs
        Extra arguments to `lsst.daf.butler.Butler.makeRepo`.

    Returns
    -------
    butler : `lsst.daf.butler.Butler`
        A Butler referring to the new repository. This Butler is provided only
        for additional setup; to keep test cases isolated, it is highly
        recommended that each test create its own Butler with a
        unique run/collection. See `makeTestCollection`.

    Notes
    -----
    This function provides a "quick and dirty" repository for simple unit
    tests that don't depend on complex data relationships. It is ill-suited
    for tests where the structure of the data matters. If you need such a
    dataset, create it directly or use a saved test dataset.
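
    Examples
    --------
    A minimal sketch of typical use (the repository path and dimension
    values are purely illustrative)::

        repo = makeTestRepo("testdir")
        addDataIdValue(repo, "instrument", "notACam")
        addDataIdValue(repo, "detector", 1)
        butler = makeTestCollection(repo)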
95 """
96 defaults = Config()
97 defaults["datastore", "cls"] = "lsst.daf.butler.datastores.inMemoryDatastore.InMemoryDatastore"
98 defaults["datastore", "checksum"] = False # In case of future changes
99 defaults["registry", "db"] = "sqlite:///<butlerRoot>/gen3.sqlite3"
101 if config:
102 defaults.update(config)
104 if not dataIds:
105 dataIds = {}
107 # Disable config root by default so that our registry override will
108 # not be ignored.
109 # newConfig guards against location-related keywords like outfile
110 newConfig = Butler.makeRepo(root, config=defaults, forceConfigRoot=False, **kwargs)
111 butler = Butler(newConfig, writeable=True)
112 dimensionRecords = _makeRecords(dataIds, butler.registry.dimensions)
113 for dimension, records in dimensionRecords.items():
114 butler.registry.insertDimensionData(dimension, *records)
115 return butler


def makeTestCollection(repo: Butler) -> Butler:
    """Create a read/write Butler to a fresh collection.

    Parameters
    ----------
    repo : `lsst.daf.butler.Butler`
        A previously existing Butler to a repository, such as that returned by
        `~lsst.daf.butler.Butler.makeRepo` or `makeTestRepo`.

    Returns
    -------
    butler : `lsst.daf.butler.Butler`
        A Butler referring to a new collection in the repository pointed to by
        ``repo``. The collection is (almost) guaranteed to be new.

    Notes
    -----
    This function creates a single run collection that does not necessarily
    conform to any repository conventions. It is only suitable for creating an
    isolated test area, and not for repositories intended for real data
    processing or analysis.
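
    Examples
    --------
    A sketch of typical use in a test fixture (the repository path is
    illustrative)::

        repo = makeTestRepo("testdir")
        butler = makeTestCollection(repo)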
139 """
140 # Create a "random" collection name
141 # Speed matters more than cryptographic guarantees
142 collection = "test" + str(random.randrange(1_000_000_000))
143 return Butler(butler=repo, run=collection)


def _makeRecords(dataIds: Mapping[str, Iterable],
                 universe: DimensionUniverse) -> Mapping[str, Iterable]:
    """Create cross-linked dimension records from a collection of
    data ID values.

    Parameters
    ----------
    dataIds : `~collections.abc.Mapping` [`str`, `iterable`]
        A mapping keyed by the dimensions of interest. Each value is an
        iterable of names for that dimension (e.g., detector IDs for
        `"detector"`).
    universe : `lsst.daf.butler.DimensionUniverse`
        Set of all known dimensions and their relationships.

    Returns
    -------
    dataIds : `~collections.abc.Mapping` [`str`, `iterable`]
        A mapping keyed by the dimensions of interest, giving one
        `~lsst.daf.butler.DimensionRecord` for each input name. Related
        dimensions (e.g., instruments and detectors) are linked arbitrarily.
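
    Examples
    --------
    A sketch of a typical call (the dimension names and values are
    illustrative)::

        _makeRecords({"instrument": ["notACam"], "detector": [1, 2]},
                     butler.registry.dimensions)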
166 """
167 expandedIds = {}
168 # Provide alternate keys like detector names
169 for name, values in dataIds.items():
170 expandedIds[name] = []
171 dimension = universe[name]
172 for value in values:
173 expandedIds[name].append(_fillAllKeys(dimension, value))
175 # Pick cross-relationships arbitrarily
176 for name, values in expandedIds.items():
177 dimension = universe[name]
178 for value in values:
179 for other in dimension.required:
180 if other != dimension:
181 relation = expandedIds[other.name][0]
182 value[other.name] = relation[other.primaryKey.name]
183 # Do not recurse, to keep the user from having to provide
184 # irrelevant dimensions
185 for other in dimension.implied:
186 if other != dimension and other.name in expandedIds and other.viewOf is None:
187 relation = expandedIds[other.name][0]
188 value[other.name] = relation[other.primaryKey.name]
190 return {dimension: [universe[dimension].RecordClass(**value) for value in values]
191 for dimension, values in expandedIds.items()}


def _fillAllKeys(dimension: Dimension, value: Any) -> Mapping[str, Any]:
    """Create an arbitrary mapping of all required keys for a given dimension
    that do not refer to other dimensions.

    Parameters
    ----------
    dimension : `lsst.daf.butler.Dimension`
        The dimension for which to generate a set of keys (e.g., detector).
    value
        The value assigned to ``dimension`` (e.g., detector ID).

    Returns
    -------
    expandedValue : `dict` [`str`]
        A mapping of dimension keys to values. ``dimension``'s primary key
        maps to ``value``, but all other mappings (e.g., detector name)
        are arbitrary.
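
    Examples
    --------
    Assuming a dimension universe in which a detector has an integer ID and
    a string ``full_name``, a call might expand roughly as follows (the
    exact keys depend on the dimension definitions)::

        _fillAllKeys(universe["detector"], 101)
        # -> {"id": 101, "full_name": "101"}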
211 """
212 expandedValue = {}
213 for key in dimension.uniqueKeys:
214 if key.nbytes:
215 castType = bytes
216 else:
217 castType = key.dtype().python_type
218 try:
219 castValue = castType(value)
220 except TypeError:
221 castValue = castType()
222 expandedValue[key.name] = castValue
223 for key in dimension.metadata:
224 if not key.nullable:
225 expandedValue[key.name] = key.dtype().python_type(value)
226 return expandedValue


def _matchAnyDataId(record: Mapping[str, Any], registry: Registry, dimension: Dimension):
    """Match a partial dimension record to an existing record along a
    specific dimension.

    Parameters
    ----------
    record : `dict` [`str`]
        A mapping representing the record to be matched; it is updated in
        place with the matching value for ``dimension``.
    registry : `lsst.daf.butler.Registry`
        The registry with all known dimension records.
    dimension : `lsst.daf.butler.Dimension`
        The dimension on which to find a match for ``record``.

    Raises
    ------
    RuntimeError
        Raised if there are no existing records for ``dimension``.
    """
    matches = list(registry.queryDimensionRecords(dimension.name))
    if matches:
        record[dimension.name] = matches[0].dataId[dimension.name]
    else:
        raise RuntimeError(f"No matching values for {dimension.name} found.")


def _fillRelationships(dimension: Dimension,
                       dimensionInfo: Mapping[str, Any],
                       existing: Registry) -> Mapping[str, Any]:
    """Create arbitrary mappings from one dimension to all dimensions it
    depends on.

    Parameters
    ----------
    dimension : `lsst.daf.butler.Dimension`
        The dimension for which to generate relationships.
    dimensionInfo : `dict` [`str`]
        A mapping of dimension keys to values.
    existing : `lsst.daf.butler.Registry`
        The registry with all previously registered dimensions.

    Returns
    -------
    filledInfo : `dict` [`str`]
        A version of ``dimensionInfo`` with extra mappings for any
        relationships required by ``dimension``. Any relationships already
        defined in ``dimensionInfo`` are preserved.

    Raises
    ------
    RuntimeError
        Raised if ``dimension`` depends on a dimension for which no values
        exist yet.
    """
    filledInfo = dimensionInfo.copy()
    for other in dimension.required:
        if other != dimension and other.name not in filledInfo:
            _matchAnyDataId(filledInfo, existing, other)
    # Do not recurse, to keep the user from having to provide
    # irrelevant dimensions.
    for other in dimension.implied:
        toUpdate = other != dimension and other.name not in filledInfo
        updatable = other.viewOf is None
        # Do not run the query if either toUpdate or updatable is false
        if toUpdate and updatable and list(existing.queryDimensionRecords(other)):
            _matchAnyDataId(filledInfo, existing, other)
    return filledInfo


def expandUniqueId(butler: Butler, partialId: Mapping[str, Any]) -> DataCoordinate:
    """Return a complete data ID matching some criterion.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The repository to query.
    partialId : `~collections.abc.Mapping` [`str`]
        A mapping of known dimensions and values.

    Returns
    -------
    dataId : `lsst.daf.butler.DataCoordinate`
        The unique data ID that matches ``partialId``.

    Raises
    ------
    ValueError
        Raised if ``partialId`` does not uniquely identify a data ID.

    Notes
    -----
    This function will only work correctly if all dimensions attached to the
    target dimension (e.g., "physical_filter" for "visit") are known to the
    repository, even if they're not needed to identify a dataset. It is only
    suitable for certain kinds of test repositories, and not for repositories
    intended for real data processing or analysis.

    Examples
    --------
    .. code-block:: py

       >>> butler = makeTestRepo(
               "testdir", {"instrument": ["notACam"], "detector": [1]})
       >>> expandUniqueId(butler, {"detector": 1})
       DataCoordinate({instrument, detector}, ('notACam', 1))
    """
    # The example is *not* a doctest because it requires dangerous I/O
    registry = butler.registry
    dimensions = registry.dimensions.extract(partialId.keys()).required

    query = " AND ".join(f"{dimension} = {value!r}" for dimension, value in partialId.items())

    # Much of the purpose of this function is to do something we explicitly
    # reject most of the time: query for a governor dimension (e.g. instrument)
    # given something that depends on it (e.g. visit), hence check=False.
    dataId = list(registry.queryDataIds(dimensions, where=query, check=False))
    if len(dataId) == 1:
        return dataId[0]
    else:
        raise ValueError(f"Found {len(dataId)} matches for {partialId}, expected 1.")


def addDataIdValue(butler: Butler, dimension: str, value: Any, **related: Any):
    """Add a new data ID to a repository.

    Related dimensions (e.g., the instrument associated with a detector) may
    be specified using ``related``. While these keywords are sometimes needed
    to get self-consistent repositories, you do not need to define
    relationships you do not use. Any unspecified dimensions will be
    linked arbitrarily.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The repository to update.
    dimension : `str`
        The name of the dimension to gain a new value.
    value
        The value to register for the dimension.
    **related
        Any existing dimensions to be linked to ``value``.

    Notes
    -----
    Because this function creates filler data, it is only suitable for test
    repositories. It should not be used for repositories intended for real
    data processing or analysis, which have known dimension values.

    Examples
    --------
    See the guide on :ref:`using-butler-in-tests-make-repo` for usage
    examples.
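
    A minimal sketch (the dimension values are purely illustrative)::

        addDataIdValue(butler, "instrument", "notACam")
        addDataIdValue(butler, "detector", 101, instrument="notACam")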
380 """
381 # Example is not doctest, because it's probably unsafe to create even an
382 # in-memory butler in that environment.
383 try:
384 fullDimension = butler.registry.dimensions[dimension]
385 except KeyError as e:
386 raise ValueError from e
387 # Bad keys ignored by registry code
388 extraKeys = related.keys() - (fullDimension.required | fullDimension.implied)
389 if extraKeys:
390 raise ValueError(f"Unexpected keywords {extraKeys} not found "
391 f"in {fullDimension.required | fullDimension.implied}")
393 # Define secondary keys (e.g., detector name given detector id)
394 expandedValue = _fillAllKeys(fullDimension, value)
395 expandedValue.update(**related)
396 completeValue = _fillRelationships(fullDimension, expandedValue, butler.registry)
398 dimensionRecord = fullDimension.RecordClass(**completeValue)
399 try:
400 butler.registry.syncDimensionData(dimension, dimensionRecord)
401 except sqlalchemy.exc.IntegrityError as e:
402 raise RuntimeError("Could not create data ID value. Automatic relationship generation "
403 "may have failed; try adding keywords to assign a specific instrument, "
404 "physical_filter, etc. based on the nested exception message.") from e


def addDatasetType(butler: Butler, name: str, dimensions: Set[str], storageClass: str) -> DatasetType:
    """Add a new dataset type to a repository.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The repository to update.
    name : `str`
        The name of the dataset type.
    dimensions : `set` [`str`]
        The dimensions of the new dataset type.
    storageClass : `str`
        The storage class the dataset will use.

    Returns
    -------
    datasetType : `lsst.daf.butler.DatasetType`
        The new type.

    Raises
    ------
    ValueError
        Raised if the dimensions or storage class is invalid.

    Notes
    -----
    Dataset types are shared across all collections in a repository, so this
    function does not need to be run for each collection.
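
    Examples
    --------
    A sketch of typical use; the dataset type name is arbitrary, and the
    storage class name must match one defined in the repository
    configuration (``"NumpyArray"`` is illustrative)::

        addDatasetType(butler, "DataType1", {"instrument"}, "NumpyArray")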
435 """
436 try:
437 datasetType = DatasetType(name, dimensions, storageClass,
438 universe=butler.registry.dimensions)
439 butler.registry.registerDatasetType(datasetType)
440 return datasetType
441 except KeyError as e:
442 raise ValueError from e


class DatastoreMock:
    """Mocks a butler datastore.

    Has functions that mock the datastore in a butler. Provides an `apply`
    function to replace the relevant butler datastore functions with the mock
    functions.
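
    A sketch of typical use, assuming a butler created with
    `makeTestCollection`::

        butler = makeTestCollection(makeTestRepo("testdir"))
        DatastoreMock.apply(butler)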
451 """
453 @staticmethod
454 def apply(butler):
455 """Apply datastore mocks to a butler."""
456 butler.datastore.export = DatastoreMock._mock_export
457 butler.datastore.get = DatastoreMock._mock_get
458 butler.datastore.ingest = MagicMock()

    @staticmethod
    def _mock_export(refs: Iterable[DatasetRef], *,
                     directory: Optional[str] = None,
                     transfer: Optional[str] = None) -> Iterable[FileDataset]:
        """A mock of `Datastore.export` that satisfies the requirement that
        the refs passed in are included in the `FileDataset` objects
        returned.

        This can be used to construct a `Datastore` mock that can be used
        in repository export via::

            datastore = unittest.mock.Mock(spec=Datastore)
            datastore.export = DatastoreMock._mock_export

        """
        for ref in refs:
            yield FileDataset(refs=[ref],
                              path="mock/path",
                              formatter="lsst.daf.butler.formatters.json.JsonFormatter")

    @staticmethod
    def _mock_get(ref: DatasetRef, parameters: Optional[Mapping[str, Any]] = None
                  ) -> Tuple[int, Optional[Mapping[str, Any]]]:
        """A mock of `Datastore.get` that just returns the integer dataset ID
        value and parameters it was given.
        """
        return (ref.id, parameters)