# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__all__ = ["makeTestRepo", "makeTestCollection", "addDatasetType", "expandUniqueId"]

import random

from lsst.daf.butler import Butler, Config, DatasetType
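

# A typical test module might combine these helpers roughly as follows.
# This is an illustrative sketch only (``MyTestCase``, the data ID values,
# and the dataset type are placeholders; the repository root would normally
# be a temporary directory):
#
#     class MyTestCase(unittest.TestCase):
#         @classmethod
#         def setUpClass(cls):
#             # One shared repository for the whole test class...
#             cls.root = tempfile.mkdtemp()
#             cls.repo = makeTestRepo(
#                 cls.root, {"instrument": ["notACam"], "detector": [1, 2]})
#             addDatasetType(cls.repo, "calexp", {"instrument", "detector"},
#                            "ExposureF")
#
#         def setUp(self):
#             # ...but a fresh run/collection per test, for isolation.
#             self.butler = makeTestCollection(self.repo)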


def makeTestRepo(root, dataIds, *, config=None, **kwargs):
    """Create an empty repository with dummy data IDs.

    Parameters
    ----------
    root : `str`
        The location of the root directory for the repository.
    dataIds : `~collections.abc.Mapping` [`str`, `iterable`]
        A mapping keyed by the dimensions used in the test. Each value
        is an iterable of names for that dimension (e.g., detector IDs for
        `"detector"`). Related dimensions (e.g., instruments and detectors)
        are linked arbitrarily.
    config : `lsst.daf.butler.Config`, optional
        A configuration for the repository (for details, see
        `lsst.daf.butler.Butler.makeRepo`). If omitted, creates a repository
        with default dataset and storage types, but optimized for speed.
        The defaults set ``.datastore.cls``, ``.datastore.checksum`` and
        ``.registry.db``. If a supplied config does not specify these values
        the internal defaults will be used to ensure that we have a usable
        configuration.
    **kwargs
        Extra arguments to `lsst.daf.butler.Butler.makeRepo`.

    Returns
    -------
    butler : `lsst.daf.butler.Butler`
        A Butler referring to the new repository. This Butler is provided
        only for additional setup; to keep test cases isolated, it is highly
        recommended that each test create its own Butler with a unique
        run/collection. See `makeTestCollection`.

    Notes
    -----
    This function provides a "quick and dirty" repository for simple unit
    tests that don't depend on complex data relationships. Because it
    assigns dimension relationships and other metadata arbitrarily, it is
    ill-suited for tests where the structure of the data matters. If you
    need such a dataset, create it directly or use a saved test dataset.

    Since the values in ``dataIds`` uniquely determine the repository's
    data IDs, the fully linked IDs can be recovered by calling
    `expandUniqueId`, so long as no other code has inserted dimensions into
    the repository registry.
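
    Examples
    --------
    A sketch of typical usage (not run as a doctest because it creates
    files on disk; the data ID values are arbitrary placeholders):

    .. code-block:: py

       butler = makeTestRepo(
           "testdir", {"instrument": ["notACam"], "detector": [1, 2]})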
72 """
    defaults = Config()
    defaults["datastore", "cls"] = "lsst.daf.butler.datastores.inMemoryDatastore.InMemoryDatastore"
    defaults["datastore", "checksum"] = False  # In case of future changes
    defaults["registry", "db"] = "sqlite:///<butlerRoot>/gen3.sqlite3"

    if config:
        defaults.update(config)

    # Disable config root by default so that our registry override will
    # not be ignored.
    # newConfig guards against location-related keywords like outfile
    newConfig = Butler.makeRepo(root, config=defaults, forceConfigRoot=False, **kwargs)
    butler = Butler(newConfig, writeable=True)
    dimensionRecords = _makeRecords(dataIds, butler.registry.dimensions)
    for dimension, records in dimensionRecords.items():
        butler.registry.insertDimensionData(dimension, *records)
    return butler


def makeTestCollection(repo):
    """Create a read/write Butler to a fresh collection.

    Parameters
    ----------
    repo : `lsst.daf.butler.Butler`
        A previously existing Butler to a repository, such as that returned
        by `~lsst.daf.butler.Butler.makeRepo` or `makeTestRepo`.

    Returns
    -------
    butler : `lsst.daf.butler.Butler`
        A Butler referring to a new collection in the repository backing
        ``repo``. The collection is (almost) guaranteed to be new.
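
    Examples
    --------
    A sketch of typical usage (illustrative only; ``repo`` is assumed to
    come from `makeTestRepo`):

    .. code-block:: py

       repo = makeTestRepo("testdir", {"instrument": ["notACam"]})
       butler = makeTestCollection(repo)  # isolated collection for one test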
106 """
107 # Create a "random" collection name
108 # Speed matters more than cryptographic guarantees
109 collection = "test" + str(random.randrange(1_000_000_000))
110 return Butler(butler=repo, run=collection)


def _makeRecords(dataIds, universe):
    """Create cross-linked dimension records from a collection of
    data ID values.

    Parameters
    ----------
    dataIds : `~collections.abc.Mapping` [`str`, `iterable`]
        A mapping keyed by the dimensions of interest. Each value is an
        iterable of names for that dimension (e.g., detector IDs for
        `"detector"`).
    universe : `lsst.daf.butler.DimensionUniverse`
        Set of all known dimensions and their relationships.

    Returns
    -------
    dataIds : `~collections.abc.Mapping` [`str`, `iterable`]
        A mapping keyed by the dimensions of interest, giving one
        `~lsst.daf.butler.DimensionRecord` for each input name. Related
        dimensions (e.g., instruments and detectors) are linked arbitrarily.
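
    Examples
    --------
    An illustrative call (the exact record contents depend on the
    dimension universe in use):

    .. code-block:: py

       records = _makeRecords({"instrument": ["notACam"], "detector": [1]},
                              butler.registry.dimensions)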
132 """
    expandedIds = {}
    # Provide alternate keys like detector names
    for name, values in dataIds.items():
        expandedIds[name] = []
        dimension = universe[name]
        for value in values:
            expandedValue = {}
            for key in dimension.uniqueKeys:
                if key.nbytes:
                    castType = bytes
                else:
                    castType = key.dtype().python_type
                try:
                    castValue = castType(value)
                except TypeError:
                    # Fall back to the type's default (e.g., "" or 0) when
                    # the input value cannot be converted
                    castValue = castType()
                expandedValue[key.name] = castValue
            for key in dimension.metadata:
                if not key.nullable:
                    expandedValue[key.name] = key.dtype().python_type(value)
            expandedIds[name].append(expandedValue)

    # Pick cross-relationships arbitrarily
    for name, values in expandedIds.items():
        dimension = universe[name]
        for value in values:
            for other in dimension.required:
                if other != dimension:
                    relation = expandedIds[other.name][0]
                    value[other.name] = relation[other.primaryKey.name]
            # Do not recurse, to keep the user from having to provide
            # irrelevant dimensions
            for other in dimension.implied:
                if other != dimension and other.name in expandedIds and other.viewOf is None:
                    relation = expandedIds[other.name][0]
                    value[other.name] = relation[other.primaryKey.name]

    return {dimension: [universe[dimension].RecordClass(**value) for value in values]
            for dimension, values in expandedIds.items()}


def expandUniqueId(butler, partialId):
    """Return a complete data ID matching some criterion.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The repository to query.
    partialId : `~collections.abc.Mapping` [`str`, any]
        A mapping of known dimensions and values.

    Returns
    -------
    dataId : `lsst.daf.butler.DataCoordinate`
        The unique data ID that matches ``partialId``.

    Raises
    ------
    ValueError
        Raised if ``partialId`` does not uniquely identify a data ID.

    Notes
    -----
    This method will only work correctly if all dimensions attached to the
    target dimension (e.g., "physical_filter" for "visit") are known to the
    repository, even if they're not needed to identify a dataset.

    Examples
    --------
    .. code-block:: py

       >>> butler = makeTestRepo(
               "testdir", {"instrument": ["notACam"], "detector": [1]})
       >>> expandUniqueId(butler, {"detector": 1})
       DataCoordinate({instrument, detector}, ('notACam', 1))
    """
    # The example is *not* a doctest because it requires dangerous I/O
    registry = butler.registry
    dimensions = registry.dimensions.extract(partialId.keys()).required

    query = " AND ".join(f"{dimension} = {value!r}" for dimension, value in partialId.items())

    dataId = list(registry.queryDataIds(dimensions, where=query))
    if len(dataId) == 1:
        return dataId[0]
    else:
        raise ValueError(f"Found {len(dataId)} matches for {partialId}, expected 1.")


def addDatasetType(butler, name, dimensions, storageClass):
    """Add a new dataset type to a repository.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The repository to update.
    name : `str`
        The name of the dataset type.
    dimensions : `set` [`str`]
        The dimensions of the new dataset type.
    storageClass : `str`
        The storage class the dataset will use.

    Returns
    -------
    datasetType : `lsst.daf.butler.DatasetType`
        The new type.

    Raises
    ------
    ValueError
        Raised if the dimensions or storage class is invalid.

    Notes
    -----
    Dataset types are shared across all collections in a repository, so
    this function does not need to be run for each collection.
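
    Examples
    --------
    A sketch of typical usage (``"calexp"`` and ``"ExposureF"`` are
    placeholder names; any dimensions and storage class known to the
    repository work):

    .. code-block:: py

       addDatasetType(butler, "calexp", {"instrument", "detector"},
                      "ExposureF")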
250 """
    try:
        datasetType = DatasetType(name, dimensions, storageClass,
                                  universe=butler.registry.dimensions)
        butler.registry.registerDatasetType(datasetType)
        return datasetType
    except KeyError as e:
        raise ValueError from e