# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__all__ = ["makeTestRepo", "makeTestCollection", "addDatasetType", "expandUniqueId"]

import random
from lsst.daf.butler import Butler, Config, DatasetType


def makeTestRepo(root, dataIds, *, config=None, **kwargs):
    """Create an empty repository with dummy data IDs.

    Parameters
    ----------
    root : `str`
        The location of the root directory for the repository.
    dataIds : `~collections.abc.Mapping` [`str`, `iterable`]
        A mapping keyed by the dimensions used in the test. Each value
        is an iterable of names for that dimension (e.g., detector IDs for
        `"detector"`). Related dimensions (e.g., instruments and detectors)
        are linked arbitrarily.
    config : `lsst.daf.butler.Config`, optional
        A configuration for the repository (for details, see
        `lsst.daf.butler.Butler.makeRepo`). If omitted, creates a repository
        with default dataset and storage types, but optimized for speed.
        The defaults set ``.datastore.cls``, ``.datastore.checksum`` and
        ``.registry.db``. If a supplied config does not specify these
        values, the internal defaults are used to ensure a usable
        configuration.
    **kwargs
        Extra arguments to `lsst.daf.butler.Butler.makeRepo`.

    Returns
    -------
    butler : `lsst.daf.butler.Butler`
        A Butler referring to the new repository. This Butler is provided
        only for additional setup; to keep test cases isolated, it is highly
        recommended that each test create its own Butler with a
        unique run/collection. See `makeTestCollection`.

    Notes
    -----
    This function provides a "quick and dirty" repository for simple unit
    tests that don't depend on complex data relationships. Because it
    assigns dimension relationships and other metadata arbitrarily, it is
    ill-suited for tests where the structure of the data matters. If you
    need such a dataset, create it directly or use a saved test dataset.

    Since the values in ``dataIds`` uniquely determine the repository's
    data IDs, the fully linked IDs can be recovered by calling
    `expandUniqueId`, so long as no other code has inserted dimensions into
    the repository registry.
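
    Examples
    --------
    A sketch of typical setup, not run as a doctest because it creates
    files on disk; the data ID values are purely illustrative:

    .. code-block:: py

       >>> repo = makeTestRepo(
               "testdir", {"instrument": ["notACam"], "detector": [1]})
       >>> butler = makeTestCollection(repo)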
72 """
    defaults = Config()
    defaults["datastore", "cls"] = "lsst.daf.butler.datastores.inMemoryDatastore.InMemoryDatastore"
    defaults["datastore", "checksum"] = False  # In case of future changes
    defaults["registry", "db"] = "sqlite:///:memory:"

    if config:
        defaults.update(config)

    # newConfig guards against location-related keywords like outfile
    newConfig = Butler.makeRepo(root, config=defaults, **kwargs)
    butler = Butler(newConfig, writeable=True)
    dimensionRecords = _makeRecords(dataIds, butler.registry.dimensions)
    for dimension, records in dimensionRecords.items():
        butler.registry.insertDimensionData(dimension, *records)
    return butler


def makeTestCollection(repo):
    """Create a read/write Butler to a fresh collection.

    Parameters
    ----------
    repo : `lsst.daf.butler.Butler`
        A previously existing Butler to a repository, such as that returned
        by `~lsst.daf.butler.Butler.makeRepo` or `makeTestRepo`.

    Returns
    -------
    butler : `lsst.daf.butler.Butler`
        A Butler referring to a new collection in the repository ``repo``.
        The collection is (almost) guaranteed to be new.
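
    Examples
    --------
    A minimal sketch, not run as a doctest because it requires an existing
    repository; each call returns a Butler with its own fresh run:

    .. code-block:: py

       >>> repo = makeTestRepo(
               "testdir", {"instrument": ["notACam"], "detector": [1]})
       >>> butler1 = makeTestCollection(repo)
       >>> butler2 = makeTestCollection(repo)  # isolated from butler1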
104 """
    # Create a "random" collection name
    # Speed matters more than cryptographic guarantees
    collection = "test" + str(random.randrange(1_000_000_000))
    return Butler(butler=repo, run=collection)


def _makeRecords(dataIds, universe):
    """Create cross-linked dimension records from a collection of
    data ID values.

    Parameters
    ----------
    dataIds : `~collections.abc.Mapping` [`str`, `iterable`]
        A mapping keyed by the dimensions of interest. Each value is an
        iterable of names for that dimension (e.g., detector IDs for
        `"detector"`).
    universe : `lsst.daf.butler.DimensionUniverse`
        Set of all known dimensions and their relationships.

    Returns
    -------
    records : `~collections.abc.Mapping` [`str`, `iterable`]
        A mapping keyed by the dimensions of interest, giving one
        `~lsst.daf.butler.DimensionRecord` for each input name. Related
        dimensions (e.g., instruments and detectors) are linked arbitrarily.
    """
    expandedIds = {}
    # Provide alternate keys like detector names
    for name, values in dataIds.items():
        expandedIds[name] = []
        dimension = universe[name]
        for value in values:
            expandedValue = {}
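            # Cast the value to each alternate key's type (e.g., a string
            # name as well as an integer ID); the except clause below
            # substitutes a default when a cast is impossible.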
            for key in dimension.uniqueKeys:
                if key.nbytes:
                    castType = bytes
                else:
                    castType = key.dtype().python_type
                try:
                    castValue = castType(value)
                except TypeError:
                    castValue = castType()
                expandedValue[key.name] = castValue
            for key in dimension.metadata:
                if not key.nullable:
                    expandedValue[key.name] = key.dtype().python_type(value)
            expandedIds[name].append(expandedValue)

    # Pick cross-relationships arbitrarily
    for name, values in expandedIds.items():
        dimension = universe[name]
        for value in values:
            for other in dimension.required:
                if other != dimension:
                    relation = expandedIds[other.name][0]
                    value[other.name] = relation[other.primaryKey.name]
            # Do not recurse, to keep the user from having to provide
            # irrelevant dimensions
            for other in dimension.implied:
                if other != dimension and other.name in expandedIds and other.viewOf is None:
                    relation = expandedIds[other.name][0]
                    value[other.name] = relation[other.primaryKey.name]

    return {dimension: [universe[dimension].RecordClass.fromDict(value) for value in values]
            for dimension, values in expandedIds.items()}


def expandUniqueId(butler, partialId):
    """Return a complete data ID matching some criterion.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The repository to query.
    partialId : `~collections.abc.Mapping` [`str`, any]
        A mapping of known dimensions and values.

    Returns
    -------
    dataId : `lsst.daf.butler.DataCoordinate`
        The unique data ID that matches ``partialId``.

    Raises
    ------
    ValueError
        Raised if ``partialId`` does not uniquely identify a data ID.

    Notes
    -----
    This method will only work correctly if all dimensions attached to the
    target dimension (e.g., "physical_filter" for "visit") are known to the
    repository, even if they're not needed to identify a dataset.

    Examples
    --------
    .. code-block:: py

       >>> butler = makeTestRepo(
               "testdir", {"instrument": ["notACam"], "detector": [1]})
       >>> expandUniqueId(butler, {"detector": 1})
       DataCoordinate({instrument, detector}, ('notACam', 1))
    """
    # The example is *not* a doctest because it requires dangerous I/O
    registry = butler.registry
    dimensions = registry.dimensions.extract(partialId.keys()).required

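    # Butler query expressions use an SQL-like syntax; !r quotes string
    # values so they parse as literals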
    query = " AND ".join(f"{dimension} = {value!r}" for dimension, value in partialId.items())

    dataId = list(registry.queryDimensions(dimensions, where=query, expand=False))
    if len(dataId) == 1:
        return dataId[0]
    else:
        raise ValueError(f"Found {len(dataId)} matches for {partialId}, expected 1.")


def addDatasetType(butler, name, dimensions, storageClass):
    """Add a new dataset type to a repository.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The repository to update.
    name : `str`
        The name of the dataset type.
    dimensions : `set` [`str`]
        The dimensions of the new dataset type.
    storageClass : `str`
        The storage class the dataset will use.

    Returns
    -------
    datasetType : `lsst.daf.butler.DatasetType`
        The new type.

    Raises
    ------
    ValueError
        Raised if the dimensions or storage class is invalid.

    Notes
    -----
    Dataset types are shared across all collections in a repository, so this
    function does not need to be run for each collection.
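
    Examples
    --------
    A sketch of typical usage, not run as a doctest because it requires an
    existing repository. The type and dimension names are illustrative; the
    storage class must already be known to the repository (e.g. "NumpyArray"
    in the default configuration):

    .. code-block:: py

       >>> addDatasetType(
               butler, "DummyType", {"instrument", "detector"}, "NumpyArray")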
248 """
    try:
        datasetType = DatasetType(name, dimensions, storageClass,
                                  universe=butler.registry.dimensions)
        butler.registry.registerDatasetType(datasetType)
        return datasetType
    except KeyError as e:
        raise ValueError(f"Invalid dimensions or storage class: {e}") from e