# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__all__ = ["makeTestRepo", "makeTestCollection", "addDatasetType", "expandUniqueId"]

import numpy as np

from lsst.daf.butler import Butler, Config, DatasetType


def makeTestRepo(root, dataIds, *, config=None, **kwargs):
    """Create an empty repository with dummy data IDs.

    Parameters
    ----------
    root : `str`
        The location of the root directory for the repository.
    dataIds : `~collections.abc.Mapping` [`str`, `iterable`]
        A mapping keyed by the dimensions used in the test. Each value
        is an iterable of names for that dimension (e.g., detector IDs for
        `"detector"`). Related dimensions (e.g., instruments and detectors)
        are linked arbitrarily.
    config : `lsst.daf.butler.Config`, optional
        A configuration for the repository (for details, see
        `lsst.daf.butler.Butler.makeRepo`). If omitted, creates a repository
        with default dataset and storage types, but optimized for speed.
    **kwargs
        Extra arguments to `lsst.daf.butler.Butler.makeRepo`.

    Returns
    -------
    butler : `lsst.daf.butler.Butler`
        A Butler referring to the new repository. This Butler is provided
        only for additional setup; to keep test cases isolated, it is
        highly recommended that each test create its own Butler with a
        unique run/collection. See `makeTestCollection`.

    Notes
    -----
    This function provides a "quick and dirty" repository for simple unit
    tests that don't depend on complex data relationships. Because it
    assigns dimension relationships and other metadata arbitrarily, it is
    ill-suited for tests where the structure of the data matters. If you
    need such a dataset, create it directly or use a saved test dataset.

    Since the values in ``dataIds`` uniquely determine the repository's
    data IDs, the fully linked IDs can be recovered by calling
    `expandUniqueId`, so long as no other code has inserted dimensions
    into the repository registry.
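
    Examples
    --------
    A minimal sketch of typical test setup (not a doctest, because it
    performs file I/O; the path and data ID values are illustrative):

    .. code-block:: py

       >>> butler = makeTestRepo(
               "testdir", {"instrument": ["notACam"], "detector": [1]})
       >>> testButler = makeTestCollection(butler)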
70 """
    if not config:
        config = Config()
        config["datastore", "cls"] = "lsst.daf.butler.datastores.inMemoryDatastore.InMemoryDatastore"
        config["datastore", "checksum"] = False  # In case of future changes
        config["registry", "db"] = "sqlite:///:memory:"
    # newConfig guards against location-related keywords like outfile
    newConfig = Butler.makeRepo(root, config=config, **kwargs)
    butler = Butler(newConfig, writeable=True)
    dimensionRecords = _makeRecords(dataIds, butler.registry.dimensions)
    for dimension, records in dimensionRecords.items():
        butler.registry.insertDimensionData(dimension, *records)
    return butler


def makeTestCollection(repo):
    """Create a read/write Butler to a fresh collection.

    Parameters
    ----------
    repo : `lsst.daf.butler.Butler`
        A previously existing Butler to a repository, such as that returned
        by `~lsst.daf.butler.Butler.makeRepo` or `makeTestRepo`.

    Returns
    -------
    butler : `lsst.daf.butler.Butler`
        A Butler referring to a new collection in ``repo``. The collection
        is (almost) guaranteed to be new.
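
    Examples
    --------
    A sketch of per-test usage, assuming a repository created with
    `makeTestRepo` (not a doctest, because it performs file I/O):

    .. code-block:: py

       >>> repo = makeTestRepo(
               "testdir", {"instrument": ["notACam"], "detector": [1]})
       >>> butler1 = makeTestCollection(repo)  # first test's collection
       >>> butler2 = makeTestCollection(repo)  # independent collection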
99 """
    # Create a "random" collection name
    # Speed matters more than cryptographic guarantees
    collection = "test" + "".join(str(i) for i in np.random.randint(0, 10, size=8))
    return Butler(butler=repo, run=collection)


def _makeRecords(dataIds, universe):
    """Create cross-linked dimension records from a collection of
    data ID values.

    Parameters
    ----------
    dataIds : `~collections.abc.Mapping` [`str`, `iterable`]
        A mapping keyed by the dimensions of interest. Each value is an
        iterable of names for that dimension (e.g., detector IDs for
        `"detector"`).
    universe : `lsst.daf.butler.DimensionUniverse`
        Set of all known dimensions and their relationships.

    Returns
    -------
    dataIds : `~collections.abc.Mapping` [`str`, `iterable`]
        A mapping keyed by the dimensions of interest, giving one
        `~lsst.daf.butler.DimensionRecord` for each input name. Related
        dimensions (e.g., instruments and detectors) are linked arbitrarily.
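
    Examples
    --------
    A schematic sketch of the transformation (the real return values are
    `~lsst.daf.butler.DimensionRecord` objects whose exact fields depend
    on the dimension universe):

    .. code-block:: py

       >>> records = _makeRecords(
               {"instrument": ["notACam"], "detector": [1]}, universe)
       >>> # records["detector"] contains one record for detector 1,
       >>> # arbitrarily linked to the instrument "notACam"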
125 """
    expandedIds = {}
    # Provide alternate keys like detector names
    for name, values in dataIds.items():
        expandedIds[name] = []
        dimension = universe[name]
        for value in values:
            expandedValue = {}
            for key in dimension.uniqueKeys:
                if key.nbytes:
                    castType = bytes
                else:
                    castType = key.dtype().python_type
                try:
                    castValue = castType(value)
                except TypeError:
                    castValue = castType()
                expandedValue[key.name] = castValue
            for key in dimension.metadata:
                if not key.nullable:
                    expandedValue[key.name] = key.dtype().python_type(value)
            expandedIds[name].append(expandedValue)

    # Pick cross-relationships arbitrarily
    for name, values in expandedIds.items():
        dimension = universe[name]
        for value in values:
            for other in dimension.required:
                if other != dimension:
                    relation = expandedIds[other.name][0]
                    value[other.name] = relation[other.primaryKey.name]
            # Do not recurse, to keep the user from having to provide
            # irrelevant dimensions
            for other in dimension.implied:
                if other != dimension and other.name in expandedIds and other.viewOf is None:
                    relation = expandedIds[other.name][0]
                    value[other.name] = relation[other.primaryKey.name]

    return {dimension: [universe[dimension].RecordClass.fromDict(value) for value in values]
            for dimension, values in expandedIds.items()}


def expandUniqueId(butler, partialId):
    """Return a complete data ID matching some criterion.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The repository to query.
    partialId : `~collections.abc.Mapping` [`str`, any]
        A mapping of known dimensions and values.

    Returns
    -------
    dataId : `lsst.daf.butler.DataCoordinate`
        The unique data ID that matches ``partialId``.

    Raises
    ------
    ValueError
        Raised if ``partialId`` does not uniquely identify a data ID.

    Notes
    -----
    This method will only work correctly if all dimensions attached to the
    target dimension (e.g., "physical_filter" for "visit") are known to
    the repository, even if they're not needed to identify a dataset.

    Examples
    --------
    .. code-block:: py

       >>> butler = makeTestRepo(
               "testdir", {"instrument": ["notACam"], "detector": [1]})
       >>> expandUniqueId(butler, {"detector": 1})
       DataCoordinate({instrument, detector}, ('notACam', 1))
    """
    # The example is *not* a doctest because it requires dangerous I/O
    registry = butler.registry
    dimensions = registry.dimensions.extract(partialId.keys()).required

    query = " AND ".join(f"{dimension} = {value!r}" for dimension, value in partialId.items())

    dataId = list(registry.queryDimensions(dimensions, where=query, expand=False))
    if len(dataId) == 1:
        return dataId[0]
    else:
        raise ValueError(f"Found {len(dataId)} matches for {partialId}, expected 1.")


def addDatasetType(butler, name, dimensions, storageClass):
    """Add a new dataset type to a repository.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The repository to update.
    name : `str`
        The name of the dataset type.
    dimensions : `set` [`str`]
        The dimensions of the new dataset type.
    storageClass : `str`
        The storage class the dataset will use.

    Returns
    -------
    datasetType : `lsst.daf.butler.DatasetType`
        The new type.

    Raises
    ------
    ValueError
        Raised if the dimensions or storage class is invalid.

    Notes
    -----
    Dataset types are shared across all collections in a repository, so
    this function does not need to be run for each collection.
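
    Examples
    --------
    A hedged sketch of typical use in test setup (not a doctest; the
    dataset type name, dimensions, and storage class are illustrative):

    .. code-block:: py

       >>> butler = makeTestRepo(
               "testdir", {"instrument": ["notACam"], "detector": [1]})
       >>> addDatasetType(butler, "dummyImage",
                          {"instrument", "detector"}, "ExposureF")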
243 """
    try:
        datasetType = DatasetType(name, dimensions, storageClass,
                                  universe=butler.registry.dimensions)
        butler.registry.registerDatasetType(datasetType)
        return datasetType
    except KeyError as e:
        raise ValueError from e