# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.


__all__ = ["makeTestRepo", "makeTestCollection", "addDatasetType", "expandUniqueId"]


import numpy as np

from lsst.daf.butler import Butler, Config, DatasetType


def makeTestRepo(root, dataIds, *, config=None, **kwargs):
    """Create an empty repository with dummy data IDs.

    Parameters
    ----------
    root : `str`
        The location of the root directory for the repository.
    dataIds : `~collections.abc.Mapping` [`str`, `iterable`]
        A mapping keyed by the dimensions used in the test. Each value
        is an iterable of names for that dimension (e.g., detector IDs for
        `"detector"`). Related dimensions (e.g., instruments and detectors)
        are linked arbitrarily.
    config : `lsst.daf.butler.Config`, optional
        A configuration for the repository (for details, see
        `lsst.daf.butler.Butler.makeRepo`). If omitted, creates a repository
        with default dataset and storage types, but optimized for speed.
    **kwargs
        Extra arguments to `lsst.daf.butler.Butler.makeRepo`.

    Returns
    -------
    butler : `lsst.daf.butler.Butler`
        A Butler referring to the new repository. This Butler is provided
        only for additional setup; to keep test cases isolated, it is highly
        recommended that each test create its own Butler with a
        unique run/collection. See `makeTestCollection`.

    Notes
    -----
    This function provides a "quick and dirty" repository for simple unit
    tests that don't depend on complex data relationships. Because it
    assigns dimension relationships and other metadata arbitrarily, it is
    ill-suited for tests where the structure of the data matters. If you
    need such a dataset, create it directly or use a saved test dataset.

    Since the values in ``dataIds`` uniquely determine the repository's
    data IDs, the fully linked IDs can be recovered by calling
    `expandUniqueId`, so long as no other code has inserted dimensions into
    the repository registry.

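    Examples
    --------
    A minimal sketch of typical setup (not a doctest, because it writes to
    disk; the path and data ID values here are arbitrary placeholders):

    .. code-block:: py

       butler = makeTestRepo(
           "testdir", {"instrument": ["notACam"], "detector": [1]})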
    """
    if not config:
        config = Config()
        config["datastore", "cls"] = "lsst.daf.butler.datastores.inMemoryDatastore.InMemoryDatastore"
        config["datastore", "checksum"] = False  # In case of future changes
        config["registry", "db"] = "sqlite:///:memory:"
    # newConfig guards against location-related keywords like outfile
    newConfig = Butler.makeRepo(root, config=config, **kwargs)
    butler = Butler(newConfig, writeable=True)
    dimensionRecords = _makeRecords(dataIds, butler.registry.dimensions)
    for dimension, records in dimensionRecords.items():
        butler.registry.insertDimensionData(dimension, *records)
    return butler


def makeTestCollection(repo):
    """Create a read/write Butler to a fresh collection.

    Parameters
    ----------
    repo : `lsst.daf.butler.Butler`
        A previously existing Butler to a repository, such as that returned
        by `~lsst.daf.butler.Butler.makeRepo` or `makeTestRepo`.

    Returns
    -------
    butler : `lsst.daf.butler.Butler`
        A Butler referring to a new collection in the same repository as
        ``repo``. The collection is (almost) guaranteed to be new.

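    Examples
    --------
    A sketch of per-test usage (not a doctest; assumes ``repo`` was created
    as in `makeTestRepo`):

    .. code-block:: py

       repo = makeTestRepo(
           "testdir", {"instrument": ["notACam"], "detector": [1]})
       butler = makeTestCollection(repo)  # fresh run collection per test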
    """
    # Create a "random" collection name
    # Speed matters more than cryptographic guarantees
    collection = "test" + "".join(str(i) for i in np.random.randint(0, 10, size=8))
    return Butler(butler=repo, run=collection)


def _makeRecords(dataIds, universe):
    """Create cross-linked dimension records from a collection of
    data ID values.

    Parameters
    ----------
    dataIds : `~collections.abc.Mapping` [`str`, `iterable`]
        A mapping keyed by the dimensions of interest. Each value is an
        iterable of names for that dimension (e.g., detector IDs for
        `"detector"`).
    universe : `lsst.daf.butler.DimensionUniverse`
        Set of all known dimensions and their relationships.

    Returns
    -------
    dataIds : `~collections.abc.Mapping` [`str`, `iterable`]
        A mapping keyed by the dimensions of interest, giving one
        `~lsst.daf.butler.DimensionRecord` for each input name. Related
        dimensions (e.g., instruments and detectors) are linked arbitrarily.

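    Examples
    --------
    An illustrative call (not a doctest; ``universe`` is assumed to be a
    repository's `~lsst.daf.butler.DimensionUniverse`):

    .. code-block:: py

       records = _makeRecords(
           {"instrument": ["notACam"], "detector": [1]}, universe)
       # records["detector"] holds one DimensionRecord whose instrument
       # field is arbitrarily linked to "notACam".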
    """
    expandedIds = {}
    # Provide alternate keys like detector names
    for name, values in dataIds.items():
        expandedIds[name] = []
        dimension = universe[name]
        for value in values:
            expandedValue = {}
            for key in dimension.uniqueKeys:
                if key.nbytes:
                    castType = bytes
                else:
                    castType = key.dtype().python_type
                try:
                    castValue = castType(value)
                except TypeError:
                    castValue = castType()
                expandedValue[key.name] = castValue
            for key in dimension.metadata:
                if not key.nullable:
                    expandedValue[key.name] = key.dtype().python_type(value)
            expandedIds[name].append(expandedValue)

    # Pick cross-relationships arbitrarily
    for name, values in expandedIds.items():
        dimension = universe[name]
        for value in values:
            for other in dimension.required:
                if other != dimension:
                    relation = expandedIds[other.name][0]
                    value[other.name] = relation[other.primaryKey.name]
            # Do not recurse, to keep the user from having to provide
            # irrelevant dimensions
            for other in dimension.implied:
                if other != dimension and other.name in expandedIds and other.viewOf is None:
                    relation = expandedIds[other.name][0]
                    value[other.name] = relation[other.primaryKey.name]

    return {dimension: [universe[dimension].RecordClass.fromDict(value) for value in values]
            for dimension, values in expandedIds.items()}


def expandUniqueId(butler, partialId):
    """Return a complete data ID matching some criterion.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The repository to query.
    partialId : `~collections.abc.Mapping` [`str`, any]
        A mapping of known dimensions and values.

    Returns
    -------
    dataId : `lsst.daf.butler.DataCoordinate`
        The unique data ID that matches ``partialId``.

    Raises
    ------
    ValueError
        Raised if ``partialId`` does not uniquely identify a data ID.

    Notes
    -----
    This method will only work correctly if all dimensions attached to the
    target dimension (e.g., "physical_filter" for "visit") are known to the
    repository, even if they're not needed to identify a dataset.

    Examples
    --------
    .. code-block:: py

       >>> butler = makeTestRepo(
               "testdir", {"instrument": ["notACam"], "detector": [1]})
       >>> expandUniqueId(butler, {"detector": 1})
       DataCoordinate({instrument, detector}, ('notACam', 1))
    """

    # The example is *not* a doctest because it requires dangerous I/O
    registry = butler.registry
    dimensions = registry.dimensions.extract(partialId.keys()).required

    query = " AND ".join(f"{dimension} = {value!r}" for dimension, value in partialId.items())

    dataIds = list(registry.queryDimensions(dimensions, where=query, expand=False))
    if len(dataIds) == 1:
        return dataIds[0]
    else:
        raise ValueError(f"Found {len(dataIds)} matches for {partialId}, expected 1.")


def addDatasetType(butler, name, dimensions, storageClass):
    """Add a new dataset type to a repository.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The repository to update.
    name : `str`
        The name of the dataset type.
    dimensions : `set` [`str`]
        The dimensions of the new dataset type.
    storageClass : `str`
        The storage class the dataset will use.

    Returns
    -------
    datasetType : `lsst.daf.butler.DatasetType`
        The new type.

    Raises
    ------
    ValueError
        Raised if the dimensions or storage class is invalid.

    Notes
    -----
    Dataset types are shared across all collections in a repository, so this
    function does not need to be run for each collection.

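    Examples
    --------
    An illustrative call (not a doctest; the dataset type name and storage
    class here are placeholders, and "NumpyArray" is assumed to be a
    configured storage class):

    .. code-block:: py

       datasetType = addDatasetType(
           butler, "DataType1", {"instrument", "detector"}, "NumpyArray")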
    """
    try:
        datasetType = DatasetType(name, dimensions, storageClass,
                                  universe=butler.registry.dimensions)
        butler.registry.registerDatasetType(datasetType)
        return datasetType
    except KeyError as e:
        raise ValueError(f"Invalid dimensions or storage class: {e}") from e