# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.


__all__ = ["makeTestRepo", "makeTestCollection", "addDatasetType", "expandUniqueId"]

import random

from lsst.daf.butler import Butler, Config, DatasetType


def makeTestRepo(root, dataIds, *, config=None, **kwargs):
    """Create an empty repository with dummy data IDs.

    Parameters
    ----------
    root : `str`
        The location of the root directory for the repository.
    dataIds : `~collections.abc.Mapping` [`str`, `iterable`]
        A mapping keyed by the dimensions used in the test. Each value
        is an iterable of names for that dimension (e.g., detector IDs for
        `"detector"`). Related dimensions (e.g., instruments and detectors)
        are linked arbitrarily.
    config : `lsst.daf.butler.Config`, optional
        A configuration for the repository (for details, see
        `lsst.daf.butler.Butler.makeRepo`). If omitted, creates a repository
        with default dataset and storage types, but optimized for speed.
        The defaults set ``.datastore.cls``, ``.datastore.checksum`` and
        ``.registry.db``. If a supplied config does not specify these
        values, the internal defaults are used to ensure a usable
        configuration.
    **kwargs
        Extra arguments to `lsst.daf.butler.Butler.makeRepo`.

    Returns
    -------
    butler : `lsst.daf.butler.Butler`
        A Butler referring to the new repository. This Butler is provided
        only for additional setup; to keep test cases isolated, it is
        highly recommended that each test create its own Butler with a
        unique run/collection. See `makeTestCollection`.

    Notes
    -----
    This function provides a "quick and dirty" repository for simple unit
    tests that don't depend on complex data relationships. Because it
    assigns dimension relationships and other metadata arbitrarily, it is
    ill-suited for tests where the structure of the data matters. If you
    need such a dataset, create it directly or use a saved test dataset.

    Since the values in ``dataIds`` uniquely determine the repository's
    data IDs, the fully linked IDs can be recovered by calling
    `expandUniqueId`, so long as no other code has inserted dimensions
    into the repository registry.
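
    Examples
    --------
    A minimal sketch, not a doctest since it writes to ``root``; the
    directory name and data ID values below are illustrative:

    .. code-block:: py

        >>> butler = makeTestRepo(
                "testdir", {"instrument": ["notACam"], "detector": [1, 2]})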

    """
    defaults = Config()
    defaults["datastore", "cls"] = "lsst.daf.butler.datastores.inMemoryDatastore.InMemoryDatastore"
    defaults["datastore", "checksum"] = False  # In case of future changes
    defaults["registry", "db"] = "sqlite:///:memory:"

    if config:
        defaults.update(config)

    # newConfig guards against location-related keywords like outfile
    newConfig = Butler.makeRepo(root, config=defaults, **kwargs)
    butler = Butler(newConfig, writeable=True)
    dimensionRecords = _makeRecords(dataIds, butler.registry.dimensions)
    for dimension, records in dimensionRecords.items():
        butler.registry.insertDimensionData(dimension, *records)
    return butler


def makeTestCollection(repo):
    """Create a read/write Butler to a fresh collection.

    Parameters
    ----------
    repo : `lsst.daf.butler.Butler`
        A previously existing Butler to a repository, such as that
        returned by `~lsst.daf.butler.Butler.makeRepo` or `makeTestRepo`.

    Returns
    -------
    butler : `lsst.daf.butler.Butler`
        A Butler referring to a new collection in the given repository.
        The collection is (almost) guaranteed to be new.
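
    Examples
    --------
    A sketch of the intended per-test pattern; the ``makeTestRepo``
    arguments are illustrative:

    .. code-block:: py

        >>> repo = makeTestRepo(
                "testdir", {"instrument": ["notACam"], "detector": [1]})
        >>> butler = makeTestCollection(repo)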

    """
    # Create a "random" collection name.
    # Speed matters more than cryptographic guarantees.
    collection = "test" + str(random.randrange(1_000_000_000))
    return Butler(butler=repo, run=collection)


def _makeRecords(dataIds, universe):
    """Create cross-linked dimension records from a collection of
    data ID values.

    Parameters
    ----------
    dataIds : `~collections.abc.Mapping` [`str`, `iterable`]
        A mapping keyed by the dimensions of interest. Each value is an
        iterable of names for that dimension (e.g., detector IDs for
        `"detector"`).
    universe : `lsst.daf.butler.DimensionUniverse`
        Set of all known dimensions and their relationships.

    Returns
    -------
    dataIds : `~collections.abc.Mapping` [`str`, `iterable`]
        A mapping keyed by the dimensions of interest, giving one
        `~lsst.daf.butler.DimensionRecord` for each input name. Related
        dimensions (e.g., instruments and detectors) are linked
        arbitrarily.
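
    Examples
    --------
    An illustrative call; the exact contents of the returned records
    depend on the dimension universe:

    .. code-block:: py

        >>> records = _makeRecords(
                {"instrument": ["notACam"], "detector": [1, 2]},
                butler.registry.dimensions)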

    """
    expandedIds = {}
    # Provide alternate keys like detector names
    for name, values in dataIds.items():
        expandedIds[name] = []
        dimension = universe[name]
        for value in values:
            expandedValue = {}
            # Fill each unique key for the dimension, coercing the input
            # value to the key's type; bytes keys get an empty default.
            for key in dimension.uniqueKeys:
                if key.nbytes:
                    castType = bytes
                else:
                    castType = key.dtype().python_type
                try:
                    castValue = castType(value)
                except TypeError:
                    castValue = castType()
                expandedValue[key.name] = castValue
            # Non-nullable metadata must also be filled in, however
            # arbitrarily.
            for key in dimension.metadata:
                if not key.nullable:
                    expandedValue[key.name] = key.dtype().python_type(value)
            expandedIds[name].append(expandedValue)

    # Pick cross-relationships arbitrarily
    for name, values in expandedIds.items():
        dimension = universe[name]
        for value in values:
            for other in dimension.required:
                if other != dimension:
                    relation = expandedIds[other.name][0]
                    value[other.name] = relation[other.primaryKey.name]
            # Do not recurse, to keep the user from having to provide
            # irrelevant dimensions
            for other in dimension.implied:
                if other != dimension and other.name in expandedIds and other.viewOf is None:
                    relation = expandedIds[other.name][0]
                    value[other.name] = relation[other.primaryKey.name]

    return {dimension: [universe[dimension].RecordClass.fromDict(value) for value in values]
            for dimension, values in expandedIds.items()}


def expandUniqueId(butler, partialId):
    """Return a complete data ID matching some criterion.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The repository to query.
    partialId : `~collections.abc.Mapping` [`str`, any]
        A mapping of known dimensions and values.

    Returns
    -------
    dataId : `lsst.daf.butler.DataCoordinate`
        The unique data ID that matches ``partialId``.

    Raises
    ------
    ValueError
        Raised if ``partialId`` does not uniquely identify a data ID.

    Notes
    -----
    This method will only work correctly if all dimensions attached to the
    target dimension (e.g., "physical_filter" for "visit") are known to
    the repository, even if they're not needed to identify a dataset.

    Examples
    --------
    .. code-block:: py

        >>> butler = makeTestRepo(
                "testdir", {"instrument": ["notACam"], "detector": [1]})
        >>> expandUniqueId(butler, {"detector": 1})
        DataCoordinate({instrument, detector}, ('notACam', 1))
    """
    # The example is *not* a doctest because it requires dangerous I/O
    registry = butler.registry
    dimensions = registry.dimensions.extract(partialId.keys()).required

    query = " AND ".join(f"{dimension} = {value!r}" for dimension, value in partialId.items())

    dataId = list(registry.queryDimensions(dimensions, where=query, expand=False))
    if len(dataId) == 1:
        return dataId[0]
    else:
        raise ValueError(f"Found {len(dataId)} matches for {partialId}, expected 1.")


def addDatasetType(butler, name, dimensions, storageClass):
    """Add a new dataset type to a repository.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The repository to update.
    name : `str`
        The name of the dataset type.
    dimensions : `set` [`str`]
        The dimensions of the new dataset type.
    storageClass : `str`
        The storage class the dataset will use.

    Returns
    -------
    datasetType : `lsst.daf.butler.DatasetType`
        The new type.

    Raises
    ------
    ValueError
        Raised if the dimensions or storage class is invalid.

    Notes
    -----
    Dataset types are shared across all collections in a repository, so
    this function does not need to be run for each collection.
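
    Examples
    --------
    A sketch of typical usage; the dataset type name, dimensions, and
    storage class below are illustrative:

    .. code-block:: py

        >>> addDatasetType(
                butler, "DummyType", {"instrument", "detector"}, "NumpyArray")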

    """
    try:
        datasetType = DatasetType(name, dimensions, storageClass,
                                  universe=butler.registry.dimensions)
        butler.registry.registerDatasetType(datasetType)
        return datasetType
    except KeyError as e:
        raise ValueError from e