# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.


__all__ = ["makeTestRepo", "makeTestCollection", "addDatasetType", "expandUniqueId"]

import random

from lsst.daf.butler import Butler, Config, DatasetType


def makeTestRepo(root, dataIds, *, config=None, **kwargs):
    """Create an empty repository with dummy data IDs.

    Parameters
    ----------
    root : `str`
        The location of the root directory for the repository.
    dataIds : `~collections.abc.Mapping` [`str`, `iterable`]
        A mapping keyed by the dimensions used in the test. Each value
        is an iterable of names for that dimension (e.g., detector IDs for
        `"detector"`). Related dimensions (e.g., instruments and detectors)
        are linked arbitrarily.
    config : `lsst.daf.butler.Config`, optional
        A configuration for the repository (for details, see
        `lsst.daf.butler.Butler.makeRepo`). If omitted, creates a repository
        with default dataset and storage types, but optimized for speed.
        The defaults set ``.datastore.cls``, ``.datastore.checksum`` and
        ``.registry.db``. If a supplied config does not specify these values,
        the internal defaults are used to ensure a usable configuration.
    **kwargs
        Extra arguments to `lsst.daf.butler.Butler.makeRepo`.

    Returns
    -------
    butler : `lsst.daf.butler.Butler`
        A Butler referring to the new repository. This Butler is provided
        only for additional setup; to keep test cases isolated, it is highly
        recommended that each test create its own Butler with a unique
        run/collection. See `makeTestCollection`.

    Notes
    -----
    This function provides a "quick and dirty" repository for simple unit
    tests that don't depend on complex data relationships. Because it assigns
    dimension relationships and other metadata arbitrarily, it is ill-suited
    for tests where the structure of the data matters. If you need such a
    dataset, create it directly or use a saved test dataset.

    Since the values in ``dataIds`` uniquely determine the repository's
    data IDs, the fully linked IDs can be recovered by calling
    `expandUniqueId`, so long as no other code has inserted dimensions into
    the repository registry.
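
    Examples
    --------
    A minimal sketch of typical use (not a doctest, since it writes to
    disk; ``"notACam"`` is a placeholder, and the dimension names assume
    the default dimension universe):

    .. code-block:: py

        >>> # placeholder names; any consistent dimension values work
        >>> butler = makeTestRepo(
        ...     "testdir", {"instrument": ["notACam"], "detector": [1, 2]})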

72 """ 

73 defaults = Config() 

74 defaults["datastore", "cls"] = "lsst.daf.butler.datastores.inMemoryDatastore.InMemoryDatastore" 

75 defaults["datastore", "checksum"] = False # In case of future changes 

76 defaults["registry", "db"] = "sqlite:///<butlerRoot>/gen3.sqlite3" 

77 

78 if config: 

79 defaults.update(config) 

80 

81 # Disable config root by default so that our registry override will 

82 # not be ignored. 

83 # newConfig guards against location-related keywords like outfile 

84 newConfig = Butler.makeRepo(root, config=defaults, forceConfigRoot=False, **kwargs) 

85 butler = Butler(newConfig, writeable=True) 

86 dimensionRecords = _makeRecords(dataIds, butler.registry.dimensions) 

87 for dimension, records in dimensionRecords.items(): 

88 butler.registry.insertDimensionData(dimension, *records) 

89 return butler 

90 

91 

def makeTestCollection(repo):
    """Create a read/write Butler to a fresh collection.

    Parameters
    ----------
    repo : `lsst.daf.butler.Butler`
        A previously existing Butler to a repository, such as that returned
        by `~lsst.daf.butler.Butler.makeRepo` or `makeTestRepo`.

    Returns
    -------
    butler : `lsst.daf.butler.Butler`
        A Butler referring to a new collection in the same repository as
        ``repo``. The collection is (almost) guaranteed to be new.
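
    Examples
    --------
    A sketch of the intended per-test pattern (``repo`` would typically be
    created once, e.g. in ``setUpClass``; the names here are illustrative):

    .. code-block:: py

        >>> # create the repository once, then one collection per test
        >>> repo = makeTestRepo(
        ...     "testdir", {"instrument": ["notACam"], "detector": [1]})
        >>> butler = makeTestCollection(repo)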

106 """ 

107 # Create a "random" collection name 

108 # Speed matters more than cryptographic guarantees 

109 collection = "test" + str(random.randrange(1_000_000_000)) 

110 return Butler(butler=repo, run=collection) 

111 

112 

def _makeRecords(dataIds, universe):
    """Create cross-linked dimension records from a collection of
    data ID values.

    Parameters
    ----------
    dataIds : `~collections.abc.Mapping` [`str`, `iterable`]
        A mapping keyed by the dimensions of interest. Each value is an
        iterable of names for that dimension (e.g., detector IDs for
        `"detector"`).
    universe : `lsst.daf.butler.DimensionUniverse`
        Set of all known dimensions and their relationships.

    Returns
    -------
    dataIds : `~collections.abc.Mapping` [`str`, `iterable`]
        A mapping keyed by the dimensions of interest, giving one
        `~lsst.daf.butler.DimensionRecord` for each input name. Related
        dimensions (e.g., instruments and detectors) are linked arbitrarily.
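
    Examples
    --------
    An illustrative (hypothetical) call; the exact record fields depend on
    the dimension universe:

    .. code-block:: py

        >>> records = _makeRecords(
        ...     {"instrument": ["notACam"], "detector": [1]},
        ...     butler.registry.dimensions)
        >>> # records["detector"][0] is a DimensionRecord whose required
        >>> # "instrument" value is copied from records["instrument"][0]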

132 """ 

133 expandedIds = {} 

134 # Provide alternate keys like detector names 

135 for name, values in dataIds.items(): 

136 expandedIds[name] = [] 

137 dimension = universe[name] 

138 for value in values: 

139 expandedValue = {} 

140 for key in dimension.uniqueKeys: 

141 if key.nbytes: 

142 castType = bytes 

143 else: 

144 castType = key.dtype().python_type 

145 try: 

146 castValue = castType(value) 

147 except TypeError: 

148 castValue = castType() 

149 expandedValue[key.name] = castValue 

150 for key in dimension.metadata: 

151 if not key.nullable: 

152 expandedValue[key.name] = key.dtype().python_type(value) 

153 expandedIds[name].append(expandedValue) 

154 

155 # Pick cross-relationships arbitrarily 

156 for name, values in expandedIds.items(): 

157 dimension = universe[name] 

158 for value in values: 

159 for other in dimension.required: 

160 if other != dimension: 

161 relation = expandedIds[other.name][0] 

162 value[other.name] = relation[other.primaryKey.name] 

163 # Do not recurse, to keep the user from having to provide 

164 # irrelevant dimensions 

165 for other in dimension.implied: 

166 if other != dimension and other.name in expandedIds and other.viewOf is None: 

167 relation = expandedIds[other.name][0] 

168 value[other.name] = relation[other.primaryKey.name] 

169 

170 return {dimension: [universe[dimension].RecordClass(**value) for value in values] 

171 for dimension, values in expandedIds.items()} 

172 

173 

def expandUniqueId(butler, partialId):
    """Return a complete data ID matching some criterion.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The repository to query.
    partialId : `~collections.abc.Mapping` [`str`, any]
        A mapping of known dimensions and values.

    Returns
    -------
    dataId : `lsst.daf.butler.DataCoordinate`
        The unique data ID that matches ``partialId``.

    Raises
    ------
    ValueError
        Raised if ``partialId`` does not uniquely identify a data ID.

    Notes
    -----
    This method will only work correctly if all dimensions attached to the
    target dimension (e.g., "physical_filter" for "visit") are known to the
    repository, even if they're not needed to identify a dataset.

    Examples
    --------
    .. code-block:: py

        >>> butler = makeTestRepo(
        ...     "testdir", {"instrument": ["notACam"], "detector": [1]})
        >>> expandUniqueId(butler, {"detector": 1})
        DataCoordinate({instrument, detector}, ('notACam', 1))
    """

    # The example is *not* a doctest because it requires dangerous I/O
    registry = butler.registry
    dimensions = registry.dimensions.extract(partialId.keys()).required

    query = " AND ".join(f"{dimension} = {value!r}" for dimension, value in partialId.items())

    dataId = list(registry.queryDataIds(dimensions, where=query))
    if len(dataId) == 1:
        return dataId[0]
    else:
        raise ValueError(f"Found {len(dataId)} matches for {partialId}, expected 1.")


def addDatasetType(butler, name, dimensions, storageClass):
    """Add a new dataset type to a repository.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The repository to update.
    name : `str`
        The name of the dataset type.
    dimensions : `set` [`str`]
        The dimensions of the new dataset type.
    storageClass : `str`
        The storage class the dataset will use.

    Returns
    -------
    datasetType : `lsst.daf.butler.DatasetType`
        The new type.

    Raises
    ------
    ValueError
        Raised if the dimensions or storage class is invalid.

    Notes
    -----
    Dataset types are shared across all collections in a repository, so this
    function does not need to be run for each collection.
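
    Examples
    --------
    A sketch of typical use (the dataset type name, dimensions, and storage
    class below are placeholders; valid storage classes depend on the
    repository configuration):

    .. code-block:: py

        >>> butler = makeTestRepo(
        ...     "testdir", {"instrument": ["notACam"], "detector": [1]})
        >>> # "cameraImage" and "ExposureF" are placeholder names
        >>> addDatasetType(butler, "cameraImage",
        ...                {"instrument", "detector"}, "ExposureF")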

250 """ 

251 try: 

252 datasetType = DatasetType(name, dimensions, storageClass, 

253 universe=butler.registry.dimensions) 

254 butler.registry.registerDatasetType(datasetType) 

255 return datasetType 

256 except KeyError as e: 

257 raise ValueError from e