Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ["RegistryTests"] 

24 

25from abc import ABC, abstractmethod 

26import os 

27 

28import astropy.time 

29import sqlalchemy 

30 

31from ...core import ( 

32 DataCoordinate, 

33 DatasetType, 

34 DimensionGraph, 

35 StorageClass, 

36 ddl, 

37 YamlRepoImportBackend 

38) 

39from .._registry import Registry, CollectionType, ConflictingDefinitionError, OrphanedRecordError 

40from ..wildcards import DatasetTypeRestriction 

41 

42 

43class RegistryTests(ABC): 

44 """Generic tests for the `Registry` class that can be subclassed to 

45 generate tests for different configurations. 

46 """ 

47 

48 @classmethod 

49 @abstractmethod 

50 def getDataDir(cls) -> str: 

51 """Return the root directory containing test data YAML files. 

52 """ 

53 raise NotImplementedError() 

54 

55 @abstractmethod 

56 def makeRegistry(self) -> Registry: 

57 """Return the Registry instance to be tested. 

58 """ 

59 raise NotImplementedError() 

60 

61 def loadData(self, registry: Registry, filename: str): 

62 """Load registry test data from ``getDataDir/<filename>``, 

63 which should be a YAML import/export file. 

64 """ 

65 with open(os.path.join(self.getDataDir(), filename), 'r') as stream: 

66 backend = YamlRepoImportBackend(stream, registry) 

67 backend.register() 

68 backend.load(datastore=None) 

69 

70 def assertRowCount(self, registry: Registry, table: str, count: int): 

71 """Check the number of rows in table. 

72 """ 

73 # TODO: all tests that rely on this method should be rewritten, as it 

74 # needs to depend on Registry implementation details to have any chance 

75 # of working. 

76 sql = sqlalchemy.sql.select( 

77 [sqlalchemy.sql.func.count()] 

78 ).select_from( 

79 getattr(registry._tables, table) 

80 ) 

81 self.assertEqual(registry._db.query(sql).scalar(), count) 

82 

83 def testOpaque(self): 

84 """Tests for `Registry.registerOpaqueTable`, 

85 `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and 

86 `Registry.deleteOpaqueData`. 

87 """ 

88 registry = self.makeRegistry() 

89 table = "opaque_table_for_testing" 

90 registry.registerOpaqueTable( 

91 table, 

92 spec=ddl.TableSpec( 

93 fields=[ 

94 ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True), 

95 ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False), 

96 ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True), 

97 ], 

98 ) 

99 ) 

100 rows = [ 

101 {"id": 1, "name": "one", "count": None}, 

102 {"id": 2, "name": "two", "count": 5}, 

103 {"id": 3, "name": "three", "count": 6}, 

104 ] 

105 registry.insertOpaqueData(table, *rows) 

106 self.assertCountEqual(rows, list(registry.fetchOpaqueData(table))) 

107 self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1))) 

108 self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two"))) 

109 self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two"))) 

110 registry.deleteOpaqueData(table, id=3) 

111 self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table))) 

112 registry.deleteOpaqueData(table) 

113 self.assertEqual([], list(registry.fetchOpaqueData(table))) 

114 

115 def testDatasetType(self): 

116 """Tests for `Registry.registerDatasetType` and 

117 `Registry.getDatasetType`. 

118 """ 

119 registry = self.makeRegistry() 

120 # Check valid insert 

121 datasetTypeName = "test" 

122 storageClass = StorageClass("testDatasetType") 

123 registry.storageClasses.registerStorageClass(storageClass) 

124 dimensions = registry.dimensions.extract(("instrument", "visit")) 

125 differentDimensions = registry.dimensions.extract(("instrument", "patch")) 

126 inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

127 # Inserting for the first time should return True 

128 self.assertTrue(registry.registerDatasetType(inDatasetType)) 

129 outDatasetType1 = registry.getDatasetType(datasetTypeName) 

130 self.assertEqual(outDatasetType1, inDatasetType) 

131 

132 # Re-inserting should work 

133 self.assertFalse(registry.registerDatasetType(inDatasetType)) 

134 # Except when they are not identical 

135 with self.assertRaises(ConflictingDefinitionError): 

136 nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass) 

137 registry.registerDatasetType(nonIdenticalDatasetType) 

138 

139 # Template can be None 

140 datasetTypeName = "testNoneTemplate" 

141 storageClass = StorageClass("testDatasetType2") 

142 registry.storageClasses.registerStorageClass(storageClass) 

143 dimensions = registry.dimensions.extract(("instrument", "visit")) 

144 inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

145 registry.registerDatasetType(inDatasetType) 

146 outDatasetType2 = registry.getDatasetType(datasetTypeName) 

147 self.assertEqual(outDatasetType2, inDatasetType) 

148 

149 allTypes = set(registry.queryDatasetTypes()) 

150 self.assertEqual(allTypes, {outDatasetType1, outDatasetType2}) 

151 

152 def testDimensions(self): 

153 """Tests for `Registry.insertDimensionData` and 

154 `Registry.expandDataId`. 

155 """ 

156 registry = self.makeRegistry() 

157 dimensionName = "instrument" 

158 dimension = registry.dimensions[dimensionName] 

159 dimensionValue = {"name": "DummyCam", "visit_max": 10, "exposure_max": 10, "detector_max": 2, 

160 "class_name": "lsst.obs.base.Instrument"} 

161 registry.insertDimensionData(dimensionName, dimensionValue) 

162 # Inserting the same value twice should fail 

163 with self.assertRaises(sqlalchemy.exc.IntegrityError): 

164 registry.insertDimensionData(dimensionName, dimensionValue) 

165 # expandDataId should retrieve the record we just inserted 

166 self.assertEqual( 

167 registry.expandDataId( 

168 instrument="DummyCam", 

169 graph=dimension.graph 

170 ).records[dimensionName].toDict(), 

171 dimensionValue 

172 ) 

173 # expandDataId should raise if there is no record with the given ID. 

174 with self.assertRaises(LookupError): 

175 registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph) 

176 # abstract_filter doesn't have a table; insert should fail. 

177 with self.assertRaises(TypeError): 

178 registry.insertDimensionData("abstract_filter", {"abstract_filter": "i"}) 

179 dimensionName2 = "physical_filter" 

180 dimension2 = registry.dimensions[dimensionName2] 

181 dimensionValue2 = {"name": "DummyCam_i", "abstract_filter": "i"} 

182 # Missing required dependency ("instrument") should fail 

183 with self.assertRaises(sqlalchemy.exc.IntegrityError): 

184 registry.insertDimensionData(dimensionName2, dimensionValue2) 

185 # Adding required dependency should fix the failure 

186 dimensionValue2["instrument"] = "DummyCam" 

187 registry.insertDimensionData(dimensionName2, dimensionValue2) 

188 # expandDataId should retrieve the record we just inserted. 

189 self.assertEqual( 

190 registry.expandDataId( 

191 instrument="DummyCam", physical_filter="DummyCam_i", 

192 graph=dimension2.graph 

193 ).records[dimensionName2].toDict(), 

194 dimensionValue2 

195 ) 

196 

197 def testDataset(self): 

198 """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`, 

199 and `Registry.removeDataset`. 

200 """ 

201 registry = self.makeRegistry() 

202 self.loadData(registry, "base.yaml") 

203 run = "test" 

204 registry.registerRun(run) 

205 datasetType = registry.getDatasetType("permabias") 

206 dataId = {"instrument": "Cam1", "detector": 2} 

207 ref, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run) 

208 outRef = registry.getDataset(ref.id) 

209 self.assertIsNotNone(ref.id) 

210 self.assertEqual(ref, outRef) 

211 with self.assertRaises(ConflictingDefinitionError): 

212 registry.insertDatasets(datasetType, dataIds=[dataId], run=run) 

213 registry.removeDataset(ref) 

214 self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run])) 

215 

216 def testComponents(self): 

217 """Tests for `Registry.attachComponent` and other dataset operations 

218 on composite datasets. 

219 """ 

220 registry = self.makeRegistry() 

221 self.loadData(registry, "base.yaml") 

222 run = "test" 

223 registry.registerRun(run) 

224 parentDatasetType = registry.getDatasetType("permabias") 

225 childDatasetType1 = registry.getDatasetType("permabias.image") 

226 childDatasetType2 = registry.getDatasetType("permabias.mask") 

227 dataId = {"instrument": "Cam1", "detector": 2} 

228 parent, = registry.insertDatasets(parentDatasetType, dataIds=[dataId], run=run) 

229 children = {"image": registry.insertDatasets(childDatasetType1, dataIds=[dataId], run=run)[0], 

230 "mask": registry.insertDatasets(childDatasetType2, dataIds=[dataId], run=run)[0]} 

231 for name, child in children.items(): 

232 registry.attachComponent(name, parent, child) 

233 self.assertEqual(parent.components, children) 

234 outParent = registry.getDataset(parent.id) 

235 self.assertEqual(outParent.components, children) 

236 # Remove the parent; this should remove all children. 

237 registry.removeDataset(parent) 

238 self.assertIsNone(registry.findDataset(parentDatasetType, dataId, collections=[run])) 

239 self.assertIsNone(registry.findDataset(childDatasetType1, dataId, collections=[run])) 

240 self.assertIsNone(registry.findDataset(childDatasetType2, dataId, collections=[run])) 

241 

242 def testFindDataset(self): 

243 """Tests for `Registry.findDataset`. 

244 """ 

245 registry = self.makeRegistry() 

246 self.loadData(registry, "base.yaml") 

247 run = "test" 

248 datasetType = registry.getDatasetType("permabias") 

249 dataId = {"instrument": "Cam1", "detector": 4} 

250 registry.registerRun(run) 

251 inputRef, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run) 

252 outputRef = registry.findDataset(datasetType, dataId, collections=[run]) 

253 self.assertEqual(outputRef, inputRef) 

254 # Check that retrieval with invalid dataId raises 

255 with self.assertRaises(LookupError): 

256 dataId = {"instrument": "Cam1"} # no detector 

257 registry.findDataset(datasetType, dataId, collections=run) 

258 # Check that different dataIds match to different datasets 

259 dataId1 = {"instrument": "Cam1", "detector": 1} 

260 inputRef1, = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run) 

261 dataId2 = {"instrument": "Cam1", "detector": 2} 

262 inputRef2, = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run) 

263 self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1) 

264 self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2) 

265 self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2) 

266 self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1) 

267 # Check that requesting a non-existing dataId returns None 

268 nonExistingDataId = {"instrument": "Cam1", "detector": 3} 

269 self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run)) 

270 

    def testCollections(self):
        """Tests for registry methods that manage collections.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        datasetType = "permabias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED)
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches:
        # 1. 'tag1'
        # 2. 'run1', but only for the permaflat dataset
        # 3. 'run2'
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained collection
        # only if we don't ask to flatten it (i.e. yield only its children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, (run1, "permaflat"), run2])
        self.assertEqual(
            list(registry.getCollectionChain(chain1)),
            [(tag1, DatasetTypeRestriction.any),
             (run1, DatasetTypeRestriction.fromExpression("permaflat")),
             (run2, DatasetTypeRestriction.any)]
        )
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1.  The search (for permabias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. not look in run1, because that chain element is restricted to
        #    permaflat
        # 3. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Look in the chain for a permaflat that is in run1; should get the
        # same ref as if we'd searched run1 directly.
        dataId3 = {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}
        self.assertEqual(registry.findDataset("permaflat", dataId3, collections=chain1),
                         registry.findDataset("permaflat", dataId3, collections=run1),)
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [(run2, "permabias"), chain1])
        # Search for permabias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # Search for permabias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a permaflat that is in run2.  That should not be found
        # at the front of chain2, because of the restriction to permabias
        # on run2 there, but it should be found at the end of chain1.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("permaflat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("permaflat", dataId4, collections=chain2))

386 

387 def testDatasetLocations(self): 

388 """Tests for `Registry.insertDatasetLocations`, 

389 `Registry.getDatasetLocations`, and `Registry.removeDatasetLocations`. 

390 """ 

391 registry = self.makeRegistry() 

392 self.loadData(registry, "base.yaml") 

393 self.loadData(registry, "datasets.yaml") 

394 run = "imported_g" 

395 ref = registry.findDataset("permabias", dataId={"instrument": "Cam1", "detector": 1}, collections=run) 

396 ref2 = registry.findDataset("permaflat", 

397 dataId={"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-G"}, 

398 collections=run) 

399 datastoreName = "dummystore" 

400 datastoreName2 = "dummystore2" 

401 # Test adding information about a new dataset 

402 registry.insertDatasetLocations(datastoreName, [ref]) 

403 addresses = registry.getDatasetLocations(ref) 

404 self.assertIn(datastoreName, addresses) 

405 self.assertEqual(len(addresses), 1) 

406 registry.insertDatasetLocations(datastoreName2, [ref, ref2]) 

407 addresses = registry.getDatasetLocations(ref) 

408 self.assertEqual(len(addresses), 2) 

409 self.assertIn(datastoreName, addresses) 

410 self.assertIn(datastoreName2, addresses) 

411 registry.removeDatasetLocation(datastoreName, ref) 

412 addresses = registry.getDatasetLocations(ref) 

413 self.assertEqual(len(addresses), 1) 

414 self.assertNotIn(datastoreName, addresses) 

415 self.assertIn(datastoreName2, addresses) 

416 with self.assertRaises(OrphanedRecordError): 

417 registry.removeDataset(ref) 

418 registry.removeDatasetLocation(datastoreName2, ref) 

419 addresses = registry.getDatasetLocations(ref) 

420 self.assertEqual(len(addresses), 0) 

421 self.assertNotIn(datastoreName2, addresses) 

422 registry.removeDataset(ref) # should not raise 

423 addresses = registry.getDatasetLocations(ref2) 

424 self.assertEqual(len(addresses), 1) 

425 self.assertIn(datastoreName2, addresses) 

426 

427 def testBasicTransaction(self): 

428 """Test that all operations within a single transaction block are 

429 rolled back if an exception propagates out of the block. 

430 """ 

431 registry = self.makeRegistry() 

432 storageClass = StorageClass("testDatasetType") 

433 registry.storageClasses.registerStorageClass(storageClass) 

434 dimensions = registry.dimensions.extract(("instrument",)) 

435 dataId = {"instrument": "DummyCam"} 

436 datasetTypeA = DatasetType(name="A", 

437 dimensions=dimensions, 

438 storageClass=storageClass) 

439 datasetTypeB = DatasetType(name="B", 

440 dimensions=dimensions, 

441 storageClass=storageClass) 

442 datasetTypeC = DatasetType(name="C", 

443 dimensions=dimensions, 

444 storageClass=storageClass) 

445 run = "test" 

446 registry.registerRun(run) 

447 refId = None 

448 with registry.transaction(): 

449 registry.registerDatasetType(datasetTypeA) 

450 with self.assertRaises(ValueError): 

451 with registry.transaction(): 

452 registry.registerDatasetType(datasetTypeB) 

453 registry.registerDatasetType(datasetTypeC) 

454 registry.insertDimensionData("instrument", {"instrument": "DummyCam"}) 

455 ref, = registry.insertDatasets(datasetTypeA, dataIds=[dataId], run=run) 

456 refId = ref.id 

457 raise ValueError("Oops, something went wrong") 

458 # A should exist 

459 self.assertEqual(registry.getDatasetType("A"), datasetTypeA) 

460 # But B and C should both not exist 

461 with self.assertRaises(KeyError): 

462 registry.getDatasetType("B") 

463 with self.assertRaises(KeyError): 

464 registry.getDatasetType("C") 

465 # And neither should the dataset 

466 self.assertIsNotNone(refId) 

467 self.assertIsNone(registry.getDataset(refId)) 

468 # Or the Dimension entries 

469 with self.assertRaises(LookupError): 

470 registry.expandDataId({"instrument": "DummyCam"}) 

471 

472 def testNestedTransaction(self): 

473 """Test that operations within a transaction block are not rolled back 

474 if an exception propagates out of an inner transaction block and is 

475 then caught. 

476 """ 

477 registry = self.makeRegistry() 

478 dimension = registry.dimensions["instrument"] 

479 dataId1 = {"instrument": "DummyCam"} 

480 dataId2 = {"instrument": "DummyCam2"} 

481 checkpointReached = False 

482 with registry.transaction(): 

483 # This should be added and (ultimately) committed. 

484 registry.insertDimensionData(dimension, dataId1) 

485 with self.assertRaises(sqlalchemy.exc.IntegrityError): 

486 with registry.transaction(): 

487 # This does not conflict, and should succeed (but not 

488 # be committed). 

489 registry.insertDimensionData(dimension, dataId2) 

490 checkpointReached = True 

491 # This should conflict and raise, triggerring a rollback 

492 # of the previous insertion within the same transaction 

493 # context, but not the original insertion in the outer 

494 # block. 

495 registry.insertDimensionData(dimension, dataId1) 

496 self.assertTrue(checkpointReached) 

497 self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph)) 

498 with self.assertRaises(LookupError): 

499 registry.expandDataId(dataId2, graph=dimension.graph) 

500 

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap."""
        registry = self.makeRegistry()

        # Need a bunch of dimensions and datasets for this test: one
        # instrument, two filters, five detectors, three visits, and six
        # exposures (two per visit).
        registry.insertDimensionData(
            "instrument",
            dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", abstract_filter="r"),
            dict(instrument="DummyCam", name="dummy_i", abstract_filter="i"),
        )
        registry.insertDimensionData(
            "detector",
            *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r"),
            # NOTE(review): visit 20 is named "twelve" — looks like a
            # test-data typo, but the name is never asserted on, so harmless.
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, name="100", visit=10, physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, name="101", visit=10, physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, name="110", visit=11, physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, name="111", visit=11, physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, name="200", visit=20, physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, name="201", visit=20, physical_filter="dummy_r"),
        )
        # Collections and dataset types used below.
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(name="RAW",
                              dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
                              storageClass=storageClass)
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(name="CALEXP",
                                 dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)

        # Add pre-existing datasets to run1 (visits 10 and 11).
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # exposures 100 and 101 appear in both run1 and tagged2.
                # 100 has different datasets in the different collections
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2 (visit 20, via run2).
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        # Union of the RAW and CALEXP required dimensions:
        # instrument, exposure, visit, detector.
        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that single dim string works as well as list of str.
        rows = list(registry.queryDimensions("visit", datasets=rawType, collections=run1, expand=True))
        rowsI = list(registry.queryDimensions(["visit"], datasets=rawType, collections=run1, expand=True))
        self.assertEqual(rows, rowsI)
        # With empty (no "where") expression: everything in run1.
        rows = list(registry.queryDimensions(dimensions, datasets=rawType, collections=run1, expand=True))
        self.assertEqual(len(rows), 4*3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure"))
            # Each data ID should round-trip through both packers, and the
            # two packers should produce distinct packed values.
            packer1 = registry.dimensions.makePacker("visit_detector", dataId)
            packer2 = registry.dimensions.makePacker("exposure_detector", dataId)
            self.assertEqual(packer1.unpack(packer1.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer1.dimensions))
            self.assertEqual(packer2.unpack(packer2.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer2.dimensions))
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # Second collection (tagged2): exposures 100/101 plus 200/201.
        rows = list(registry.queryDimensions(dimensions, datasets=rawType, collections=tagged2))
        self.assertEqual(len(rows), 4*3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # With two input collections.
        rows = list(registry.queryDimensions(dimensions, datasets=rawType, collections=[run1, tagged2]))
        self.assertEqual(len(set(rows)), 6*3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # Limit to a single visit via a "where" expression.
        rows = list(registry.queryDimensions(dimensions, datasets=rawType, collections=run1,
                                             where="visit = 10"))
        self.assertEqual(len(rows), 2*3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # More limiting expression, using link names instead of Table.column.
        rows = list(registry.queryDimensions(dimensions, datasets=rawType, collections=run1,
                                             where="visit = 10 and detector > 1"))
        self.assertEqual(len(rows), 2*2)  # 2 exposures times 2 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))

        # Expression that excludes everything yields no rows.
        rows = list(registry.queryDimensions(dimensions, datasets=rawType, collections=run1,
                                             where="visit > 1000"))
        self.assertEqual(len(rows), 0)

        # Selecting by physical_filter: not one of the requested dimensions,
        # but part of the full expression, so it should work too.
        rows = list(registry.queryDimensions(dimensions, datasets=rawType, collections=run1,
                                             where="physical_filter = 'dummy_r'"))
        self.assertEqual(len(rows), 2*3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

648 

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test, we want
        # "abstract_filter" in the test so also have to add physical_filter
        # dimensions
        registry.insertDimensionData(
            "instrument",
            dict(instrument="DummyCam")
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", abstract_filter="r"),
            dict(instrument="DummyCam", name="dummy_i", abstract_filter="i"),
        )
        registry.insertDimensionData(
            "skymap",
            dict(name="DummyMap", hash="sha!".encode("utf8"))
        )
        # 10 tracts, each with 10 patches; only a subset gets datasets below.
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0)
                  for patch in range(10)]
            )

        # dataset types
        run = "test"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(name="deepCoadd_calexp",
                                 dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                         "abstract_filter")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(name="deepCoadd_mergeDet",
                                dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
                                storageClass=storageClass)
        registry.registerDatasetType(mergeType)
        measType = DatasetType(name="deepCoadd_meas",
                               dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                       "abstract_filter")),
                               storageClass=storageClass)
        registry.registerDatasetType(measType)

        # Query dimensions are the union of the required dimensions of all
        # three dataset types.
        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(calexpType.dimensions.required | mergeType.dimensions.required
                        | measType.dimensions.required)
        )

        # add pre-existing datasets
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, abstract_filter=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # with empty expression
        rows = list(registry.queryDimensions(dimensions,
                                             datasets=[calexpType, mergeType], collections=run))
        self.assertEqual(len(rows), 3*4*2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "abstract_filter"))
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["abstract_filter"] for dataId in rows), ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = list(registry.queryDimensions(dimensions,
                                             datasets=[calexpType, mergeType], collections=run,
                                             where="tract IN (1, 5) AND patch IN (2, 7)"))
        self.assertEqual(len(rows), 2*2*2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7))
        self.assertCountEqual(set(dataId["abstract_filter"] for dataId in rows), ("i", "r"))

        # limit to single filter
        rows = list(registry.queryDimensions(dimensions,
                                             datasets=[calexpType, mergeType], collections=run,
                                             where="abstract_filter = 'i'"))
        self.assertEqual(len(rows), 3*4*1)  # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["abstract_filter"] for dataId in rows), ("i",))

        # expression excludes everything, specifying non-existing skymap is
        # not a fatal error, it's operator error
        rows = list(registry.queryDimensions(dimensions,
                                             datasets=[calexpType, mergeType], collections=run,
                                             where="skymap = 'Mars'"))
        self.assertEqual(len(rows), 0)

746 

747 def testSpatialMatch(self): 

748 """Test involving spatial match using join tables. 

749 

750 Note that realistic test needs a reasonably-defined skypix and regions 

751 in registry tables which is hard to implement in this simple test. 

752 So we do not actually fill registry with any data and all queries will 

753 return empty result, but this is still useful for coverage of the code 

754 that generates query. 

755 """ 

756 registry = self.makeRegistry() 

757 

758 # dataset types 

759 collection = "test" 

760 registry.registerRun(name=collection) 

761 storageClass = StorageClass("testDataset") 

762 registry.storageClasses.registerStorageClass(storageClass) 

763 

764 calexpType = DatasetType(name="CALEXP", 

765 dimensions=registry.dimensions.extract(("instrument", "visit", "detector")), 

766 storageClass=storageClass) 

767 registry.registerDatasetType(calexpType) 

768 

769 coaddType = DatasetType(name="deepCoadd_calexp", 

770 dimensions=registry.dimensions.extract(("skymap", "tract", "patch", 

771 "abstract_filter")), 

772 storageClass=storageClass) 

773 registry.registerDatasetType(coaddType) 

774 

775 dimensions = DimensionGraph( 

776 registry.dimensions, 

777 dimensions=(calexpType.dimensions.required | coaddType.dimensions.required) 

778 ) 

779 

780 # without data this should run OK but return empty set 

781 rows = list(registry.queryDimensions(dimensions, datasets=calexpType, collections=collection)) 

782 self.assertEqual(len(rows), 0) 

783 

    def testCalibrationLabelIndirection(self):
        """Test that we can look up datasets with calibration_label dimensions
        from a data ID with exposure dimensions.
        """

        def _dt(iso_string):
            # Parse an ISO date string into an astropy UTC time for the
            # datetime_begin/datetime_end fields used below.
            return astropy.time.Time(iso_string, format="iso", scale="utc")

        registry = self.makeRegistry()

        # A "flat" dataset type identified by calibration_label (a validity
        # range) rather than by exposure; lookups by exposure must match the
        # exposure's time window against the label's validity range.
        flat = DatasetType(
            "flat",
            registry.dimensions.extract(
                ["instrument", "detector", "physical_filter", "calibration_label"]
            ),
            "ImageU"
        )
        registry.registerDatasetType(flat)
        registry.insertDimensionData("instrument", dict(name="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_i", abstract_filter="i"),
        )
        registry.insertDimensionData(
            "detector",
            *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in (1, 2, 3, 4, 5)]
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_i"),
        )
        # Two exposures, one on each of two consecutive nights.
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, name="100", visit=10, physical_filter="dummy_i",
                 datetime_begin=_dt("2005-12-15 02:00:00"), datetime_end=_dt("2005-12-15 03:00:00")),
            dict(instrument="DummyCam", id=101, name="101", visit=11, physical_filter="dummy_i",
                 datetime_begin=_dt("2005-12-16 02:00:00"), datetime_end=_dt("2005-12-16 03:00:00")),
        )
        # Three validity ranges: one per night, plus one spanning both nights.
        registry.insertDimensionData(
            "calibration_label",
            dict(instrument="DummyCam", name="first_night",
                 datetime_begin=_dt("2005-12-15 01:00:00"), datetime_end=_dt("2005-12-15 04:00:00")),
            dict(instrument="DummyCam", name="second_night",
                 datetime_begin=_dt("2005-12-16 01:00:00"), datetime_end=_dt("2005-12-16 04:00:00")),
            dict(instrument="DummyCam", name="both_nights",
                 datetime_begin=_dt("2005-12-15 01:00:00"), datetime_end=_dt("2005-12-16 04:00:00")),
        )
        # Different flats for different nights for detectors 1-3 in first
        # collection.
        run1 = "calibs1"
        registry.registerRun(run1)
        for detector in (1, 2, 3):
            registry.insertDatasets(flat, [dict(instrument="DummyCam", calibration_label="first_night",
                                                physical_filter="dummy_i", detector=detector)],
                                    run=run1)
            registry.insertDatasets(flat, [dict(instrument="DummyCam", calibration_label="second_night",
                                                physical_filter="dummy_i", detector=detector)],
                                    run=run1)
        # The same flat for both nights for detectors 3-5 (so detector 3 has
        # multiple valid flats) in second collection.
        run2 = "calib2"
        registry.registerRun(run2)
        for detector in (3, 4, 5):
            registry.insertDatasets(flat, [dict(instrument="DummyCam", calibration_label="both_nights",
                                                physical_filter="dummy_i", detector=detector)],
                                    run=run2)
        # Perform queries for individual exposure+detector combinations, which
        # should always return exactly one flat.
        for exposure in (100, 101):
            for detector in (1, 2, 3):
                with self.subTest(exposure=exposure, detector=detector):
                    rows = list(registry.queryDatasets("flat", collections=[run1],
                                                       instrument="DummyCam",
                                                       exposure=exposure,
                                                       detector=detector))
                    self.assertEqual(len(rows), 1)
            for detector in (3, 4, 5):
                with self.subTest(exposure=exposure, detector=detector):
                    rows = registry.queryDatasets("flat", collections=[run2],
                                                  instrument="DummyCam",
                                                  exposure=exposure,
                                                  detector=detector)
                    self.assertEqual(len(list(rows)), 1)
            for detector in (1, 2, 4, 5):
                with self.subTest(exposure=exposure, detector=detector):
                    rows = registry.queryDatasets("flat", collections=[run1, run2],
                                                  instrument="DummyCam",
                                                  exposure=exposure,
                                                  detector=detector)
                    self.assertEqual(len(list(rows)), 1)
            # Detector 3 has a flat in both collections, so searching both
            # collections yields two matches.
            for detector in (3,):
                with self.subTest(exposure=exposure, detector=detector):
                    rows = registry.queryDatasets("flat", collections=[run1, run2],
                                                  instrument="DummyCam",
                                                  exposure=exposure,
                                                  detector=detector)
                    self.assertEqual(len(list(rows)), 2)

882 

883 def testAbstractFilterQuery(self): 

884 """Test that we can run a query that just lists the known 

885 abstract_filters. This is tricky because abstract_filter is 

886 backed by a query against physical_filter. 

887 """ 

888 registry = self.makeRegistry() 

889 registry.insertDimensionData("instrument", dict(name="DummyCam")) 

890 registry.insertDimensionData( 

891 "physical_filter", 

892 dict(instrument="DummyCam", name="dummy_i", abstract_filter="i"), 

893 dict(instrument="DummyCam", name="dummy_i2", abstract_filter="i"), 

894 dict(instrument="DummyCam", name="dummy_r", abstract_filter="r"), 

895 ) 

896 rows = list(registry.queryDimensions(["abstract_filter"])) 

897 self.assertCountEqual( 

898 rows, 

899 [DataCoordinate.standardize(abstract_filter="i", universe=registry.dimensions), 

900 DataCoordinate.standardize(abstract_filter="r", universe=registry.dimensions)] 

901 )