Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ["RegistryTests"] 

24 

25from abc import ABC, abstractmethod 

26from datetime import datetime 

27import os 

28 

29import sqlalchemy 

30 

31from ...core import ( 

32 DataCoordinate, 

33 DatasetType, 

34 DimensionGraph, 

35 StorageClass, 

36 ddl, 

37 YamlRepoImportBackend 

38) 

39from .._registry import Registry, CollectionType, ConflictingDefinitionError, OrphanedRecordError 

40from ..wildcards import DatasetTypeRestriction 

41 

42 

43class RegistryTests(ABC): 

44 """Generic tests for the `Registry` class that can be subclassed to 

45 generate tests for different configurations. 

46 """ 

47 

@classmethod
@abstractmethod
def getDataDir(cls) -> str:
    """Locate the directory that holds the test data YAML files.

    Returns
    -------
    root : `str`
        Root directory containing the test data YAML files.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation; subclasses must
        override it.
    """
    raise NotImplementedError

54 

@abstractmethod
def makeRegistry(self) -> Registry:
    """Construct the `Registry` instance that the tests operate on.

    Returns
    -------
    registry : `Registry`
        The registry to be tested.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation; subclasses must
        override it.
    """
    raise NotImplementedError

60 

def loadData(self, registry: Registry, filename: str):
    """Import registry test data from a YAML import/export file.

    Parameters
    ----------
    registry : `Registry`
        Registry handed to the import backend.
    filename : `str`
        Name of the YAML file, joined onto the directory returned by
        `getDataDir`.
    """
    path = os.path.join(self.getDataDir(), filename)
    with open(path, 'r') as stream:
        importer = YamlRepoImportBackend(stream, registry)
        # Register first, then load; no datastore is involved.
        importer.register()
        importer.load(datastore=None)

69 

def assertRowCount(self, registry: Registry, table: str, count: int):
    """Assert that a registry table holds exactly ``count`` rows.

    Parameters
    ----------
    registry : `Registry`
        Registry whose private ``_tables`` and ``_db`` attributes are used.
    table : `str`
        Attribute name of the table on ``registry._tables``.
    count : `int`
        Expected number of rows.
    """
    # TODO: all tests that rely on this method should be rewritten, as it
    # needs to depend on Registry implementation details to have any chance
    # of working.
    countColumn = sqlalchemy.sql.func.count()
    selectCount = sqlalchemy.sql.select([countColumn])
    target = getattr(registry._tables, table)
    query = selectCount.select_from(target)
    actual = registry._db.query(query).scalar()
    self.assertEqual(actual, count)

82 

def testOpaque(self):
    """Exercise the opaque-table API: `Registry.registerOpaqueTable`,
    `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
    `Registry.deleteOpaqueData`.
    """
    registry = self.makeRegistry()
    tableName = "opaque_table_for_testing"

    def fetch(**where):
        # Materialize the rows matching the given column constraints.
        return list(registry.fetchOpaqueData(tableName, **where))

    fieldSpecs = [
        ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
        ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
        ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
    ]
    registry.registerOpaqueTable(tableName, spec=ddl.TableSpec(fields=fieldSpecs))

    one = {"id": 1, "name": "one", "count": None}
    two = {"id": 2, "name": "two", "count": 5}
    three = {"id": 3, "name": "three", "count": 6}
    records = [one, two, three]
    registry.insertOpaqueData(tableName, *records)

    # Fetch everything, then by single columns, then by a combination that
    # matches no row.
    self.assertCountEqual(records, fetch())
    self.assertEqual([one], fetch(id=1))
    self.assertEqual([two], fetch(name="two"))
    self.assertEqual([], fetch(id=1, name="two"))

    # Delete a single row, then every remaining row.
    registry.deleteOpaqueData(tableName, id=3)
    self.assertCountEqual([one, two], fetch())
    registry.deleteOpaqueData(tableName)
    self.assertEqual([], fetch())

114 

def testDatasetType(self):
    """Exercise `Registry.registerDatasetType`, `Registry.getDatasetType`,
    and `Registry.queryDatasetTypes`.
    """
    registry = self.makeRegistry()

    # Register a first dataset type and read it back.
    firstName = "test"
    firstStorageClass = StorageClass("testDatasetType")
    registry.storageClasses.registerStorageClass(firstStorageClass)
    visitDimensions = registry.dimensions.extract(("instrument", "visit"))
    patchDimensions = registry.dimensions.extract(("instrument", "patch"))
    firstType = DatasetType(firstName, visitDimensions, firstStorageClass)
    # The first registration reports that something was inserted.
    self.assertTrue(registry.registerDatasetType(firstType))
    fetchedFirst = registry.getDatasetType(firstName)
    self.assertEqual(fetchedFirst, firstType)

    # Registering the identical definition again is accepted and returns
    # a false value...
    self.assertFalse(registry.registerDatasetType(firstType))
    # ...but the same name with different dimensions is a conflict.
    with self.assertRaises(ConflictingDefinitionError):
        conflictingType = DatasetType(firstName, patchDimensions, firstStorageClass)
        registry.registerDatasetType(conflictingType)

    # Register a second dataset type under a different name and storage
    # class, and read it back.
    secondName = "testNoneTemplate"
    secondStorageClass = StorageClass("testDatasetType2")
    registry.storageClasses.registerStorageClass(secondStorageClass)
    visitDimensions = registry.dimensions.extract(("instrument", "visit"))
    secondType = DatasetType(secondName, visitDimensions, secondStorageClass)
    registry.registerDatasetType(secondType)
    fetchedSecond = registry.getDatasetType(secondName)
    self.assertEqual(fetchedSecond, secondType)

    # Exactly the two registered types should be reported.
    registeredTypes = set(registry.queryDatasetTypes())
    self.assertEqual(registeredTypes, {fetchedFirst, fetchedSecond})

151 

def testDimensions(self):
    """Exercise `Registry.insertDimensionData` and `Registry.expandDataId`.
    """
    registry = self.makeRegistry()

    # --- instrument ---
    instrumentName = "instrument"
    instrument = registry.dimensions[instrumentName]
    instrumentRecord = {"name": "DummyCam", "visit_max": 10, "exposure_max": 10, "detector_max": 2}
    registry.insertDimensionData(instrumentName, instrumentRecord)
    # A second insert of the same record must be rejected.
    with self.assertRaises(sqlalchemy.exc.IntegrityError):
        registry.insertDimensionData(instrumentName, instrumentRecord)
    # Expanding the data ID should give back the record just inserted.
    expanded = registry.expandDataId(instrument="DummyCam", graph=instrument.graph)
    self.assertEqual(expanded.records[instrumentName].toDict(), instrumentRecord)
    # An ID with no matching record must raise.
    with self.assertRaises(LookupError):
        registry.expandDataId({"instrument": "Unknown"}, graph=instrument.graph)

    # abstract_filter doesn't have a table; insert should fail.
    with self.assertRaises(TypeError):
        registry.insertDimensionData("abstract_filter", {"abstract_filter": "i"})

    # --- physical_filter ---
    filterName = "physical_filter"
    physicalFilter = registry.dimensions[filterName]
    filterRecord = {"name": "DummyCam_i", "abstract_filter": "i"}
    # Without the required "instrument" dependency the insert must fail.
    with self.assertRaises(sqlalchemy.exc.IntegrityError):
        registry.insertDimensionData(filterName, filterRecord)
    # Supplying the dependency makes the same record insertable.
    filterRecord["instrument"] = "DummyCam"
    registry.insertDimensionData(filterName, filterRecord)
    # Expanding the data ID should give back the record just inserted.
    expanded = registry.expandDataId(
        instrument="DummyCam", physical_filter="DummyCam_i",
        graph=physicalFilter.graph
    )
    self.assertEqual(expanded.records[filterName].toDict(), filterRecord)

195 

def testDataset(self):
    """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
    and `Registry.removeDataset`.

    Inserts one ``permabias`` dataset into a new run, reads it back by ID,
    checks that a duplicate insert is rejected, and checks that the dataset
    can no longer be found in the run after it is removed.
    """
    registry = self.makeRegistry()
    self.loadData(registry, "base.yaml")
    run = "test"
    registry.registerRun(run)
    datasetType = registry.getDatasetType("permabias")
    dataId = {"instrument": "Cam1", "detector": 2}
    ref, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
    # Check the ID before using it, so that a missing ID is reported as
    # such instead of as whatever getDataset does when handed None.
    self.assertIsNotNone(ref.id)
    outRef = registry.getDataset(ref.id)
    self.assertEqual(ref, outRef)
    # Same dataset type, data ID and run again: must be rejected.
    with self.assertRaises(ConflictingDefinitionError):
        registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
    registry.removeDataset(ref)
    self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

214 

def testComponents(self):
    """Exercise `Registry.attachComponent` together with the other dataset
    operations on a composite dataset.
    """
    registry = self.makeRegistry()
    self.loadData(registry, "base.yaml")
    run = "test"
    registry.registerRun(run)

    compositeType = registry.getDatasetType("permabias")
    imageType = registry.getDatasetType("permabias.image")
    maskType = registry.getDatasetType("permabias.mask")
    dataId = {"instrument": "Cam1", "detector": 2}

    # Insert the composite first, then each component, all with the same
    # data ID and run.
    parent, = registry.insertDatasets(compositeType, dataIds=[dataId], run=run)
    componentTypes = {"image": imageType, "mask": maskType}
    children = {
        componentName: registry.insertDatasets(componentType, dataIds=[dataId], run=run)[0]
        for componentName, componentType in componentTypes.items()
    }
    for componentName, componentRef in children.items():
        registry.attachComponent(componentName, parent, componentRef)

    # The components should be visible both on the ref we hold and on a
    # ref freshly retrieved by ID.
    self.assertEqual(parent.components, children)
    fetchedParent = registry.getDataset(parent.id)
    self.assertEqual(fetchedParent.components, children)

    # Remove the parent; this should remove all children.
    registry.removeDataset(parent)
    for removedType in (compositeType, imageType, maskType):
        self.assertIsNone(registry.findDataset(removedType, dataId, collections=[run]))

240 

def testFindDataset(self):
    """Exercise `Registry.findDataset`.
    """
    registry = self.makeRegistry()
    self.loadData(registry, "base.yaml")
    run = "test"
    datasetType = registry.getDatasetType("permabias")
    registry.registerRun(run)

    # A freshly inserted dataset is found again by type and data ID.
    detector4 = {"instrument": "Cam1", "detector": 4}
    inserted, = registry.insertDatasets(datasetType, dataIds=[detector4], run=run)
    found = registry.findDataset(datasetType, detector4, collections=[run])
    self.assertEqual(found, inserted)

    # A data ID that lacks a required key (no detector) must raise.
    incompleteDataId = {"instrument": "Cam1"}
    with self.assertRaises(LookupError):
        registry.findDataset(datasetType, incompleteDataId, collections=run)

    # Distinct data IDs resolve to distinct datasets.
    detector1 = {"instrument": "Cam1", "detector": 1}
    inserted1, = registry.insertDatasets(datasetType, dataIds=[detector1], run=run)
    detector2 = {"instrument": "Cam1", "detector": 2}
    inserted2, = registry.insertDatasets(datasetType, dataIds=[detector2], run=run)
    self.assertEqual(registry.findDataset(datasetType, detector1, collections=run), inserted1)
    self.assertEqual(registry.findDataset(datasetType, detector2, collections=run), inserted2)
    self.assertNotEqual(registry.findDataset(datasetType, detector1, collections=run), inserted2)
    self.assertNotEqual(registry.findDataset(datasetType, detector2, collections=run), inserted1)

    # A data ID for which nothing was inserted yields None.
    detector3 = {"instrument": "Cam1", "detector": 3}
    self.assertIsNone(registry.findDataset(datasetType, detector3, collections=run))

269 

def testCollections(self):
    """Tests for registry methods that manage collections.

    Covers tagged collections (`Registry.registerCollection`,
    `Registry.associate`, `Registry.disassociate`),
    `Registry.queryCollections`, and chained collections
    (`Registry.setCollectionChain`, `Registry.getCollectionChain`),
    including per-collection dataset type restrictions and a chain nested
    inside another chain.
    """
    registry = self.makeRegistry()
    self.loadData(registry, "base.yaml")
    self.loadData(registry, "datasets.yaml")
    run1 = "imported_g"
    run2 = "imported_r"
    datasetType = "permabias"
    # Find some datasets via their run's collection.
    dataId1 = {"instrument": "Cam1", "detector": 1}
    ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
    self.assertIsNotNone(ref1)
    dataId2 = {"instrument": "Cam1", "detector": 2}
    ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
    self.assertIsNotNone(ref2)
    # Associate those into a new collection, then look for them there.
    tag1 = "tag1"
    registry.registerCollection(tag1, type=CollectionType.TAGGED)
    registry.associate(tag1, [ref1, ref2])
    self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
    self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
    # Disassociate one and verify that we can't find it there anymore...
    registry.disassociate(tag1, [ref1])
    self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
    # ...but we can still find ref2 in tag1, and ref1 in the run.
    self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
    self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
    collections = set(registry.queryCollections())
    self.assertEqual(collections, {run1, run2, tag1})
    # Associate both refs into tag1 again; ref2 is already there, but that
    # should be a harmless no-op.
    registry.associate(tag1, [ref1, ref2])
    self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
    self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
    # Get a different dataset (from a different run) that has the same
    # dataset type and data ID as ref2.
    ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
    # Without this check the inequality below would pass vacuously if
    # nothing was found in run2.
    self.assertIsNotNone(ref2b)
    self.assertNotEqual(ref2, ref2b)
    # Attempting to associate that into tag1 should be an error.
    with self.assertRaises(ConflictingDefinitionError):
        registry.associate(tag1, [ref2b])
    # That error shouldn't have messed up what we had before.
    self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
    self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
    # Attempt to associate the conflicting dataset again, this time with
    # a dataset that isn't in the collection and won't cause a conflict.
    # Should also fail without modifying anything.
    dataId3 = {"instrument": "Cam1", "detector": 3}
    ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
    # Make sure a real dataset, not None, is handed to associate().
    self.assertIsNotNone(ref3)
    with self.assertRaises(ConflictingDefinitionError):
        registry.associate(tag1, [ref3, ref2b])
    self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
    self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
    self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
    # Register a chained collection that searches:
    # 1. 'tag1'
    # 2. 'run1', but only for the permaflat dataset
    # 3. 'run2'
    chain1 = "chain1"
    registry.registerCollection(chain1, type=CollectionType.CHAINED)
    self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
    # Chained collection exists, but has no collections in it.
    self.assertFalse(registry.getCollectionChain(chain1))
    # Attempt to set its child collections to something circular; that
    # should fail.
    with self.assertRaises(ValueError):
        registry.setCollectionChain(chain1, [tag1, chain1])
    # Add the child collections.
    registry.setCollectionChain(chain1, [tag1, (run1, "permaflat"), run2])
    self.assertEqual(
        list(registry.getCollectionChain(chain1)),
        [(tag1, DatasetTypeRestriction.any),
         (run1, DatasetTypeRestriction.fromExpression("permaflat")),
         (run2, DatasetTypeRestriction.any)]
    )
    # Searching for dataId1 or dataId2 in the chain should return ref1 and
    # ref2, because both are in tag1.
    self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
    self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
    # Now disassociate ref2 from tag1.  The search (for permabias) with
    # dataId2 in chain1 should then:
    # 1. not find it in tag1
    # 2. not look in run1, because it's restricted to permaflat here
    # 3. find a different dataset in run2
    registry.disassociate(tag1, [ref2])
    ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
    self.assertNotEqual(ref2b, ref2)
    self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
    # Look in the chain for a permaflat that is in run1; should get the
    # same ref as if we'd searched run1 directly.
    dataId3 = {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}
    self.assertEqual(registry.findDataset("permaflat", dataId3, collections=chain1),
                     registry.findDataset("permaflat", dataId3, collections=run1),)
    # Define a new chain so we can test recursive chains.
    chain2 = "chain2"
    registry.registerCollection(chain2, type=CollectionType.CHAINED)
    registry.setCollectionChain(chain2, [(run2, "permabias"), chain1])
    # Search for permabias with dataId1 should find it via tag1 in chain2,
    # recursing into chain1, because it is not in run2.
    self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
    self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
    # Search for permabias with dataId2 should find it in run2 (ref2b).
    self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
    # Search for a permaflat that is in run2.  That should not be found
    # at the front of chain2, because of the restriction to permabias
    # on run2 there, but it should be found at the end of chain1.
    dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
    ref4 = registry.findDataset("permaflat", dataId4, collections=run2)
    self.assertIsNotNone(ref4)
    self.assertEqual(ref4, registry.findDataset("permaflat", dataId4, collections=chain2))

381 

def testDatasetLocations(self):
    """Exercise `Registry.insertDatasetLocations`,
    `Registry.getDatasetLocations`, and `Registry.removeDatasetLocation`.
    """
    registry = self.makeRegistry()
    self.loadData(registry, "base.yaml")
    self.loadData(registry, "datasets.yaml")
    run = "imported_g"
    biasRef = registry.findDataset("permabias", dataId={"instrument": "Cam1", "detector": 1},
                                   collections=run)
    flatRef = registry.findDataset("permaflat",
                                   dataId={"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-G"},
                                   collections=run)
    firstStore = "dummystore"
    secondStore = "dummystore2"

    # Record one dataset in the first datastore.
    registry.insertDatasetLocations(firstStore, [biasRef])
    locations = registry.getDatasetLocations(biasRef)
    self.assertIn(firstStore, locations)
    self.assertEqual(len(locations), 1)

    # Record both datasets in the second datastore; the first dataset is
    # now known to two datastores.
    registry.insertDatasetLocations(secondStore, [biasRef, flatRef])
    locations = registry.getDatasetLocations(biasRef)
    self.assertEqual(len(locations), 2)
    self.assertIn(firstStore, locations)
    self.assertIn(secondStore, locations)

    # Dropping the first location leaves only the second.
    registry.removeDatasetLocation(firstStore, biasRef)
    locations = registry.getDatasetLocations(biasRef)
    self.assertEqual(len(locations), 1)
    self.assertNotIn(firstStore, locations)
    self.assertIn(secondStore, locations)

    # The dataset still has a recorded location, so removal is refused.
    with self.assertRaises(OrphanedRecordError):
        registry.removeDataset(biasRef)

    # With the last location gone the dataset can be removed.
    registry.removeDatasetLocation(secondStore, biasRef)
    locations = registry.getDatasetLocations(biasRef)
    self.assertEqual(len(locations), 0)
    self.assertNotIn(secondStore, locations)
    registry.removeDataset(biasRef)  # should not raise

    # The other dataset's location is unaffected.
    locations = registry.getDatasetLocations(flatRef)
    self.assertEqual(len(locations), 1)
    self.assertIn(secondStore, locations)

421 

def testBasicTransaction(self):
    """Check that everything done inside a transaction block is rolled back
    when an exception propagates out of that block.

    Dataset type "A" is registered in an outer transaction that completes
    normally; types "B" and "C", a dimension record and a dataset are
    created in an inner transaction that ends with a `ValueError`.
    """
    registry = self.makeRegistry()
    storageClass = StorageClass("testDatasetType")
    registry.storageClasses.registerStorageClass(storageClass)
    dimensions = registry.dimensions.extract(("instrument",))
    dataId = {"instrument": "DummyCam"}
    datasetTypeA, datasetTypeB, datasetTypeC = [
        DatasetType(name=typeName, dimensions=dimensions, storageClass=storageClass)
        for typeName in ("A", "B", "C")
    ]
    run = "test"
    registry.registerRun(run)
    insertedId = None
    with registry.transaction():
        registry.registerDatasetType(datasetTypeA)
    with self.assertRaises(ValueError):
        with registry.transaction():
            registry.registerDatasetType(datasetTypeB)
            registry.registerDatasetType(datasetTypeC)
            registry.insertDimensionData("instrument", {"instrument": "DummyCam"})
            inserted, = registry.insertDatasets(datasetTypeA, dataIds=[dataId], run=run)
            # Remember the ID so the rollback can be checked afterwards.
            insertedId = inserted.id
            raise ValueError("Oops, something went wrong")
    # A was registered outside the failed block and must still exist.
    self.assertEqual(registry.getDatasetType("A"), datasetTypeA)
    # B and C were registered inside the failed block and must be gone.
    for rolledBackName in ("B", "C"):
        with self.assertRaises(KeyError):
            registry.getDatasetType(rolledBackName)
    # The dataset was really inserted (it got an ID) but is gone now.
    self.assertIsNotNone(insertedId)
    self.assertIsNone(registry.getDataset(insertedId))
    # The dimension record is gone as well.
    with self.assertRaises(LookupError):
        registry.expandDataId({"instrument": "DummyCam"})

466 

def testNestedTransaction(self):
    """Check that work done in an outer transaction block survives when an
    exception escapes an inner transaction block and is caught before it
    leaves the outer one.
    """
    registry = self.makeRegistry()
    instrument = registry.dimensions["instrument"]
    keptDataId = {"instrument": "DummyCam"}
    discardedDataId = {"instrument": "DummyCam2"}
    reachedCheckpoint = False
    with registry.transaction():
        # Outer-block insert: expected to be committed in the end.
        registry.insertDimensionData(instrument, keptDataId)
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            with registry.transaction():
                # No conflict here, so this succeeds, but it should be
                # discarded with the rest of the inner block.
                registry.insertDimensionData(instrument, discardedDataId)
                reachedCheckpoint = True
                # Duplicate of the outer insert: this raises and should
                # roll back the inner block only, leaving the outer
                # block's insert intact.
                registry.insertDimensionData(instrument, keptDataId)
        # The inner block must have got past its first insert.
        self.assertTrue(reachedCheckpoint)
    self.assertIsNotNone(registry.expandDataId(keptDataId, graph=instrument.graph))
    with self.assertRaises(LookupError):
        registry.expandDataId(discardedDataId, graph=instrument.graph)

495 

def testInstrumentDimensions(self):
    """Exercise `Registry.queryDimensions` using instrument dimensions
    only, with no joins to skymap.
    """
    def distinct(dataIds, key):
        # Distinct values that ``key`` takes across the given data IDs.
        return {dataId[key] for dataId in dataIds}

    registry = self.makeRegistry()

    # Dimension records needed by the datasets and queries below.
    registry.insertDimensionData(
        "instrument",
        {"name": "DummyCam", "visit_max": 25, "exposure_max": 300, "detector_max": 6},
    )
    registry.insertDimensionData(
        "physical_filter",
        {"instrument": "DummyCam", "name": "dummy_r", "abstract_filter": "r"},
        {"instrument": "DummyCam", "name": "dummy_i", "abstract_filter": "i"},
    )
    detectorRecords = [
        {"instrument": "DummyCam", "id": detectorId, "full_name": str(detectorId)}
        for detectorId in range(1, 6)
    ]
    registry.insertDimensionData("detector", *detectorRecords)
    registry.insertDimensionData(
        "visit",
        {"instrument": "DummyCam", "id": 10, "name": "ten", "physical_filter": "dummy_i"},
        {"instrument": "DummyCam", "id": 11, "name": "eleven", "physical_filter": "dummy_r"},
        {"instrument": "DummyCam", "id": 20, "name": "twelve", "physical_filter": "dummy_r"},
    )
    # Two exposures per visit; each exposure is named after its ID.
    exposureRecords = [
        {"instrument": "DummyCam", "id": exposureId, "name": str(exposureId),
         "visit": visitId, "physical_filter": filterName}
        for exposureId, visitId, filterName in (
            (100, 10, "dummy_i"),
            (101, 10, "dummy_i"),
            (110, 11, "dummy_r"),
            (111, 11, "dummy_r"),
            (200, 20, "dummy_r"),
            (201, 20, "dummy_r"),
        )
    ]
    registry.insertDimensionData("exposure", *exposureRecords)

    # Collections and dataset types.
    run1 = "test1_r"
    run2 = "test2_r"
    tagged2 = "test2_t"
    registry.registerRun(run1)
    registry.registerRun(run2)
    registry.registerCollection(tagged2)
    storageClass = StorageClass("testDataset")
    registry.storageClasses.registerStorageClass(storageClass)
    rawType = DatasetType(name="RAW",
                          dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
                          storageClass=storageClass)
    registry.registerDatasetType(rawType)
    calexpType = DatasetType(name="CALEXP",
                             dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
                             storageClass=storageClass)
    registry.registerDatasetType(calexpType)

    # Pre-existing datasets in run1; only 3 of the 5 detectors get one.
    for exposureId in (100, 101, 110, 111):
        for detectorId in (1, 2, 3):
            rawDataId = {"instrument": "DummyCam", "exposure": exposureId, "detector": detectorId}
            ref, = registry.insertDatasets(rawType, dataIds=[rawDataId], run=run1)
            # Exposures 100 and 101 appear in both run1 and tagged2:
            # 100 has different datasets in the two collections, while
            # 101 has the same dataset in both.
            if exposureId == 100:
                ref, = registry.insertDatasets(rawType, dataIds=[rawDataId], run=run2)
            if exposureId in (100, 101):
                registry.associate(tagged2, [ref])
    # Pre-existing datasets inserted into run2 and tagged into tagged2;
    # again only 3 of the 5 detectors get one.
    for exposureId in (200, 201):
        for detectorId in (3, 4, 5):
            rawDataId = {"instrument": "DummyCam", "exposure": exposureId, "detector": detectorId}
            ref, = registry.insertDatasets(rawType, dataIds=[rawDataId], run=run2)
            registry.associate(tagged2, [ref])

    dimensions = DimensionGraph(
        registry.dimensions,
        dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
    )

    # A single dimension name gives the same result as a one-element list.
    rows = list(registry.queryDimensions("visit", datasets=rawType, collections=run1, expand=True))
    rowsI = list(registry.queryDimensions(["visit"], datasets=rawType, collections=run1, expand=True))
    self.assertEqual(rows, rowsI)

    # No where expression.
    rows = list(registry.queryDimensions(dimensions, datasets=rawType, collections=run1, expand=True))
    self.assertEqual(len(rows), 4*3)   # 4 exposures times 3 detectors
    for row in rows:
        self.assertCountEqual(row.keys(), ("instrument", "detector", "exposure"))
        visitPacker = registry.dimensions.makePacker("visit_detector", row)
        exposurePacker = registry.dimensions.makePacker("exposure_detector", row)
        # Packing and unpacking must round-trip for both packers, and the
        # two packers must produce different packed values.
        self.assertEqual(visitPacker.unpack(visitPacker.pack(row)),
                         DataCoordinate.standardize(row, graph=visitPacker.dimensions))
        self.assertEqual(exposurePacker.unpack(exposurePacker.pack(row)),
                         DataCoordinate.standardize(row, graph=exposurePacker.dimensions))
        self.assertNotEqual(visitPacker.pack(row), exposurePacker.pack(row))
    self.assertCountEqual(distinct(rows, "exposure"), (100, 101, 110, 111))
    self.assertCountEqual(distinct(rows, "visit"), (10, 11))
    self.assertCountEqual(distinct(rows, "detector"), (1, 2, 3))

    # Second collection.
    rows = list(registry.queryDimensions(dimensions, datasets=rawType, collections=tagged2))
    self.assertEqual(len(rows), 4*3)   # 4 exposures times 3 detectors
    for row in rows:
        self.assertCountEqual(row.keys(), ("instrument", "detector", "exposure"))
    self.assertCountEqual(distinct(rows, "exposure"), (100, 101, 200, 201))
    self.assertCountEqual(distinct(rows, "visit"), (10, 20))
    self.assertCountEqual(distinct(rows, "detector"), (1, 2, 3, 4, 5))

    # Two input collections searched together.
    rows = list(registry.queryDimensions(dimensions, datasets=rawType, collections=[run1, tagged2]))
    self.assertEqual(len(set(rows)), 6*3)   # 6 exposures times 3 detectors; set needed to de-dupe
    for row in rows:
        self.assertCountEqual(row.keys(), ("instrument", "detector", "exposure"))
    self.assertCountEqual(distinct(rows, "exposure"), (100, 101, 110, 111, 200, 201))
    self.assertCountEqual(distinct(rows, "visit"), (10, 11, 20))
    self.assertCountEqual(distinct(rows, "detector"), (1, 2, 3, 4, 5))

    # Limit to a single visit.
    rows = list(registry.queryDimensions(dimensions, datasets=rawType, collections=run1,
                                         where="visit = 10"))
    self.assertEqual(len(rows), 2*3)   # 2 exposures times 3 detectors
    self.assertCountEqual(distinct(rows, "exposure"), (100, 101))
    self.assertCountEqual(distinct(rows, "visit"), (10,))
    self.assertCountEqual(distinct(rows, "detector"), (1, 2, 3))

    # More limiting expression, using link names instead of Table.column.
    rows = list(registry.queryDimensions(dimensions, datasets=rawType, collections=run1,
                                         where="visit = 10 and detector > 1"))
    self.assertEqual(len(rows), 2*2)   # 2 exposures times 2 detectors
    self.assertCountEqual(distinct(rows, "exposure"), (100, 101))
    self.assertCountEqual(distinct(rows, "visit"), (10,))
    self.assertCountEqual(distinct(rows, "detector"), (2, 3))

    # Expression that excludes everything.
    rows = list(registry.queryDimensions(dimensions, datasets=rawType, collections=run1,
                                         where="visit > 1000"))
    self.assertEqual(len(rows), 0)

    # Selecting by physical_filter, this is not in the dimensions, but it
    # is a part of the full expression so it should work too.
    rows = list(registry.queryDimensions(dimensions, datasets=rawType, collections=run1,
                                         where="physical_filter = 'dummy_r'"))
    self.assertEqual(len(rows), 2*3)   # 2 exposures times 3 detectors
    self.assertCountEqual(distinct(rows, "exposure"), (110, 111))
    self.assertCountEqual(distinct(rows, "visit"), (11,))
    self.assertCountEqual(distinct(rows, "detector"), (1, 2, 3))

643 

def testSkyMapDimensions(self):
    """Tests involving only skymap dimensions, no joins to instrument.

    Inserts one skymap with 10 tracts of 10 patches each, registers three
    coadd dataset types, adds ``deepCoadd_mergeDet`` and
    ``deepCoadd_calexp`` datasets for tracts (1, 3, 5) x patches
    (2, 4, 6, 7), and then checks the data IDs returned by
    ``registry.queryDimensions`` for several ``where`` expressions.
    """
    registry = self.makeRegistry()

    # need a bunch of dimensions and datasets for test, we want
    # "abstract_filter" in the test so also have to add physical_filter
    # dimensions
    registry.insertDimensionData(
        "instrument",
        dict(instrument="DummyCam")
    )
    registry.insertDimensionData(
        "physical_filter",
        dict(instrument="DummyCam", name="dummy_r", abstract_filter="r"),
        dict(instrument="DummyCam", name="dummy_i", abstract_filter="i"),
    )
    registry.insertDimensionData(
        "skymap",
        dict(name="DummyMap", hash=b"sha!")
    )
    for tract in range(10):
        registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
        registry.insertDimensionData(
            "patch",
            *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0)
              for patch in range(10)]
        )

    # dataset types
    run = "test"
    registry.registerRun(run)
    storageClass = StorageClass("testDataset")
    registry.storageClasses.registerStorageClass(storageClass)
    calexpType = DatasetType(name="deepCoadd_calexp",
                             dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                     "abstract_filter")),
                             storageClass=storageClass)
    registry.registerDatasetType(calexpType)
    mergeType = DatasetType(name="deepCoadd_mergeDet",
                            dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
                            storageClass=storageClass)
    registry.registerDatasetType(mergeType)
    measType = DatasetType(name="deepCoadd_meas",
                           dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                   "abstract_filter")),
                           storageClass=storageClass)
    registry.registerDatasetType(measType)

    # Query over the union of the required dimensions of all three types.
    dimensions = DimensionGraph(
        registry.dimensions,
        dimensions=(calexpType.dimensions.required | mergeType.dimensions.required
                    | measType.dimensions.required)
    )

    # add pre-existing datasets: one mergeDet per (tract, patch) and one
    # calexp per (tract, patch, abstract_filter); no meas datasets.
    for tract in (1, 3, 5):
        for patch in (2, 4, 6, 7):
            dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
            registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
            for aFilter in ("i", "r"):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch, abstract_filter=aFilter)
                registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

    # with empty expression
    rows = list(registry.queryDimensions(dimensions,
                                         datasets=[calexpType, mergeType], collections=run))
    self.assertEqual(len(rows), 3*4*2)   # 3 tracts x 4 patches x 2 filters
    for dataId in rows:
        self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "abstract_filter"))
    self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5))
    self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7))
    self.assertCountEqual({dataId["abstract_filter"] for dataId in rows}, ("i", "r"))

    # limit to 2 tracts and 2 patches
    rows = list(registry.queryDimensions(dimensions,
                                         datasets=[calexpType, mergeType], collections=run,
                                         where="tract IN (1, 5) AND patch IN (2, 7)"))
    self.assertEqual(len(rows), 2*2*2)   # 2 tracts x 2 patches x 2 filters
    self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 5))
    self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 7))
    self.assertCountEqual({dataId["abstract_filter"] for dataId in rows}, ("i", "r"))

    # limit to single filter
    rows = list(registry.queryDimensions(dimensions,
                                         datasets=[calexpType, mergeType], collections=run,
                                         where="abstract_filter = 'i'"))
    self.assertEqual(len(rows), 3*4*1)   # 3 tracts x 4 patches x 1 filter
    self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5))
    self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7))
    self.assertCountEqual({dataId["abstract_filter"] for dataId in rows}, ("i",))

    # expression excludes everything, specifying non-existing skymap is
    # not a fatal error, it's operator error
    rows = list(registry.queryDimensions(dimensions,
                                         datasets=[calexpType, mergeType], collections=run,
                                         where="skymap = 'Mars'"))
    self.assertEqual(len(rows), 0)

741 

def testSpatialMatch(self):
    """Exercise query generation for a spatial match using join tables.

    A realistic test would need reasonably-defined skypix and regions in
    the registry tables, which is hard to implement in this simple test.
    No data is inserted here, so every query returns an empty result;
    the test is still useful for coverage of the code that generates the
    query.
    """
    registry = self.makeRegistry()

    # Run and storage class shared by both dataset types.
    runName = "test"
    registry.registerRun(name=runName)
    testStorage = StorageClass("testDataset")
    registry.storageClasses.registerStorageClass(testStorage)

    # Dataset type defined on instrument-side dimensions.
    visitDims = registry.dimensions.extract(("instrument", "visit", "detector"))
    visitType = DatasetType(name="CALEXP", dimensions=visitDims, storageClass=testStorage)
    registry.registerDatasetType(visitType)

    # Dataset type defined on skymap-side dimensions.
    skyDims = registry.dimensions.extract(("skymap", "tract", "patch", "abstract_filter"))
    skyType = DatasetType(name="deepCoadd_calexp", dimensions=skyDims, storageClass=testStorage)
    registry.registerDatasetType(skyType)

    # Ask for the union of the required dimensions of both dataset types.
    requiredUnion = visitType.dimensions.required | skyType.dimensions.required
    queryGraph = DimensionGraph(registry.dimensions, dimensions=requiredUnion)

    # without data this should run OK but return empty set
    found = list(registry.queryDimensions(queryGraph, datasets=visitType, collections=runName))
    self.assertEqual(len(found), 0)

778 

def testCalibrationLabelIndirection(self):
    """Check that datasets with calibration_label dimensions can be
    looked up from a data ID with exposure dimensions.
    """
    registry = self.makeRegistry()

    flatDims = registry.dimensions.extract(
        ["instrument", "detector", "physical_filter", "calibration_label"]
    )
    flat = DatasetType("flat", flatDims, "ImageU")
    registry.registerDatasetType(flat)

    # Dimension records: one instrument, one filter, five detectors, two
    # visits with one exposure each, and three calibration labels.
    registry.insertDimensionData("instrument", dict(name="DummyCam"))
    registry.insertDimensionData(
        "physical_filter",
        dict(instrument="DummyCam", name="dummy_i", abstract_filter="i"),
    )
    detectorRecords = [dict(instrument="DummyCam", id=detId, full_name=str(detId))
                       for detId in (1, 2, 3, 4, 5)]
    registry.insertDimensionData("detector", *detectorRecords)
    registry.insertDimensionData(
        "visit",
        dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i"),
        dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_i"),
    )
    firstExposure = dict(instrument="DummyCam", id=100, name="100", visit=10,
                         physical_filter="dummy_i",
                         datetime_begin=datetime(2005, 12, 15, 2),
                         datetime_end=datetime(2005, 12, 15, 3))
    secondExposure = dict(instrument="DummyCam", id=101, name="101", visit=11,
                          physical_filter="dummy_i",
                          datetime_begin=datetime(2005, 12, 16, 2),
                          datetime_end=datetime(2005, 12, 16, 3))
    registry.insertDimensionData("exposure", firstExposure, secondExposure)
    # Each single-night label brackets one exposure; "both_nights" spans
    # both of them.
    labelRecords = [
        dict(instrument="DummyCam", name="first_night",
             datetime_begin=datetime(2005, 12, 15, 1), datetime_end=datetime(2005, 12, 15, 4)),
        dict(instrument="DummyCam", name="second_night",
             datetime_begin=datetime(2005, 12, 16, 1), datetime_end=datetime(2005, 12, 16, 4)),
        dict(instrument="DummyCam", name="both_nights",
             datetime_begin=datetime(2005, 12, 15, 1), datetime_end=datetime(2005, 12, 16, 4)),
    ]
    registry.insertDimensionData("calibration_label", *labelRecords)

    # Different flats for different nights for detectors 1-3 in first
    # collection.
    run1 = "calibs1"
    registry.registerRun(run1)
    for detector in (1, 2, 3):
        for label in ("first_night", "second_night"):
            flatId = dict(instrument="DummyCam", calibration_label=label,
                          physical_filter="dummy_i", detector=detector)
            registry.insertDatasets(flat, [flatId], run=run1)

    # The same flat for both nights for detectors 3-5 (so detector 3 has
    # multiple valid flats) in second collection.
    run2 = "calib2"
    registry.registerRun(run2)
    for detector in (3, 4, 5):
        flatId = dict(instrument="DummyCam", calibration_label="both_nights",
                      physical_filter="dummy_i", detector=detector)
        registry.insertDatasets(flat, [flatId], run=run2)

    # Query individual exposure+detector combinations.  Each entry is
    # (detectors, collections searched, flats expected per query); only
    # detector 3 searched across both collections matches two flats.
    scenarios = (
        ((1, 2, 3), (run1,), 1),
        ((3, 4, 5), (run2,), 1),
        ((1, 2, 4, 5), (run1, run2), 1),
        ((3,), (run1, run2), 2),
    )
    for exposure in (100, 101):
        for detectors, searched, expected in scenarios:
            for detector in detectors:
                with self.subTest(exposure=exposure, detector=detector):
                    # Pass a fresh list for every query.
                    found = list(registry.queryDatasets("flat", collections=list(searched),
                                                        instrument="DummyCam",
                                                        exposure=exposure,
                                                        detector=detector))
                    self.assertEqual(len(found), expected)

873 

def testAbstractFilterQuery(self):
    """Check that a query listing only the known abstract_filters works.

    This is tricky because abstract_filter is backed by a query against
    physical_filter.
    """
    registry = self.makeRegistry()
    registry.insertDimensionData("instrument", dict(name="DummyCam"))

    # Two physical filters share abstract_filter "i"; the query result is
    # expected to list "i" only once.
    filterPairs = (("dummy_i", "i"), ("dummy_i2", "i"), ("dummy_r", "r"))
    filterRecords = [dict(instrument="DummyCam", name=physical, abstract_filter=abstract)
                     for physical, abstract in filterPairs]
    registry.insertDimensionData("physical_filter", *filterRecords)

    found = list(registry.queryDimensions(["abstract_filter"]))
    expected = [DataCoordinate.standardize(abstract_filter=band, universe=registry.dimensions)
                for band in ("i", "r")]
    self.assertCountEqual(found, expected)