# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RegistryTests"]

from abc import ABC, abstractmethod
from collections import defaultdict
from datetime import datetime, timedelta
import itertools
import logging
import os
import re
from typing import Iterator, Optional, Type, Union, TYPE_CHECKING
import unittest

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None

import lsst.sphgeom
from ...core import (
    DataCoordinate,
    DataCoordinateSequence,
    DataCoordinateSet,
    DatasetAssociation,
    DatasetRef,
    DatasetType,
    DimensionGraph,
    NamedValueSet,
    StorageClass,
    ddl,
    Timespan,
)
from ..summaries import CollectionSummary
from .._collectionType import CollectionType
from .._config import RegistryConfig
from .._exceptions import (
    ConflictingDefinitionError,
    InconsistentDataIdError,
    MissingCollectionError,
    OrphanedRecordError,
)
from ..interfaces import ButlerAttributeExistsError

if TYPE_CHECKING:
    from .._registry import Registry


class RegistryTests(ABC):
    """Generic tests for the `Registry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: Optional[str] = None
    """Name of the collections manager class; if a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    datasetsManager: Optional[str] = None
    """Name of the datasets manager class; if a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files.
        """
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create a `RegistryConfig` used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        The returned instance will be pre-configured based on the values of
        class members, and default-configured for all other parameters.
        Subclasses that need default configuration should just instantiate
        `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers", "collections"] = self.collectionsManager
        if self.datasetsManager:
            config["managers", "datasets"] = self.datasetsManager
        return config

    @abstractmethod
    def makeRegistry(self) -> Registry:
        """Return the Registry instance to be tested.
        """
        raise NotImplementedError()
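
    # A minimal sketch of a concrete subclass, assuming a
    # ``Registry.createFromConfig`` entry point and a ``data`` directory next
    # to the test module (both assumptions, for illustration only):
    #
    #     class SqliteRegistryTests(RegistryTests, unittest.TestCase):
    #
    #         @classmethod
    #         def getDataDir(cls) -> str:
    #             return os.path.join(os.path.dirname(__file__), "data")
    #
    #         def makeRegistry(self) -> Registry:
    #             return Registry.createFromConfig(self.makeRegistryConfig())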

    def loadData(self, registry: Registry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
        """
        from ...transfers import YamlRepoImportBackend
        with open(os.path.join(self.getDataDir(), filename), 'r') as stream:
            backend = YamlRepoImportBackend(stream, registry)
            backend.register()
            backend.load(datastore=None)

    def testOpaque(self):
        """Tests for `Registry.registerOpaqueTable`,
        `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
        `Registry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            )
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `Registry.registerDatasetType` and
        `Registry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-registering an identical definition should return False
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Registering a non-identical definition under the same name should
        # raise
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `Registry.insertDimensionData`,
        `Registry.syncDimensionData`, and `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {"name": "DummyCam", "visit_max": 10, "exposure_max": 10, "detector_max": 2,
                          "class_name": "lsst.obs.base.Instrument"}
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam",
                graph=dimension.graph
            ).records[dimensionName].toDict(),
            dimensionValue
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(LookupError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam", physical_filter="DummyCam_i",
                graph=dimension2.graph
            ).records[dimensionName2].toDict(),
            dimensionValue2
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {"instrument": "DummyCam", "id": 1, "full_name": "one",
                           "name_in_raft": "zero", "purpose": "SCIENCE"}
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again.  Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {"instrument": "DummyCam", "id": 1, "full_name": "one",
                 "name_in_raft": "four", "purpose": "SCIENCE"}
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `Registry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
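        # The visit_definition record above ties visit 1 to exposure 1 only,
        # so a data ID pairing visit 1 with exposure 2 is self-inconsistent.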
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
        and `Registry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        ref, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `Registry.findDataset`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        inputRef, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        inputRef1, = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        inputRef2, = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))

    def testRemoveDatasetTypeSuccess(self):
        """Test that Registry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(KeyError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that Registry.removeDatasetType raises when there are
        datasets of that type present or if the dataset type is for a
        component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(ValueError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be
        # included when components=True.
        self.assertEqual(
            {"bias", "flat"},
            NamedValueSet(registry.queryDatasetTypes()).names
        )
        self.assertEqual(
            {"bias", "flat"},
            NamedValueSet(registry.queryDatasetTypes(components=False)).names
        )
        self.assertLess(
            {"bias", "flat", "bias.wcs", "flat.photoCalib"},
            NamedValueSet(registry.queryDatasetTypes(components=True)).names
        )
        # Use a pattern that can match either parent or components.  Again,
        # components are only returned if components=True.
        self.assertEqual(
            {"bias"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names
        )
        self.assertEqual(
            {"bias"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
        )
        self.assertLess(
            {"bias", "bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names
        )
        # This pattern matches only a component.  In this case we also return
        # that component dataset type if components=None.
        self.assertEqual(
            {"bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"))).names
        )
        self.assertEqual(
            set(),
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names
        )
        self.assertEqual(
            {"bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names
        )
        # Add a dataset type using a StorageClass that we'll then remove;
        # check that this does not affect our ability to query for dataset
        # types (though it will warn).
        tempStorageClass = StorageClass(
            name="TempStorageClass",
            components={"data": registry.storageClasses.getStorageClass("StructuredDataDict")}
        )
        registry.storageClasses.registerStorageClass(tempStorageClass)
        datasetType = DatasetType("temporary", dimensions=["instrument"], storageClass=tempStorageClass,
                                  universe=registry.dimensions)
        registry.registerDatasetType(datasetType)
        registry.storageClasses._unregisterStorageClass(tempStorageClass.name)
        datasetType._storageClass = None
        del tempStorageClass
        # Querying for all dataset types, including components, should include
        # at least all non-component dataset types (and I don't want to
        # enumerate all of the Exposure components for bias and flat here).
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            everything = NamedValueSet(registry.queryDatasetTypes(components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertLess({"bias", "flat", "temporary"}, everything.names)
        # It should not include "temporary.data", because we tried to remove
        # the storage class that would tell it about that.  So if the next
        # line fails (i.e. "temporary.data" _is_ in everything.names), it
        # means this part of the test isn't doing anything, because the
        # _unregister call above isn't simulating the real-life case we want
        # it to simulate, in which different versions of daf_butler in
        # entirely different Python processes interact with the same repo.
        self.assertNotIn("temporary.data", everything.names)
        # Query for dataset types that start with "temp".  This should again
        # not include the component, and also not fail.
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*")))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertEqual({"temporary"}, startsWithTemp.names)

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(parentType, collections=collection,
                                                 instrument="Cam1", detector=1)
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("bias.wcs", collections=collection,
                                         dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        dataIds = registry.queryDataIds(
            ["detector"],
            datasets=["bias.wcs"],
            collections=collection,
        ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
                    for d in (1, 2, 3)
                },
                parentType.dimensions,
            )
        )
        # Search for multiple datasets of a single type with queryDatasets.
        childRefs2 = set(registry.queryDatasets(
            "bias.wcs",
            collections=collection,
        ))
        self.assertEqual(
            {ref.unresolved() for ref in childRefs2},
            {DatasetRef(childType, dataId) for dataId in dataIds}
        )

    def testCollections(self):
        """Tests for registry methods that manage collections.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time along
        # with a dataset that isn't yet in the collection and won't cause a
        # conflict.  Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained
        # collection only if we don't ask to flatten it (i.e. yield only its
        # children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(
            list(registry.getCollectionChain(chain1)),
            [tag1, run2]
        )
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1.  The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
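        # chain2 therefore searches run2 first, then recurses into chain1,
        # whose own search order is [tag1, run2].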
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"]
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"]
        )
        # A search for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # A search for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2; chain2 reaches run2 both
        # directly (at its front) and again at the end of chain1, so the
        # same dataset should be found either way.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(Exception):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(Exception):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, and test that it's gone by asking for its
        # type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testCollectionChainFlatten(self):
        """Test that Registry.setCollectionChain obeys its 'flatten' option.
        """
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(LookupError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(LookupError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled
        back if an exception propagates out of an inner transaction block
        and is then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(LookupError):
            registry.expandDataId(dataId2, graph=dimension.graph)

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins
        to skymap."""
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test
        registry.insertDimensionData(
            "instrument",
            dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector",
            *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit_system",
            dict(instrument="DummyCam", id=1, name="default"),
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
            dict(instrument="DummyCam", id=20, name="twenty", physical_filter="dummy_r", visit_system=1),
        )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
            dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
        )
        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(name="RAW",
                              dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
                              storageClass=storageClass)
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(name="CALEXP",
                                 dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # exposures 100 and 101 appear in both run1 and tagged2;
                # 100 has different datasets in the different collections,
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that a single dimension string works as well as a list of str
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4*3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
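            # Data ID packers encode a data ID into a single integer and
            # back; the two packers below use different dimensions (visit
            # vs. exposure), so packing the same data ID must give
            # different values.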
            packer1 = registry.dimensions.makePacker("visit_detector", dataId)
            packer2 = registry.dimensions.makePacker("exposure_detector", dataId)
            self.assertEqual(packer1.unpack(packer1.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer1.dimensions))
            self.assertEqual(packer2.unpack(packer2.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer2.dimensions))
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4*3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # with two input collections
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6*3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # limit to single visit
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit = 10", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 2*3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit = 10 and detector > 1 and 'DummyCam'=instrument").toSet()
        self.assertEqual(len(rows), 2*2)  # 2 exposures times 2 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))

        # expression excludes everything
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit > 1000", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 0)

        # Selecting by physical_filter, which is not in the requested
        # dimensions but is a part of the full expression, so it should work
        # too.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="physical_filter = 'dummy_r'", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 2*3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test; we want
        # "band" in the test, so we also have to add physical_filter
        # dimensions
        registry.insertDimensionData(
            "instrument",
            dict(instrument="DummyCam")
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "skymap",
            dict(name="DummyMap", hash="sha!".encode("utf8"))
        )
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0)
                  for patch in range(10)]
            )

        # dataset types
        run = "test"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(name="deepCoadd_calexp",
                                 dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                         "band")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(name="deepCoadd_mergeDet",
                                dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
                                storageClass=storageClass)
        registry.registerDatasetType(mergeType)
        measType = DatasetType(name="deepCoadd_meas",
                               dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                       "band")),
                               storageClass=storageClass)
        registry.registerDatasetType(measType)

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(calexpType.dimensions.required | mergeType.dimensions.required
                        | measType.dimensions.required)
        )

        # add pre-existing datasets
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # with empty expression
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3*4*2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band"))
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="tract IN (1, 5) AND patch IN (2, 7)", skymap="DummyMap").toSet()
        self.assertEqual(len(rows), 2*2*2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to a single filter
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="band = 'i'").toSet()
        self.assertEqual(len(rows), 3*4*1)  # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i",))

        # The expression excludes everything; specifying a non-existing
        # skymap is not a fatal error, just an operator error
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="skymap = 'Mars'").toSet()
        self.assertEqual(len(rows), 0)

    def testSpatialJoin(self):
        """Test queries that involve spatial overlap joins.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")

        # Dictionary of spatial DatabaseDimensionElements, keyed by the name
        # of the TopologicalFamily they belong to.  We'll relate all elements
        # in each family to all of the elements in each other family.
        families = defaultdict(set)
        # Dictionary of {element.name: {dataId: region}}.
        regions = {}
        for element in registry.dimensions.getDatabaseElements():
            if element.spatial is not None:
                families[element.spatial.name].add(element)
                regions[element.name] = {
                    record.dataId: record.region for record in registry.queryDimensionRecords(element)
                }

        # If this check fails, it's not necessarily a problem - it may just be
        # a reasonable change to the default dimension definitions - but the
        # test below depends on there being more than one family to do
        # anything useful.
        self.assertEqual(len(families), 2)

        # Overlap DatabaseDimensionElements with each other.
        for family1, family2 in itertools.combinations(families, 2):
            for element1, element2 in itertools.product(families[family1], families[family2]):
                graph = DimensionGraph.union(element1.graph, element2.graph)
                # Construct the expected set of overlapping data IDs via a
                # brute-force comparison of the regions we've already fetched.
                expected = {
                    DataCoordinate.standardize(
                        {**dataId1.byName(), **dataId2.byName()},
                        graph=graph
                    )
                    for (dataId1, region1), (dataId2, region2)
                    in itertools.product(regions[element1.name].items(), regions[element2.name].items())
                    if not region1.isDisjointFrom(region2)
                }
                self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
                queried = set(registry.queryDataIds(graph))
                self.assertEqual(expected, queried)

        # Overlap each DatabaseDimensionElement with the commonSkyPix system.
        commonSkyPix = registry.dimensions.commonSkyPix
        for elementName, elementRegions in regions.items():
            graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph)
            expected = set()
            for dataId, region in elementRegions.items():
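                # envelope() returns ranges of skypix indices whose pixels
                # may overlap the region, so pair every index in each
                # [begin, end) range with this data ID.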
                for begin, end in commonSkyPix.pixelization.envelope(region):
                    expected.update(
                        DataCoordinate.standardize(
                            {commonSkyPix.name: index, **dataId.byName()},
                            graph=graph
                        )
                        for index in range(begin, end)
                    )
            self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
            queried = set(registry.queryDataIds(graph))
            self.assertEqual(expected, queried)

    def testAbstractQuery(self):
        """Test that we can run a query that just lists the known
        bands.  This is tricky because band is
        backed by a query against physical_filter.
        """
        registry = self.makeRegistry()
        registry.insertDimensionData("instrument", dict(name="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_i", band="i"),
            dict(instrument="DummyCam", name="dummy_i2", band="i"),
            dict(instrument="DummyCam", name="dummy_r", band="r"),
        )
        rows = registry.queryDataIds(["band"]).toSet()
        self.assertCountEqual(
            rows,
            [DataCoordinate.standardize(band="i", universe=registry.dimensions),
             DataCoordinate.standardize(band="r", universe=registry.dimensions)]
        )

    def testAttributeManager(self):
        """Test basic functionality of the attribute manager.
        """
        # Number of attributes with schema versions in a fresh database:
        # 6 managers with 3 records per manager, plus config for dimensions.
        VERSION_COUNT = 6 * 3 + 1

        registry = self.makeRegistry()
        attributes = registry._managers.attributes

        # check what get() returns for a non-existing key
        self.assertIsNone(attributes.get("attr"))
        self.assertEqual(attributes.get("attr", ""), "")
        self.assertEqual(attributes.get("attr", "Value"), "Value")
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT)

        # cannot store an empty key or value
        with self.assertRaises(ValueError):
            attributes.set("", "value")
        with self.assertRaises(ValueError):
            attributes.set("attr", "")

        # set value of a non-existing key
        attributes.set("attr", "value")
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
        self.assertEqual(attributes.get("attr"), "value")

        # updating the value of an existing key requires force=True
        with self.assertRaises(ButlerAttributeExistsError):
            attributes.set("attr", "value2")

        attributes.set("attr", "value2", force=True)
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
        self.assertEqual(attributes.get("attr"), "value2")

        # delete an existing key
        self.assertTrue(attributes.delete("attr"))
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT)

        # delete a non-existing key
        self.assertFalse(attributes.delete("non-attr"))

        # store a bunch of keys and get the list back
        data = [
            ("version.core", "1.2.3"),
            ("version.dimensions", "3.2.1"),
            ("config.managers.opaque", "ByNameOpaqueTableStorageManager"),
        ]
        for key, value in data:
            attributes.set(key, value)
        items = dict(attributes.items())
        for key, value in data:
            self.assertEqual(items[key], value)

    def testQueryDatasetsDeduplication(self):
        """Test that the findFirst option to queryDatasets selects datasets
        from collections in the order given.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
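        # The imported datasets define biases for detectors 1-3 in imported_g
        # and detectors 2-4 in imported_r, so detectors 2 and 3 are the
        # overlap that findFirst must resolve in collection order.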
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"],
                                        findFirst=True)),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"],
                                        findFirst=True)),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )
1157 

    def testQueryResults(self):
        """Test querying for data IDs and then manipulating the QueryResults
        object returned to perform other queries.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        bias = registry.getDatasetType("bias")
        flat = registry.getDatasetType("flat")
        # Obtain expected results from methods other than those we're testing
        # here.  That includes:
        # - the dimensions of the data IDs we want to query:
        expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"])
        # - the dimensions of some other data IDs we'll extract from that:
        expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"])
        # - the data IDs we expect to obtain from the first queries:
        expectedDataIds = DataCoordinateSet(
            {
                DataCoordinate.standardize(instrument="Cam1", detector=d, physical_filter=p,
                                           universe=registry.dimensions)
                for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"})
            },
            graph=expectedGraph,
            hasFull=False,
            hasRecords=False,
        )
        # - the flat datasets we expect to find from those data IDs, in just
        #   one collection (so deduplication is irrelevant):
        expectedFlats = [
            registry.findDataset(flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1",
                                 collections="imported_r"),
            registry.findDataset(flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1",
                                 collections="imported_r"),
            registry.findDataset(flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2",
                                 collections="imported_r"),
        ]
        # - the data IDs we expect to extract from that:
        expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph)
        # - the bias datasets we expect to find from those data IDs, after we
        #   subset out the physical_filter dimension, first with duplicates:
        expectedAllBiases = [
            registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
            registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
        ]
        # - ...and without duplicates:
        expectedDeduplicatedBiases = [
            registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
            registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
        ]
        # Test against those expected results, using a "lazy" query for the
        # data IDs (which re-executes that query each time we use it to do
        # something new).
        dataIds = registry.queryDataIds(
            ["detector", "physical_filter"],
            where="detector.purpose = 'SCIENCE'",  # this rejects detector=4
            instrument="Cam1",
        )
        self.assertEqual(dataIds.graph, expectedGraph)
        self.assertEqual(dataIds.toSet(), expectedDataIds)
        self.assertCountEqual(
            list(
                dataIds.findDatasets(
                    flat,
                    collections=["imported_r"],
                )
            ),
            expectedFlats,
        )
        subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
        self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
        self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
        self.assertCountEqual(
            list(
                subsetDataIds.findDatasets(
                    bias,
                    collections=["imported_r", "imported_g"],
                    findFirst=False,
                )
            ),
            expectedAllBiases,
        )
        self.assertCountEqual(
            list(
                subsetDataIds.findDatasets(
                    bias,
                    collections=["imported_r", "imported_g"],
                    findFirst=True,
                )
            ),
            expectedDeduplicatedBiases,
        )
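        # Everything above used lazy queries; the rest of this test repeats
        # those checks while materializing the results at various steps.
        # materialize() returns a context manager, and the temporary table it
        # creates only lives inside the ``with`` block.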

        # Materialize the bias dataset queries (only) by putting the results
        # into temporary tables, then repeat those tests.
        with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                        findFirst=False).materialize() as biases:
            self.assertCountEqual(list(biases), expectedAllBiases)
        with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                        findFirst=True).materialize() as biases:
            self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
        # Materialize the data ID subset query, but not the dataset queries.
        with subsetDataIds.materialize() as subsetDataIds:
            self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
            self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=False,
                    )
                ),
                expectedAllBiases,
            )
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=True,
                    )
                ),
                expectedDeduplicatedBiases,
            )
            # Materialize the dataset queries, too.
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=False).materialize() as biases:
                self.assertCountEqual(list(biases), expectedAllBiases)
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=True).materialize() as biases:
                self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
        # Materialize the original query, but none of the follow-up queries.
        with dataIds.materialize() as dataIds:
            self.assertEqual(dataIds.graph, expectedGraph)
            self.assertEqual(dataIds.toSet(), expectedDataIds)
            self.assertCountEqual(
                list(
                    dataIds.findDatasets(
                        flat,
                        collections=["imported_r"],
                    )
                ),
                expectedFlats,
            )
            subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
            self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
            self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=False,
                    )
                ),
                expectedAllBiases,
            )
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=True,
                    )
                ),
                expectedDeduplicatedBiases,
            )
            # Materialize just the bias dataset queries.
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=False).materialize() as biases:
                self.assertCountEqual(list(biases), expectedAllBiases)
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=True).materialize() as biases:
                self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
            # Materialize the subset data ID query, but not the dataset
            # queries.
            with subsetDataIds.materialize() as subsetDataIds:
                self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
                self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
                self.assertCountEqual(
                    list(
                        subsetDataIds.findDatasets(
                            bias,
                            collections=["imported_r", "imported_g"],
                            findFirst=False,
                        )
                    ),
                    expectedAllBiases,
                )
                self.assertCountEqual(
                    list(
                        subsetDataIds.findDatasets(
                            bias,
                            collections=["imported_r", "imported_g"],
                            findFirst=True,
                        )
                    ),
                    expectedDeduplicatedBiases,
                )
                # Materialize the bias dataset queries, too, so now we're
                # materializing every single step.
                with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                                findFirst=False).materialize() as biases:
                    self.assertCountEqual(list(biases), expectedAllBiases)
                with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                                findFirst=True).materialize() as biases:
                    self.assertCountEqual(list(biases), expectedDeduplicatedBiases)

    def testEmptyDimensionsQueries(self):
        """Test Query and QueryResults objects in the case where there are no
        dimensions.
        """
        # Set up test data: one dataset type, two runs, one dataset in each.
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog")
        registry.registerDatasetType(schema)
        dataId = DataCoordinate.makeEmpty(registry.dimensions)
        run1 = "run1"
        run2 = "run2"
        registry.registerRun(run1)
        registry.registerRun(run2)
        (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1)
        (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2)
        # Query directly for both of the datasets, and then for each one
        # individually.
        self.assertCountEqual(
            list(registry.queryDatasets(schema, collections=[run1, run2], findFirst=False)),
            [dataset1, dataset2],
        )
        self.assertEqual(
            list(registry.queryDatasets(schema, collections=[run1, run2], findFirst=True)),
            [dataset1],
        )
        self.assertEqual(
            list(registry.queryDatasets(schema, collections=[run2, run1], findFirst=True)),
            [dataset2],
        )
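        # (With findFirst=True the collection search order determines which
        # dataset is returned: [run1, run2] finds dataset1 first, while
        # [run2, run1] finds dataset2.)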

        # Query for data IDs with no dimensions.
        dataIds = registry.queryDataIds([])
        self.assertEqual(
            dataIds.toSequence(),
            DataCoordinateSequence([dataId], registry.dimensions.empty)
        )
        # Use queried data IDs to find the datasets.
        self.assertCountEqual(
            list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
            [dataset1, dataset2],
        )
        self.assertEqual(
            list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
            [dataset1],
        )
        self.assertEqual(
            list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
            [dataset2],
        )
        # Now materialize the data ID query results and repeat those tests.
        with dataIds.materialize() as dataIds:
            self.assertEqual(
                dataIds.toSequence(),
                DataCoordinateSequence([dataId], registry.dimensions.empty)
            )
            self.assertCountEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
                [dataset1, dataset2],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
                [dataset1],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
                [dataset2],
            )
        # Query for non-empty data IDs, then subset that to get the empty one.
        # Repeat the above tests starting from that.
        dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True)
        self.assertEqual(
            dataIds.toSequence(),
            DataCoordinateSequence([dataId], registry.dimensions.empty)
        )
        self.assertCountEqual(
            list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
            [dataset1, dataset2],
        )
        self.assertEqual(
            list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
            [dataset1],
        )
        self.assertEqual(
            list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
            [dataset2],
        )
        with dataIds.materialize() as dataIds:
            self.assertEqual(
                dataIds.toSequence(),
                DataCoordinateSequence([dataId], registry.dimensions.empty)
            )
            self.assertCountEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
                [dataset1, dataset2],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
                [dataset1],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
                [dataset2],
            )
        # Query for non-empty data IDs, then materialize, then subset to get
        # the empty one.  Repeat again.
        with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds:
            dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True)
            self.assertEqual(
                dataIds.toSequence(),
                DataCoordinateSequence([dataId], registry.dimensions.empty)
            )
            self.assertCountEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
                [dataset1, dataset2],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
                [dataset1],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
                [dataset2],
            )
            with dataIds.materialize() as dataIds:
                self.assertEqual(
                    dataIds.toSequence(),
                    DataCoordinateSequence([dataId], registry.dimensions.empty)
                )
                self.assertCountEqual(
                    list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
                    [dataset1, dataset2],
                )
                self.assertEqual(
                    list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
                    [dataset1],
                )
                self.assertEqual(
                    list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
                    [dataset2],
                )

    def testDimensionDataModifications(self):
        """Test that modifying dimension records via
        ``syncDimensionData(..., update=True)`` and
        ``insertDimensionData(..., replace=True)`` works as expected, even in
        the presence of datasets using those dimensions and spatial overlap
        relationships.
        """

        def unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]:
            """Unpack a sphgeom.RangeSet into the integers it contains.
            """
            for begin, end in ranges:
                yield from range(begin, end)

        def range_set_hull(
            ranges: lsst.sphgeom.RangeSet,
            pixelization: lsst.sphgeom.HtmPixelization,
        ) -> lsst.sphgeom.ConvexPolygon:
            """Create a ConvexPolygon hull of the region defined by a set of
            HTM pixelization index ranges.
            """
            points = []
            for index in unpack_range_set(ranges):
                points.extend(pixelization.triangle(index).getVertices())
            return lsst.sphgeom.ConvexPolygon(points)

        # Use HTM to set up an initial parent region (one arbitrary trixel)
        # and four child regions (the trixels within the parent at the next
        # level).  We'll use the parent as a tract/visit region and the
        # children as its patch/visit_detector regions.
        registry = self.makeRegistry()
        htm6 = registry.dimensions.skypix["htm"][6].pixelization
        commonSkyPix = registry.dimensions.commonSkyPix.pixelization
        index = 12288
        child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4)
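        # (In the HTM indexing convention, a trixel's four children at the
        # next level occupy indices [4*i, 4*i + 4), so scaling the one-element
        # range by 4 turns this trixel's index into its children's index
        # range.)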

        assert htm6.universe().contains(child_ranges_small)
        child_regions_small = [htm6.triangle(i) for i in unpack_range_set(child_ranges_small)]
        parent_region_small = lsst.sphgeom.ConvexPolygon(
            list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small))
        )
        assert all(parent_region_small.contains(c) for c in child_regions_small)
        # Make a larger version of each child region, defined to be the set of
        # htm6 trixels that overlap the original's bounding circle.  Make a
        # new parent that's the convex hull of the new children.
        child_regions_large = [
            range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6)
            for c in child_regions_small
        ]
        assert all(large.contains(small) for large, small in zip(child_regions_large, child_regions_small))
        parent_region_large = lsst.sphgeom.ConvexPolygon(
            list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large))
        )
        assert all(parent_region_large.contains(c) for c in child_regions_large)
        assert parent_region_large.contains(parent_region_small)
        assert not parent_region_small.contains(parent_region_large)
        assert not all(parent_region_small.contains(c) for c in child_regions_large)
        # Find some commonSkyPix indices that overlap the large regions but do
        # not overlap the small regions.  We use commonSkyPix here to make
        # sure the real tests later involve what's in the database, not just
        # post-query region filtering.
        child_difference_indices = []
        for large, small in zip(child_regions_large, child_regions_small):
            difference = list(unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small)))
            assert difference, "if this is empty, we can't test anything useful with these regions"
            assert all(
                not commonSkyPix.triangle(d).isDisjointFrom(large)
                and commonSkyPix.triangle(d).isDisjointFrom(small)
                for d in difference
            )
            child_difference_indices.append(difference)
        parent_difference_indices = list(
            unpack_range_set(
                commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small)
            )
        )
        assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions"
        assert all(
            (
                not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large)
                and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small)
            )
            for d in parent_difference_indices
        )
        # Now that we've finally got those regions, we'll insert the large
        # ones as tract/patch dimension records.
        skymap_name = "testing_v1"
        registry.insertDimensionData(
            "skymap", {
                "name": skymap_name,
                "hash": bytes([42]),
                "tract_max": 1,
                "patch_nx_max": 2,
                "patch_ny_max": 2,
            }
        )
        registry.insertDimensionData(
            "tract",
            {"skymap": skymap_name, "id": 0, "region": parent_region_large}
        )
        registry.insertDimensionData(
            "patch",
            *[{
                "skymap": skymap_name,
                "tract": 0,
                "id": n,
                "cell_x": n % 2,
                "cell_y": n // 2,
                "region": c
            } for n, c in enumerate(child_regions_large)]
        )
        # Add a dataset that uses these dimensions to make sure that modifying
        # them doesn't disrupt foreign keys (we need to make sure the DB
        # doesn't implement insert with replace=True as delete-then-insert).
        dataset_type = DatasetType(
            "coadd",
            dimensions=["tract", "patch"],
            universe=registry.dimensions,
            storageClass="Exposure",
        )
        registry.registerDatasetType(dataset_type)
        registry.registerCollection("the_run", CollectionType.RUN)
        registry.insertDatasets(
            dataset_type,
            [{"skymap": skymap_name, "tract": 0, "patch": 2}],
            run="the_run",
        )
        # Query for tracts and patches that overlap some "difference"
        # commonSkyPix pixels; there should be overlaps, because the database
        # has the "large" suite of regions.
        self.assertEqual(
            {0},
            {
                data_id["tract"] for data_id in registry.queryDataIds(
                    ["tract"],
                    skymap=skymap_name,
                    dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
                )
            }
        )
        for patch_id, patch_difference_indices in enumerate(child_difference_indices):
            self.assertIn(
                patch_id,
                {
                    data_id["patch"] for data_id in registry.queryDataIds(
                        ["patch"],
                        skymap=skymap_name,
                        dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
                    )
                }
            )
        # Use sync to update the tract region and insert to update the patch
        # regions, to the "small" suite.
        updated = registry.syncDimensionData(
            "tract",
            {"skymap": skymap_name, "id": 0, "region": parent_region_small},
            update=True,
        )
        self.assertEqual(updated, {"region": parent_region_large})
        registry.insertDimensionData(
            "patch",
            *[{
                "skymap": skymap_name,
                "tract": 0,
                "id": n,
                "cell_x": n % 2,
                "cell_y": n // 2,
                "region": c
            } for n, c in enumerate(child_regions_small)],
            replace=True
        )
        # Query again; there should now be no such overlaps, because the
        # database has the "small" suite of regions.
        self.assertFalse(
            set(
                registry.queryDataIds(
                    ["tract"],
                    skymap=skymap_name,
                    dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
                )
            )
        )
        for patch_id, patch_difference_indices in enumerate(child_difference_indices):
            self.assertNotIn(
                patch_id,
                {
                    data_id["patch"] for data_id in registry.queryDataIds(
                        ["patch"],
                        skymap=skymap_name,
                        dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
                    )
                }
            )
        # Update back to the large regions and query one more time.
        updated = registry.syncDimensionData(
            "tract",
            {"skymap": skymap_name, "id": 0, "region": parent_region_large},
            update=True,
        )
        self.assertEqual(updated, {"region": parent_region_small})
        registry.insertDimensionData(
            "patch",
            *[{
                "skymap": skymap_name,
                "tract": 0,
                "id": n,
                "cell_x": n % 2,
                "cell_y": n // 2,
                "region": c
            } for n, c in enumerate(child_regions_large)],
            replace=True
        )
        self.assertEqual(
            {0},
            {
                data_id["tract"] for data_id in registry.queryDataIds(
                    ["tract"],
                    skymap=skymap_name,
                    dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
                )
            }
        )
        for patch_id, patch_difference_indices in enumerate(child_difference_indices):
            self.assertIn(
                patch_id,
                {
                    data_id["patch"] for data_id in registry.queryDataIds(
                        ["patch"],
                        skymap=skymap_name,
                        dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
                    )
                }
            )

    def testCalibrationCollections(self):
        """Test operations on `~CollectionType.CALIBRATION` collections,
        including `Registry.certify`, `Registry.decertify`, and
        `Registry.findDataset`.
        """
        # Setup - make a Registry, fill it with some datasets in
        # non-calibration collections.
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Set up some timestamps.
        t1 = astropy.time.Time('2020-01-01T01:00:00', format="isot", scale="tai")
        t2 = astropy.time.Time('2020-01-01T02:00:00', format="isot", scale="tai")
        t3 = astropy.time.Time('2020-01-01T03:00:00', format="isot", scale="tai")
        t4 = astropy.time.Time('2020-01-01T04:00:00', format="isot", scale="tai")
        t5 = astropy.time.Time('2020-01-01T05:00:00', format="isot", scale="tai")
        allTimespans = [
            Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2)
        ]
        # Get references to some datasets.
        bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
        # Register the main calibration collection we'll be working with.
        collection = "Cam1/calibs/default"
        registry.registerCollection(collection, type=CollectionType.CALIBRATION)
        # Cannot associate into a calibration collection (no timespan).
        with self.assertRaises(TypeError):
            registry.associate(collection, [bias2a])
        # Certify 2a dataset with [t2, t4) validity.
        registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4))
        # We should not be able to certify 2b with anything overlapping that
        # window.
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=None, end=t3))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=None, end=t5))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t1, end=None))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t2, end=None))
        # We should be able to certify 3a with a range overlapping that
        # window, because it's for a different detector.
        # We'll certify 3a over [t1, t3).
        registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3))
        # Now we'll certify 2b and 3b together over [t4, ∞).
        registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None))
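        # Certification state at this point:
        #   bias2a: [t2, t4)    bias3a: [t1, t3)
        #   bias2b: [t4, ∞)     bias3b: [t4, ∞)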

        # Fetch all associations and check that they are what we expect.
        self.assertCountEqual(
            list(
                registry.queryDatasetAssociations(
                    "bias",
                    collections=[collection, "imported_g", "imported_r"],
                )
            ),
            [
                DatasetAssociation(
                    ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                    collection="imported_g",
                    timespan=None,
                ),
                DatasetAssociation(
                    ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
                    collection="imported_r",
                    timespan=None,
                ),
                DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None),
                DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None),
                DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None),
                DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None),
                DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)),
                DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)),
                DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)),
                DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)),
            ]
        )

        class Ambiguous:
            """Tag class to denote lookups that are expected to be ambiguous.
            """
            pass

        def assertLookup(detector: int, timespan: Timespan,
                         expected: Optional[Union[DatasetRef, Type[Ambiguous]]]) -> None:
            """Local function that asserts that a bias lookup returns the
            given expected result.
            """
            if expected is Ambiguous:
                with self.assertRaises(RuntimeError):
                    registry.findDataset("bias", collections=collection, instrument="Cam1",
                                         detector=detector, timespan=timespan)
            else:
                self.assertEqual(
                    expected,
                    registry.findDataset("bias", collections=collection, instrument="Cam1",
                                         detector=detector, timespan=timespan)
                )

        # Systematically test lookups against expected results.
        assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
        assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
        assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
        assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b)
        assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
        assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
        assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
        assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
        assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)

        # Decertify [t3, t5) for all data IDs, and run the lookups again.
        # This should truncate bias2a to [t2, t3), leave bias3a unchanged at
        # [t1, t3), and truncate bias2b and bias3b to [t5, ∞).
        registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5))
        assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
        assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
        assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
        assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None)
        assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None)
        assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b)
        assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None)
        assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
        assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
        assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
        assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
        assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None)
        assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None)
        assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)

        # Decertify everything, this time with explicit data IDs, then check
        # that no lookups succeed.
        registry.decertify(
            collection, "bias", Timespan(None, None),
            dataIds=[
                dict(instrument="Cam1", detector=2),
                dict(instrument="Cam1", detector=3),
            ]
        )
        for detector in (2, 3):
            for timespan in allTimespans:
                assertLookup(detector=detector, timespan=timespan, expected=None)
        # Certify bias2a and bias3a over (-∞, ∞), and check that all lookups
        # now return those.
        registry.certify(collection, [bias2a, bias3a], Timespan(None, None))
        for timespan in allTimespans:
            assertLookup(detector=2, timespan=timespan, expected=bias2a)
            assertLookup(detector=3, timespan=timespan, expected=bias3a)
        # Decertify just the detector=2 bias over [t2, t4).
        # This should split a single certification row into two (and leave the
        # other existing row, for bias3a, alone).
        registry.decertify(collection, "bias", Timespan(t2, t4),
                           dataIds=[dict(instrument="Cam1", detector=2)])
        for timespan in allTimespans:
            assertLookup(detector=3, timespan=timespan, expected=bias3a)
            overlapsBefore = timespan.overlaps(Timespan(None, t2))
            overlapsAfter = timespan.overlaps(Timespan(t4, None))
            if overlapsBefore and overlapsAfter:
                expected = Ambiguous
            elif overlapsBefore or overlapsAfter:
                expected = bias2a
            else:
                expected = None
            assertLookup(detector=2, timespan=timespan, expected=expected)

    def testIngestTimeQuery(self):
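        """Test that ``ingest_date`` can be used in query expressions, both
        as a literal time string and via bind parameters.
        """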

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        dt0 = datetime.utcnow()
        self.loadData(registry, "datasets.yaml")
        dt1 = datetime.utcnow()
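        # In the queries below, ``...`` (Ellipsis) is a wildcard: it matches
        # all dataset types and all collections.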

        datasets = list(registry.queryDatasets(..., collections=...))
        len0 = len(datasets)
        self.assertGreater(len0, 0)

        where = "ingest_date > T'2000-01-01'"
        datasets = list(registry.queryDatasets(..., collections=..., where=where))
        len1 = len(datasets)
        self.assertEqual(len0, len1)

        # No one will ever use this piece of software in 30 years.
        where = "ingest_date > T'2050-01-01'"
        datasets = list(registry.queryDatasets(..., collections=..., where=where))
        len2 = len(datasets)
        self.assertEqual(len2, 0)

        # Check more exact timing to make sure there is no 37-second (TAI
        # minus UTC) offset (after fixing DM-30124).  SQLite time precision
        # is one second, so make sure that we don't test with higher
        # precision.
        tests = [
            # format: (timestamp, operator, expected_len)
            (dt0 - timedelta(seconds=1), ">", len0),
            (dt0 - timedelta(seconds=1), "<", 0),
            (dt1 + timedelta(seconds=1), "<", len0),
            (dt1 + timedelta(seconds=1), ">", 0),
        ]
        for dt, op, expect_len in tests:
            dt_str = dt.isoformat(sep=" ")

            where = f"ingest_date {op} T'{dt_str}'"
            datasets = list(registry.queryDatasets(..., collections=..., where=where))
            self.assertEqual(len(datasets), expect_len)

            # Same with bind, using a datetime or an astropy Time.
            where = f"ingest_date {op} ingest_time"
            datasets = list(registry.queryDatasets(..., collections=..., where=where,
                                                   bind={"ingest_time": dt}))
            self.assertEqual(len(datasets), expect_len)

            dt_astropy = astropy.time.Time(dt, format="datetime")
            datasets = list(registry.queryDatasets(..., collections=..., where=where,
                                                   bind={"ingest_time": dt_astropy}))
            self.assertEqual(len(datasets), expect_len)

    def testTimespanQueries(self):
        """Test query expressions involving timespans.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")
        # All visits in the database; mapping from ID to timespan.
        visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")}
        # Just those IDs, sorted (which is also temporal sorting, because HSC
        # visit IDs are monotonically increasing).
        ids = sorted(visits.keys())
        self.assertGreater(len(ids), 20)
        # Pick some quasi-random indexes into `ids` to play with.
        i1 = int(len(ids)*0.1)
        i2 = int(len(ids)*0.3)
        i3 = int(len(ids)*0.6)
        i4 = int(len(ids)*0.8)
        # Extract some times from those: just before the beginning of i1
        # (which should be after the end of the visit before), exactly the
        # beginning of i2, just after the beginning of i3 (and before its
        # end), and the exact end of i4.
        t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec")
        self.assertGreater(t1, visits[ids[i1 - 1]].end)
        t2 = visits[ids[i2]].begin
        t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec")
        self.assertLess(t3, visits[ids[i3]].end)
        t4 = visits[ids[i4]].end
        # Make sure those are actually in order.
        self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1]))

        bind = {
            "t1": t1,
            "t2": t2,
            "t3": t3,
            "t4": t4,
            "ts23": Timespan(t2, t3),
        }

        def query(where):
            """Helper function that queries for visit data IDs and returns
            results as a sorted, deduplicated list of visit IDs.
            """
            return sorted(
                {dataId["visit"] for dataId in registry.queryDataIds("visit",
                                                                     instrument="HSC",
                                                                     bind=bind,
                                                                     where=where)}
            )

        # Try a bunch of timespan queries, mixing up the bounds themselves,
        # where they appear in the expression, and how we get the timespan
        # into the expression.
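        # (Timespans are half-open: the begin bound is inclusive and the end
        # bound is exclusive; the i2 and i4 edge cases below probe exactly
        # that.)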

        # t1 is before the start of i1, so this should not include i1.
        self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)"))
        # t2 is exactly at the start of i2, but ends are exclusive, so these
        # should not include i2.
        self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan"))
        self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)"))
        # t3 is in the middle of i3, so this should include i3.
        self.assertEqual(ids[i2:i3 + 1], query("visit.timespan OVERLAPS ts23"))
        # This one should not include i3, by the same reasoning.
        self.assertEqual(ids[i3 + 1:], query("visit.timespan > (t1, t3)"))
        # t4 is exactly at the end of i4, so this should include i4.
        self.assertEqual(ids[i3:i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)"))
        # i4's upper bound of t4 is exclusive, so this should not include i4.
        self.assertEqual(ids[i4 + 1:], query("visit.timespan OVERLAPS (t4, NULL)"))

        # Now some timespan vs. time scalar queries.
        self.assertEqual(ids[:i2], query("visit.timespan < t2"))
        self.assertEqual(ids[:i2], query("t2 > visit.timespan"))
        self.assertEqual(ids[i3 + 1:], query("visit.timespan > t3"))
        self.assertEqual(ids[i3 + 1:], query("t3 < visit.timespan"))
        self.assertEqual(ids[i3:i3 + 1], query("visit.timespan OVERLAPS t3"))
        self.assertEqual(ids[i3:i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan"))

        # Empty timespans should not overlap anything.
        self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)"))

    def testCollectionSummaries(self):
        """Test recording and retrieval of collection summaries.
        """
        self.maxDiff = None
        registry = self.makeRegistry()
        # Importing datasets from yaml should go through the code path where
        # we update collection summaries as we insert datasets.
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        flat = registry.getDatasetType("flat")
        expected1 = CollectionSummary.makeEmpty(registry.dimensions)
        expected1.datasetTypes.add(registry.getDatasetType("bias"))
        expected1.datasetTypes.add(flat)
        expected1.dimensions.update_extract(
            DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)
        )
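        # (A CollectionSummary records which dataset types and which governor
        # dimension values, here just instrument="Cam1", appear in a
        # collection.)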

        self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
        self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
        # Create a chained collection with both of the imported runs; the
        # summary should be the same, because it's a union with itself.
        chain = "chain"
        registry.registerCollection(chain, CollectionType.CHAINED)
        registry.setCollectionChain(chain, ["imported_r", "imported_g"])
        self.assertEqual(registry.getCollectionSummary(chain), expected1)
        # Associate flats only into a tagged collection and a calibration
        # collection to check summaries of those.
        tag = "tag"
        registry.registerCollection(tag, CollectionType.TAGGED)
        registry.associate(tag, registry.queryDatasets(flat, collections="imported_g"))
        calibs = "calibs"
        registry.registerCollection(calibs, CollectionType.CALIBRATION)
        registry.certify(calibs, registry.queryDatasets(flat, collections="imported_g"),
                         timespan=Timespan(None, None))
        expected2 = expected1.copy()
        expected2.datasetTypes.discard("bias")
        self.assertEqual(registry.getCollectionSummary(tag), expected2)
        self.assertEqual(registry.getCollectionSummary(calibs), expected2)
        # Explicitly calling Registry.refresh() should load those same
        # summaries, via a totally different code path.
        registry.refresh()
        self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
        self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
        self.assertEqual(registry.getCollectionSummary(tag), expected2)
        self.assertEqual(registry.getCollectionSummary(calibs), expected2)

    def testUnrelatedDimensionQueries(self):
        """Test that WHERE expressions in queries can reference dimensions
        that are not in the result set.
        """
        registry = self.makeRegistry()
        # There is no data to back this query, but it should still return
        # zero records instead of raising.
        self.assertFalse(
            set(registry.queryDataIds(["visit", "detector"],
                                      where="instrument='Cam1' AND skymap='not_here' AND tract=0")),
        )

    def testBindInQueryDatasets(self):
        """Test that the bind parameter is correctly forwarded in
        queryDatasets recursion.
        """
        registry = self.makeRegistry()
        # Load some dataset types and datasets to query.
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
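        # The keyword-argument form and the bind-parameter form of the same
        # constraint must select exactly the same datasets.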

        self.assertEqual(
            set(registry.queryDatasets("flat", band="r", collections=...)),
            set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)),
        )