# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RegistryTests"]

from abc import ABC, abstractmethod
from collections import defaultdict
from datetime import datetime, timedelta
import itertools
import logging
import os
import re
from typing import Iterator, Optional, Type, Union, TYPE_CHECKING
import unittest

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None

import lsst.sphgeom
from ...core import (
    DataCoordinate,
    DataCoordinateSequence,
    DataCoordinateSet,
    DatasetAssociation,
    DatasetRef,
    DatasetType,
    DimensionGraph,
    NamedValueSet,
    StorageClass,
    ddl,
    Timespan,
)
from ..summaries import CollectionSummary
from .._collectionType import CollectionType
from .._config import RegistryConfig
from .._exceptions import (
    ConflictingDefinitionError,
    InconsistentDataIdError,
    MissingCollectionError,
    OrphanedRecordError,
)
from ..interfaces import ButlerAttributeExistsError

if TYPE_CHECKING:
    from .._registry import Registry


class RegistryTests(ABC):
    """Generic tests for the `Registry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: Optional[str] = None
    """Name of the collections manager class. If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    datasetsManager: Optional[str] = None
    """Name of the datasets manager class. If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files.
        """
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create a `RegistryConfig` used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        The returned instance will be pre-configured based on the values of
        class members, and default-configured for all other parameters.
        Subclasses that need only the default configuration should just
        instantiate `RegistryConfig` directly.
        """
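        # A minimal sketch of how a subclass might select a specific manager
        # (the class path below is illustrative rather than a guaranteed
        # daf_butler symbol):
        #
        #     class MyRegistryTests(RegistryTests):
        #         collectionsManager = (
        #             "lsst.daf.butler.registry.collections"
        #             ".synthIntKey.SynthIntKeyCollectionManager"
        #         )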

        config = RegistryConfig()
        if self.collectionsManager:
            config["managers", "collections"] = self.collectionsManager
        if self.datasetsManager:
            config["managers", "datasets"] = self.datasetsManager
        return config

    @abstractmethod
    def makeRegistry(self) -> Registry:
        """Return the Registry instance to be tested.
        """
        raise NotImplementedError()

    def loadData(self, registry: Registry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
        """
        from ...transfers import YamlRepoImportBackend
        with open(os.path.join(self.getDataDir(), filename), 'r') as stream:
            backend = YamlRepoImportBackend(stream, registry)
        backend.register()
        backend.load(datastore=None)
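        # datastore=None: only registry content (dimension records, datasets,
        # collections) is imported; these test repositories carry no file
        # artifacts.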

    def testOpaque(self):
        """Tests for `Registry.registerOpaqueTable`,
        `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
        `Registry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            )
        )
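        # Opaque tables hold rows the Registry stores verbatim on behalf of
        # other components (e.g. datastore-internal records); the Registry
        # never interprets their contents itself.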

        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=(1, 3), name=("one", "two"))))
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=(1, 2, 3))))
        # Test a very long IN clause that exceeds the SQLite limit on the
        # number of parameters. SQLite documents the limit as 32k, but in
        # practice it appears to be much higher.
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=list(range(300_000)))))
        # Two IN clauses, each longer than the 1k batch size: the first has
        # duplicates, the second has matching elements in different batches
        # (after sorting).
        self.assertEqual(rows[0:2], list(registry.fetchOpaqueData(
            table,
            id=list(range(1000)) + list(range(100, 0, -1)),
            name=["one"] + [f"q{i}" for i in range(2200)] + ["two"])))
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `Registry.registerDatasetType` and
        `Registry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when the new definition is not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `Registry.insertDimensionData`,
        `Registry.syncDimensionData`, and `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {"name": "DummyCam", "visit_max": 10, "exposure_max": 10, "detector_max": 2,
                          "class_name": "lsst.obs.base.Instrument"}
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam",
                graph=dimension.graph
            ).records[dimensionName].toDict(),
            dimensionValue
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(LookupError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding the required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam", physical_filter="DummyCam_i",
                graph=dimension2.graph
            ).records[dimensionName2].toDict(),
            dimensionValue2
        )
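        # syncDimensionData, unlike insertDimensionData, is an
        # insert-if-missing: it returns True when it inserts, False when an
        # identical record already exists, and raises only when an existing
        # record conflicts.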

        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {"instrument": "DummyCam", "id": 1, "full_name": "one",
                           "name_in_raft": "zero", "purpose": "SCIENCE"}
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again. Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {"instrument": "DummyCam", "id": 1, "full_name": "one",
                 "name_in_raft": "four", "purpose": "SCIENCE"}
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `Registry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
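        # The data ID below claims both visit=1 and exposure=2, but the
        # visit_definition records tie visit 1 to exposure 1 only, so
        # expansion must fail.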

        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
        and `Registry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        ref, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `Registry.findDataset`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        inputRef, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with an invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        inputRef1, = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        inputRef2, = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))

    def testRemoveDatasetTypeSuccess(self):
        """Test that Registry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(KeyError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that Registry.removeDatasetType raises when there are
        datasets of that type present or if the dataset type is for a
        component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(ValueError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be
        # included when components=True.
        self.assertEqual(
            {"bias", "flat"},
            NamedValueSet(registry.queryDatasetTypes()).names
        )
        self.assertEqual(
            {"bias", "flat"},
            NamedValueSet(registry.queryDatasetTypes(components=False)).names
        )
        self.assertLess(
            {"bias", "flat", "bias.wcs", "flat.photoCalib"},
            NamedValueSet(registry.queryDatasetTypes(components=True)).names
        )
        # Use a pattern that can match either parent or components. Again,
        # components are only returned if components=True.
        self.assertEqual(
            {"bias"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names
        )
        self.assertEqual(
            {"bias"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
        )
        self.assertLess(
            {"bias", "bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names
        )
        # This pattern matches only a component. In this case we also return
        # that component dataset type if components=None.
        self.assertEqual(
            {"bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"))).names
        )
        self.assertEqual(
            set(),
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names
        )
        self.assertEqual(
            {"bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names
        )
        # Add a dataset type using a StorageClass that we'll then remove;
        # check that this does not affect our ability to query for dataset
        # types (though it will warn).
        tempStorageClass = StorageClass(
            name="TempStorageClass",
            components={"data": registry.storageClasses.getStorageClass("StructuredDataDict")}
        )
        registry.storageClasses.registerStorageClass(tempStorageClass)
        datasetType = DatasetType("temporary", dimensions=["instrument"], storageClass=tempStorageClass,
                                  universe=registry.dimensions)
        registry.registerDatasetType(datasetType)
        registry.storageClasses._unregisterStorageClass(tempStorageClass.name)
        datasetType._storageClass = None
        del tempStorageClass
        # Querying for all dataset types, including components, should
        # include at least all non-component dataset types (and I don't want
        # to enumerate all of the Exposure components for bias and flat
        # here).
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            everything = NamedValueSet(registry.queryDatasetTypes(components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertLess({"bias", "flat", "temporary"}, everything.names)
        # It should not include "temporary.data", because we tried to remove
        # the storage class that would tell it about that. So if the next
        # line fails (i.e. "temporary.data" _is_ in everything.names), it
        # means this part of the test isn't doing anything, because the
        # _unregister call above isn't simulating the real-life case we want
        # it to simulate, in which different versions of daf_butler in
        # entirely different Python processes interact with the same repo.
        self.assertNotIn("temporary.data", everything.names)
        # Query for dataset types that start with "temp". This should again
        # not include the component, and also not fail.
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*")))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertEqual({"temporary"}, startsWithTemp.names)

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(parentType, collections=collection,
                                                 instrument="Cam1", detector=1)
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
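        # Component dataset types are named "<parent>.<component>" and share
        # the parent dataset type's dimensions and data IDs.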

        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("bias.wcs", collections=collection,
                                         dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        dataIds = registry.queryDataIds(
            ["detector"],
            datasets=["bias.wcs"],
            collections=collection,
        ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
                    for d in (1, 2, 3)
                },
                parentType.dimensions,
            )
        )
        # Search for multiple datasets of a single type with queryDatasets.
        childRefs2 = set(registry.queryDatasets(
            "bias.wcs",
            collections=collection,
        ))
        self.assertEqual(
            {ref.unresolved() for ref in childRefs2},
            {DatasetRef(childType, dataId) for dataId in dataIds}
        )

    def testCollections(self):
        """Tests for registry methods that manage collections.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but
        # that should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time along
        # with a dataset that isn't yet in the collection and wouldn't itself
        # cause a conflict. This should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
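        # A CHAINED collection is an ordered search path over other
        # collections: findDataset walks its children in order and returns
        # the first match.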

        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained
        # collection only if we don't ask to flatten it (i.e. yield only its
        # children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(
            list(registry.getCollectionChain(chain1)),
            [tag1, run2]
        )
        # Searching for dataId1 or dataId2 in the chain should return ref1
        # and ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1. The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"]
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"]
        )
        # Searching for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # Searching for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2; chain2 searches run2 at its
        # front, so the flat should be found there directly.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(Exception):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(Exception):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, and test that it's gone by asking for its
        # type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testCollectionChainFlatten(self):
        """Test that Registry.setCollectionChain obeys its 'flatten' option.
        """
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(LookupError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(LookupError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled
        back if an exception propagates out of an inner transaction block and
        is then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
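        # In the inner block below, savepoint=True turns the nested context
        # into a database SAVEPOINT, so rolling it back discards only its own
        # changes while the enclosing transaction's work survives.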

        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed yet).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(LookupError):
            registry.expandDataId(dataId2, graph=dimension.graph)

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins
        to skymap."""
        registry = self.makeRegistry()

        # Need a bunch of dimensions and datasets for this test.
        registry.insertDimensionData(
            "instrument",
            dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector",
            *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit_system",
            dict(instrument="DummyCam", id=1, name="default"),
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", visit_system=1),
        )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
            dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
        )
        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(name="RAW",
                              dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
                              storageClass=storageClass)
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(name="CALEXP",
                                 dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)

        # Add pre-existing datasets.
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # Note that only 3 of 5 detectors have datasets.
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # Exposures 100 and 101 appear in both run1 and tagged2:
                # 100 has different datasets in the different collections;
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # Note that only 3 of 5 detectors have datasets.
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
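        # The union of the RAW and CALEXP dimensions spans both exposure and
        # visit, so query rows relate exposures to visits through the
        # visit_definition records inserted above.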

        # Test that a single dimension str works as well as a list of str.
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4*3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
            packer1 = registry.dimensions.makePacker("visit_detector", dataId)
            packer2 = registry.dimensions.makePacker("exposure_detector", dataId)
            self.assertEqual(packer1.unpack(packer1.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer1.dimensions))
            self.assertEqual(packer2.unpack(packer2.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer2.dimensions))
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4*3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # with both input collections
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6*3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # limit to a single visit
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit = 10", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 2*3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # A more limiting expression, using link names instead of
        # Table.column.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit = 10 and detector > 1 and 'DummyCam'=instrument").toSet()
        self.assertEqual(len(rows), 2*2)  # 2 exposures times 2 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))

        # expression excludes everything
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit > 1000", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 0)

        # Select by physical_filter. It is not in the requested dimensions,
        # but it is part of the full expression, so it should work too.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="physical_filter = 'dummy_r'", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 2*3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # Need a bunch of dimensions and datasets for this test; we want
        # "band" in the test, so we also have to add physical_filter
        # dimensions.
        registry.insertDimensionData(
            "instrument",
            dict(instrument="DummyCam")
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "skymap",
            dict(name="DummyMap", hash="sha!".encode("utf8"))
        )
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0)
                  for patch in range(10)]
            )

        # dataset types
        run = "test"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(name="deepCoadd_calexp",
                                 dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                         "band")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(name="deepCoadd_mergeDet",
                                dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
                                storageClass=storageClass)
        registry.registerDatasetType(mergeType)
        measType = DatasetType(name="deepCoadd_meas",
                               dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                       "band")),
                               storageClass=storageClass)
        registry.registerDatasetType(measType)

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(calexpType.dimensions.required | mergeType.dimensions.required
                        | measType.dimensions.required)
        )

        # add pre-existing datasets
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # with empty expression
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3*4*2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band"))
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="tract IN (1, 5) AND patch IN (2, 7)", skymap="DummyMap").toSet()
        self.assertEqual(len(rows), 2*2*2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to a single filter
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="band = 'i'").toSet()
        self.assertEqual(len(rows), 3*4*1)  # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i",))

        # The expression excludes everything; specifying a non-existent
        # skymap is not a fatal error, just an operator mistake that matches
        # nothing.
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="skymap = 'Mars'").toSet()
        self.assertEqual(len(rows), 0)

    def testSpatialJoin(self):
        """Test queries that involve spatial overlap joins.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")

        # Dictionary of spatial DatabaseDimensionElements, keyed by the name
        # of the TopologicalFamily they belong to. We'll relate all elements
        # in each family to all of the elements in each other family.
        families = defaultdict(set)
        # Dictionary of {element.name: {dataId: region}}.
        regions = {}
        for element in registry.dimensions.getDatabaseElements():
            if element.spatial is not None:
                families[element.spatial.name].add(element)
                regions[element.name] = {
                    record.dataId: record.region for record in registry.queryDimensionRecords(element)
                }

        # If this check fails, it's not necessarily a problem - it may just
        # be a reasonable change to the default dimension definitions - but
        # the test below depends on there being more than one family to do
        # anything useful.
        self.assertEqual(len(families), 2)

        # Overlap DatabaseDimensionElements with each other.
        for family1, family2 in itertools.combinations(families, 2):
            for element1, element2 in itertools.product(families[family1], families[family2]):
                graph = DimensionGraph.union(element1.graph, element2.graph)
                # Construct the expected set of overlapping data IDs via a
                # brute-force comparison of the regions we've already
                # fetched.
                expected = {
                    DataCoordinate.standardize(
                        {**dataId1.byName(), **dataId2.byName()},
                        graph=graph
                    )
                    for (dataId1, region1), (dataId2, region2)
                    in itertools.product(regions[element1.name].items(), regions[element2.name].items())
                    if not region1.isDisjointFrom(region2)
                }
                self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
                queried = set(registry.queryDataIds(graph))
                self.assertEqual(expected, queried)

        # Overlap each DatabaseDimensionElement with the commonSkyPix system.
        commonSkyPix = registry.dimensions.commonSkyPix
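        # (commonSkyPix is the skypix level the database itself uses to
        # mediate spatial joins, so this also exercises that machinery
        # directly.)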

        for elementName, elementRegions in regions.items():
            graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph)
            expected = set()
            for dataId, region in elementRegions.items():
                for begin, end in commonSkyPix.pixelization.envelope(region):
                    expected.update(
                        DataCoordinate.standardize(
                            {commonSkyPix.name: index, **dataId.byName()},
                            graph=graph
                        )
                        for index in range(begin, end)
                    )
            self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
            queried = set(registry.queryDataIds(graph))
            self.assertEqual(expected, queried)

    def testAbstractQuery(self):
        """Test that we can run a query that just lists the known bands.
        This is tricky because band is backed by a query against
        physical_filter.
        """
        registry = self.makeRegistry()
        registry.insertDimensionData("instrument", dict(name="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_i", band="i"),
            dict(instrument="DummyCam", name="dummy_i2", band="i"),
            dict(instrument="DummyCam", name="dummy_r", band="r"),
        )
        rows = registry.queryDataIds(["band"]).toSet()
        self.assertCountEqual(
            rows,
            [DataCoordinate.standardize(band="i", universe=registry.dimensions),
             DataCoordinate.standardize(band="r", universe=registry.dimensions)]
        )

    def testAttributeManager(self):
        """Test basic functionality of the attribute manager.
        """
        # Number of attributes with schema versions in a fresh database:
        # 6 managers with 3 records per manager, plus config for dimensions.
        VERSION_COUNT = 6 * 3 + 1

        registry = self.makeRegistry()
        attributes = registry._managers.attributes

        # check what get() returns for a non-existing key
        self.assertIsNone(attributes.get("attr"))
        self.assertEqual(attributes.get("attr", ""), "")
        self.assertEqual(attributes.get("attr", "Value"), "Value")
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT)

        # cannot store an empty key or value
        with self.assertRaises(ValueError):
            attributes.set("", "value")
        with self.assertRaises(ValueError):
            attributes.set("attr", "")

        # set the value of a non-existing key
        attributes.set("attr", "value")
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
        self.assertEqual(attributes.get("attr"), "value")

        # updating the value of an existing key requires force=True
        with self.assertRaises(ButlerAttributeExistsError):
            attributes.set("attr", "value2")

        attributes.set("attr", "value2", force=True)
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
        self.assertEqual(attributes.get("attr"), "value2")

        # delete an existing key
        self.assertTrue(attributes.delete("attr"))
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT)

        # delete a non-existing key
        self.assertFalse(attributes.delete("non-attr"))

        # store a bunch of keys and get the list back
        data = [
            ("version.core", "1.2.3"),
            ("version.dimensions", "3.2.1"),
            ("config.managers.opaque", "ByNameOpaqueTableStorageManager"),
        ]
        for key, value in data:
            attributes.set(key, value)
        items = dict(attributes.items())
        for key, value in data:
            self.assertEqual(items[key], value)

    def testQueryDatasetsDeduplication(self):
        """Test that the findFirst option to queryDatasets selects datasets
        from collections in the order given.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )
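        # With findFirst=True, the collection search order decides which
        # duplicate wins: detectors 2 and 3 have a bias in both runs, so
        # swapping the collection order below swaps those results.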

        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"],
                                        findFirst=True)),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"],
                                        findFirst=True)),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )

    def testQueryResults(self):
        """Test querying for data IDs and then manipulating the QueryResults
        object returned to perform other queries.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        bias = registry.getDatasetType("bias")
        flat = registry.getDatasetType("flat")
        # Obtain expected results from methods other than those we're
        # testing here. That includes:
        # - the dimensions of the data IDs we want to query:
        expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"])
        # - the dimensions of some other data IDs we'll extract from that:
        expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"])
        # - the data IDs we expect to obtain from the first queries:
        expectedDataIds = DataCoordinateSet(
            {
                DataCoordinate.standardize(instrument="Cam1", detector=d, physical_filter=p,
                                           universe=registry.dimensions)
                for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"})
            },
            graph=expectedGraph,
            hasFull=False,
            hasRecords=False,
        )
        # - the flat datasets we expect to find from those data IDs, in just
        #   one collection (so deduplication is irrelevant):
        expectedFlats = [
            registry.findDataset(flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1",
                                 collections="imported_r"),
            registry.findDataset(flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1",
                                 collections="imported_r"),
            registry.findDataset(flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2",
                                 collections="imported_r"),
        ]
        # - the data IDs we expect to extract from that:
        expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph)
        # - the bias datasets we expect to find from those data IDs after we
        #   subset-out the physical_filter dimension, first with duplicates:
        expectedAllBiases = [
            registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
            registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
        ]
        # - ...and without duplicates:
        expectedDeduplicatedBiases = [
            registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
            registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
        ]
        # Test against those expected results, using a "lazy" query for the
        # data IDs (which re-executes that query each time we use it to do
        # something new).
        dataIds = registry.queryDataIds(
            ["detector", "physical_filter"],
            where="detector.purpose = 'SCIENCE'",  # this rejects detector=4
            instrument="Cam1",
        )
        self.assertEqual(dataIds.graph, expectedGraph)
        self.assertEqual(dataIds.toSet(), expectedDataIds)
        self.assertCountEqual(
            list(
                dataIds.findDatasets(
                    flat,
                    collections=["imported_r"],
                )
            ),
            expectedFlats,
        )
        subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
        self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
        self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
        self.assertCountEqual(
            list(
                subsetDataIds.findDatasets(
                    bias,
                    collections=["imported_r", "imported_g"],
                    findFirst=False
                )
            ),
            expectedAllBiases
        )
        self.assertCountEqual(
            list(
                subsetDataIds.findDatasets(
                    bias,
                    collections=["imported_r", "imported_g"],
                    findFirst=True
                )
            ), expectedDeduplicatedBiases
        )
        # Materialize the bias dataset queries (only) by putting the results
        # into temporary tables, then repeat those tests.
        with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                        findFirst=False).materialize() as biases:
            self.assertCountEqual(list(biases), expectedAllBiases)
        with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                        findFirst=True).materialize() as biases:
            self.assertCountEqual(list(biases), expectedDeduplicatedBiases)

1273 # Materialize the data ID subset query, but not the dataset queries. 

1274 with subsetDataIds.materialize() as subsetDataIds: 

1275 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1276 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1277 self.assertCountEqual( 

1278 list( 

1279 subsetDataIds.findDatasets( 

1280 bias, 

1281 collections=["imported_r", "imported_g"], 

1282 findFirst=False 

1283 ) 

1284 ), 

1285 expectedAllBiases 

1286 ) 

1287 self.assertCountEqual( 

1288 list( 

1289 subsetDataIds.findDatasets( 

1290 bias, 

1291 collections=["imported_r", "imported_g"], 

1292 findFirst=True 

1293 ) 

1294 ), expectedDeduplicatedBiases 

1295 ) 

1296 # Materialize the dataset queries, too. 

1297 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], 

1298 findFirst=False).materialize() as biases: 

1299 self.assertCountEqual(list(biases), expectedAllBiases) 

1300 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], 

1301 findFirst=True).materialize() as biases: 

1302 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1303 # Materialize the original query, but none of the follow-up queries. 

1304 with dataIds.materialize() as dataIds: 

1305 self.assertEqual(dataIds.graph, expectedGraph) 

1306 self.assertEqual(dataIds.toSet(), expectedDataIds) 

1307 self.assertCountEqual( 

1308 list( 

1309 dataIds.findDatasets( 

1310 flat, 

1311 collections=["imported_r"], 

1312 ) 

1313 ), 

1314 expectedFlats, 

1315 ) 

1316 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True) 

1317 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1318 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1319 self.assertCountEqual( 

1320 list( 

1321 subsetDataIds.findDatasets( 

1322 bias, 

1323 collections=["imported_r", "imported_g"], 

1324 findFirst=False 

1325 ) 

1326 ), 

1327 expectedAllBiases 

1328 ) 

1329 self.assertCountEqual( 

1330 list( 

1331 subsetDataIds.findDatasets( 

1332 bias, 

1333 collections=["imported_r", "imported_g"], 

1334 findFirst=True 

1335 ) 

1336 ), expectedDeduplicatedBiases 

1337 ) 

1338 # Materialize just the bias dataset queries. 

1339 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], 

1340 findFirst=False).materialize() as biases: 

1341 self.assertCountEqual(list(biases), expectedAllBiases) 

1342 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], 

1343 findFirst=True).materialize() as biases: 

1344 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1345 # Materialize the subset data ID query, but not the dataset 

1346 # queries. 

1347 with subsetDataIds.materialize() as subsetDataIds: 

1348 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph) 

1349 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds) 

1350 self.assertCountEqual( 

1351 list( 

1352 subsetDataIds.findDatasets( 

1353 bias, 

1354 collections=["imported_r", "imported_g"], 

1355 findFirst=False 

1356 ) 

1357 ), 

1358 expectedAllBiases 

1359 ) 

1360 self.assertCountEqual( 

1361 list( 

1362 subsetDataIds.findDatasets( 

1363 bias, 

1364 collections=["imported_r", "imported_g"], 

1365 findFirst=True 

1366 ) 

1367 ), expectedDeduplicatedBiases 

1368 ) 

1369 # Materialize the bias dataset queries, too, so now we're 

1370 # materializing every single step. 

1371 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], 

1372 findFirst=False).materialize() as biases: 

1373 self.assertCountEqual(list(biases), expectedAllBiases) 

1374 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], 

1375 findFirst=True).materialize() as biases: 

1376 self.assertCountEqual(list(biases), expectedDeduplicatedBiases) 

1377 
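
    # A condensed sketch of the lazy-versus-materialized pattern exercised
    # above (illustrative only; `registry` is assumed populated as in the
    # test):
    #
    #     dataIds = registry.queryDataIds(["detector"], instrument="Cam1")
    #     with dataIds.materialize() as dataIds:
    #         # Inside this block the results are backed by a temporary
    #         # table, so follow-up calls such as dataIds.findDatasets(...)
    #         # or dataIds.subset(...) no longer re-execute the original
    #         # query.
    #         ...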

    def testEmptyDimensionsQueries(self):
        """Test Query and QueryResults objects in the case where there are no
        dimensions.
        """
        # Set up test data: one dataset type, two runs, one dataset in each.
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog")
        registry.registerDatasetType(schema)
        dataId = DataCoordinate.makeEmpty(registry.dimensions)
        run1 = "run1"
        run2 = "run2"
        registry.registerRun(run1)
        registry.registerRun(run2)
        (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1)
        (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2)
        # Query directly for both of the datasets, and then for each one
        # at a time.
        self.assertCountEqual(
            list(registry.queryDatasets(schema, collections=[run1, run2], findFirst=False)),
            [dataset1, dataset2]
        )
        self.assertEqual(
            list(registry.queryDatasets(schema, collections=[run1, run2], findFirst=True)),
            [dataset1],
        )
        self.assertEqual(
            list(registry.queryDatasets(schema, collections=[run2, run1], findFirst=True)),
            [dataset2],
        )
        # Query for data IDs with no dimensions.
        dataIds = registry.queryDataIds([])
        self.assertEqual(
            dataIds.toSequence(),
            DataCoordinateSequence([dataId], registry.dimensions.empty)
        )
        # Use queried data IDs to find the datasets.
        self.assertCountEqual(
            list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
            [dataset1, dataset2],
        )
        self.assertEqual(
            list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
            [dataset1],
        )
        self.assertEqual(
            list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
            [dataset2],
        )
        # Now materialize the data ID query results and repeat those tests.
        with dataIds.materialize() as dataIds:
            self.assertEqual(
                dataIds.toSequence(),
                DataCoordinateSequence([dataId], registry.dimensions.empty)
            )
            self.assertCountEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
                [dataset1, dataset2],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
                [dataset1],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
                [dataset2],
            )
        # Query for non-empty data IDs, then subset that to get the empty one.
        # Repeat the above tests starting from that.
        dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True)
        self.assertEqual(
            dataIds.toSequence(),
            DataCoordinateSequence([dataId], registry.dimensions.empty)
        )
        self.assertCountEqual(
            list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
            [dataset1, dataset2],
        )
        self.assertEqual(
            list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
            [dataset1],
        )
        self.assertEqual(
            list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
            [dataset2],
        )
        with dataIds.materialize() as dataIds:
            self.assertEqual(
                dataIds.toSequence(),
                DataCoordinateSequence([dataId], registry.dimensions.empty)
            )
            self.assertCountEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
                [dataset1, dataset2],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
                [dataset1],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
                [dataset2],
            )
        # Query for non-empty data IDs, then materialize, then subset to get
        # the empty one. Repeat again.
        with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds:
            dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True)
            self.assertEqual(
                dataIds.toSequence(),
                DataCoordinateSequence([dataId], registry.dimensions.empty)
            )
            self.assertCountEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
                [dataset1, dataset2],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
                [dataset1],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
                [dataset2],
            )
            with dataIds.materialize() as dataIds:
                self.assertEqual(
                    dataIds.toSequence(),
                    DataCoordinateSequence([dataId], registry.dimensions.empty)
                )
                self.assertCountEqual(
                    list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
                    [dataset1, dataset2],
                )
                self.assertEqual(
                    list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
                    [dataset1],
                )
                self.assertEqual(
                    list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
                    [dataset2],
                )
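
    # The key invariant behind the test above: an empty dimension graph has
    # exactly one conceivable data ID, DataCoordinate.makeEmpty(registry.dimensions),
    # so every route to it (direct query, subset of a non-empty query,
    # materialized or not) must yield that same single coordinate and find
    # the same datasets.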

    def testDimensionDataModifications(self):
        """Test that modifying dimension records via
        ``syncDimensionData(..., update=True)`` and
        ``insertDimensionData(..., replace=True)`` works as expected, even in
        the presence of datasets using those dimensions and spatial overlap
        relationships.
        """

        def unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]:
            """Unpack a sphgeom.RangeSet into the integers it contains.
            """
            for begin, end in ranges:
                yield from range(begin, end)

        def range_set_hull(
            ranges: lsst.sphgeom.RangeSet,
            pixelization: lsst.sphgeom.HtmPixelization,
        ) -> lsst.sphgeom.ConvexPolygon:
            """Create a ConvexPolygon hull of the region defined by a set of
            HTM pixelization index ranges.
            """
            points = []
            for index in unpack_range_set(ranges):
                points.extend(pixelization.triangle(index).getVertices())
            return lsst.sphgeom.ConvexPolygon(points)
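
        # Example of what these helpers do, assuming RangeSet.scaled
        # multiplies both bounds (so one trixel index expands to its four
        # children at the next HTM level, as the comment below relies on):
        #
        #     list(unpack_range_set(lsst.sphgeom.RangeSet(12288).scaled(4)))
        #     # -> [49152, 49153, 49154, 49155]
        #
        # range_set_hull then returns the convex hull of those trixels'
        # vertices.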

        # Use HTM to set up an initial parent region (one arbitrary trixel)
        # and four child regions (the trixels within the parent at the next
        # level). We'll use the parent as a tract/visit region and the
        # children as its patch/visit_detector regions.
        registry = self.makeRegistry()
        htm6 = registry.dimensions.skypix["htm"][6].pixelization
        commonSkyPix = registry.dimensions.commonSkyPix.pixelization
        index = 12288
        child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4)
        assert htm6.universe().contains(child_ranges_small)
        child_regions_small = [htm6.triangle(i) for i in unpack_range_set(child_ranges_small)]
        parent_region_small = lsst.sphgeom.ConvexPolygon(
            list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small))
        )
        assert all(parent_region_small.contains(c) for c in child_regions_small)
        # Make a larger version of each child region, defined to be the set of
        # htm6 trixels that overlap the original's bounding circle. Make a new
        # parent that's the convex hull of the new children.
        child_regions_large = [
            range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6)
            for c in child_regions_small
        ]
        assert all(large.contains(small) for large, small in zip(child_regions_large, child_regions_small))
        parent_region_large = lsst.sphgeom.ConvexPolygon(
            list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large))
        )
        assert all(parent_region_large.contains(c) for c in child_regions_large)
        assert parent_region_large.contains(parent_region_small)
        assert not parent_region_small.contains(parent_region_large)
        assert not all(parent_region_small.contains(c) for c in child_regions_large)
        # Find some commonSkyPix indices that overlap the large regions but do
        # not overlap the small regions. We use commonSkyPix here to make sure
        # the real tests later involve what's in the database, not just
        # post-query region filtering.
        child_difference_indices = []
        for large, small in zip(child_regions_large, child_regions_small):
            difference = list(unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small)))
            assert difference, "if this is empty, we can't test anything useful with these regions"
            assert all(
                not commonSkyPix.triangle(d).isDisjointFrom(large)
                and commonSkyPix.triangle(d).isDisjointFrom(small)
                for d in difference
            )
            child_difference_indices.append(difference)
        parent_difference_indices = list(
            unpack_range_set(
                commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small)
            )
        )
        assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions"
        assert all(
            (
                not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large)
                and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small)
            )
            for d in parent_difference_indices
        )
        # Now that we've finally got those regions, we'll insert the large
        # ones as tract/patch dimension records.
        skymap_name = "testing_v1"
        registry.insertDimensionData(
            "skymap", {
                "name": skymap_name,
                "hash": bytes([42]),
                "tract_max": 1,
                "patch_nx_max": 2,
                "patch_ny_max": 2,
            }
        )
        registry.insertDimensionData(
            "tract",
            {"skymap": skymap_name, "id": 0, "region": parent_region_large}
        )
        registry.insertDimensionData(
            "patch",
            *[{
                "skymap": skymap_name,
                "tract": 0,
                "id": n,
                "cell_x": n % 2,
                "cell_y": n // 2,
                "region": c
            } for n, c in enumerate(child_regions_large)]
        )
        # Add a dataset that uses these dimensions to make sure that modifying
        # them doesn't disrupt foreign keys (need to make sure the DB doesn't
        # implement insert with replace=True as delete-then-insert).
        dataset_type = DatasetType(
            "coadd",
            dimensions=["tract", "patch"],
            universe=registry.dimensions,
            storageClass="Exposure",
        )
        registry.registerDatasetType(dataset_type)
        registry.registerCollection("the_run", CollectionType.RUN)
        registry.insertDatasets(
            dataset_type,
            [{"skymap": skymap_name, "tract": 0, "patch": 2}],
            run="the_run",
        )
        # Query for tracts and patches that overlap some "difference"
        # commonSkyPix pixels; there should be overlaps, because the database
        # has the "large" suite of regions.
        self.assertEqual(
            {0},
            {
                data_id["tract"] for data_id in registry.queryDataIds(
                    ["tract"],
                    skymap=skymap_name,
                    dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
                )
            }
        )
        for patch_id, patch_difference_indices in enumerate(child_difference_indices):
            self.assertIn(
                patch_id,
                {
                    data_id["patch"] for data_id in registry.queryDataIds(
                        ["patch"],
                        skymap=skymap_name,
                        dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
                    )
                }
            )
        # Use sync to update the tract region and insert to update the patch
        # regions, to the "small" suite.
        updated = registry.syncDimensionData(
            "tract",
            {"skymap": skymap_name, "id": 0, "region": parent_region_small},
            update=True,
        )
        self.assertEqual(updated, {"region": parent_region_large})
        registry.insertDimensionData(
            "patch",
            *[{
                "skymap": skymap_name,
                "tract": 0,
                "id": n,
                "cell_x": n % 2,
                "cell_y": n // 2,
                "region": c
            } for n, c in enumerate(child_regions_small)],
            replace=True
        )
        # Query again; there should now be no such overlaps, because the
        # database has the "small" suite of regions.
        self.assertFalse(
            set(
                registry.queryDataIds(
                    ["tract"],
                    skymap=skymap_name,
                    dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
                )
            )
        )
        for patch_id, patch_difference_indices in enumerate(child_difference_indices):
            self.assertNotIn(
                patch_id,
                {
                    data_id["patch"] for data_id in registry.queryDataIds(
                        ["patch"],
                        skymap=skymap_name,
                        dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
                    )
                }
            )
        # Update back to the large regions and query one more time.
        updated = registry.syncDimensionData(
            "tract",
            {"skymap": skymap_name, "id": 0, "region": parent_region_large},
            update=True,
        )
        self.assertEqual(updated, {"region": parent_region_small})
        registry.insertDimensionData(
            "patch",
            *[{
                "skymap": skymap_name,
                "tract": 0,
                "id": n,
                "cell_x": n % 2,
                "cell_y": n // 2,
                "region": c
            } for n, c in enumerate(child_regions_large)],
            replace=True
        )
        self.assertEqual(
            {0},
            {
                data_id["tract"] for data_id in registry.queryDataIds(
                    ["tract"],
                    skymap=skymap_name,
                    dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
                )
            }
        )
        for patch_id, patch_difference_indices in enumerate(child_difference_indices):
            self.assertIn(
                patch_id,
                {
                    data_id["patch"] for data_id in registry.queryDataIds(
                        ["patch"],
                        skymap=skymap_name,
                        dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
                    )
                }
            )
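
    # The update contract relied on above, in brief (sketch; names as in
    # the test):
    #
    #     updated = registry.syncDimensionData(
    #         "tract",
    #         {"skymap": skymap_name, "id": 0, "region": parent_region_small},
    #         update=True,
    #     )
    #     # `updated` maps each modified field to its *previous* value,
    #     # e.g. {"region": parent_region_large}, which is exactly what the
    #     # assertions above check.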

    def testCalibrationCollections(self):
        """Test operations on `~CollectionType.CALIBRATION` collections,
        including `Registry.certify`, `Registry.decertify`, and
        `Registry.findDataset`.
        """
        # Setup - make a Registry, fill it with some datasets in
        # non-calibration collections.
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Set up some timestamps.
        t1 = astropy.time.Time('2020-01-01T01:00:00', format="isot", scale="tai")
        t2 = astropy.time.Time('2020-01-01T02:00:00', format="isot", scale="tai")
        t3 = astropy.time.Time('2020-01-01T03:00:00', format="isot", scale="tai")
        t4 = astropy.time.Time('2020-01-01T04:00:00', format="isot", scale="tai")
        t5 = astropy.time.Time('2020-01-01T05:00:00', format="isot", scale="tai")
        allTimespans = [
            Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2)
        ]
        # Get references to some datasets.
        bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
        # Register the main calibration collection we'll be working with.
        collection = "Cam1/calibs/default"
        registry.registerCollection(collection, type=CollectionType.CALIBRATION)
        # Cannot associate into a calibration collection (no timespan).
        with self.assertRaises(TypeError):
            registry.associate(collection, [bias2a])
        # Certify 2a dataset with [t2, t4) validity.
        registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4))
        # We should not be able to certify 2b with anything overlapping that
        # window.
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=None, end=t3))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=None, end=t5))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t1, end=None))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t2, end=None))
        # We should be able to certify 3a with a range overlapping that
        # window, because it's for a different detector.
        # We'll certify 3a over [t1, t3).
        registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3))
        # Now we'll certify 2b and 3b together over [t4, ∞).
        registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None))

        # Fetch all associations and check that they are what we expect.
        self.assertCountEqual(
            list(
                registry.queryDatasetAssociations(
                    "bias",
                    collections=[collection, "imported_g", "imported_r"],
                )
            ),
            [
                DatasetAssociation(
                    ref=registry.findDataset("bias", instrument="Cam1", detector=1,
                                             collections="imported_g"),
                    collection="imported_g",
                    timespan=None,
                ),
                DatasetAssociation(
                    ref=registry.findDataset("bias", instrument="Cam1", detector=4,
                                             collections="imported_r"),
                    collection="imported_r",
                    timespan=None,
                ),
                DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None),
                DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None),
                DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None),
                DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None),
                DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)),
                DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)),
                DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)),
                DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)),
            ]
        )

        class Ambiguous:
            """Tag class to denote lookups that are expected to be ambiguous.
            """
            pass

        def assertLookup(detector: int, timespan: Timespan,
                         expected: Optional[Union[DatasetRef, Type[Ambiguous]]]) -> None:
            """Local function that asserts that a bias lookup returns the
            given expected result.
            """
            if expected is Ambiguous:
                with self.assertRaises(RuntimeError):
                    registry.findDataset("bias", collections=collection, instrument="Cam1",
                                         detector=detector, timespan=timespan)
            else:
                self.assertEqual(
                    expected,
                    registry.findDataset("bias", collections=collection, instrument="Cam1",
                                         detector=detector, timespan=timespan)
                )
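
        # Reading guide for the lookup tables below: a calibration lookup
        # returns the single dataset whose validity range overlaps the given
        # timespan, None if no certified dataset overlaps it, and is
        # ambiguous (surfaced here as RuntimeError) if more than one does.
        # For example, after the certifications above, detector=2 over
        # Timespan(None, t5) overlaps both bias2a ([t2, t4)) and bias2b
        # ([t4, ...)), hence Ambiguous.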

        # Systematically test lookups against expected results.
        assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
        assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
        assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
        assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b)
        assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
        assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
        assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
        assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
        assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)

        # Decertify [t3, t5) for all data IDs, and run the lookups again.
        # This should truncate bias2a to [t2, t3), leave bias3a unchanged at
        # [t1, t3), and truncate bias2b and bias3b to [t5, ∞).
        registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5))
        assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
        assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
        assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
        assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None)
        assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None)
        assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b)
        assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None)
        assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
        assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
        assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
        assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
        assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None)
        assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None)
        assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)

        # Decertify everything, this time with explicit data IDs, then check
        # that no lookups succeed.
        registry.decertify(
            collection, "bias", Timespan(None, None),
            dataIds=[
                dict(instrument="Cam1", detector=2),
                dict(instrument="Cam1", detector=3),
            ]
        )
        for detector in (2, 3):
            for timespan in allTimespans:
                assertLookup(detector=detector, timespan=timespan, expected=None)
        # Certify bias2a and bias3a over (-∞, ∞), and check that all lookups
        # return those.
        registry.certify(collection, [bias2a, bias3a], Timespan(None, None))
        for timespan in allTimespans:
            assertLookup(detector=2, timespan=timespan, expected=bias2a)
            assertLookup(detector=3, timespan=timespan, expected=bias3a)
        # Decertify just bias2 over [t2, t4).
        # This should split a single certification row into two (and leave the
        # other existing row, for bias3a, alone).
        registry.decertify(collection, "bias", Timespan(t2, t4),
                           dataIds=[dict(instrument="Cam1", detector=2)])
        for timespan in allTimespans:
            assertLookup(detector=3, timespan=timespan, expected=bias3a)
            overlapsBefore = timespan.overlaps(Timespan(None, t2))
            overlapsAfter = timespan.overlaps(Timespan(t4, None))
            if overlapsBefore and overlapsAfter:
                expected = Ambiguous
            elif overlapsBefore or overlapsAfter:
                expected = bias2a
            else:
                expected = None
            assertLookup(detector=2, timespan=timespan, expected=expected)
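
    # Minimal certify/decertify sketch mirroring the calls above
    # (illustrative only; names as in the test):
    #
    #     registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4))
    #     registry.decertify(collection, "bias", Timespan(t3, t5))
    #
    # decertify truncates or splits overlapping validity ranges rather than
    # deleting whole certifications, which is why bias2a above survives with
    # a shortened [t2, t3) range.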

    def testSkipCalibs(self):
        """Test how queries handle skipping of calibration collections.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")

        coll_calib = "Cam1/calibs/default"
        registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION)

        coll_list = [coll_calib, "imported_g", "imported_r"]
        chain = "Cam1/chain"
        registry.registerCollection(chain, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain, coll_list)

        # An explicit list of collections will raise.
        with self.assertRaises(NotImplementedError):
            registry.queryDatasets("bias", collections=coll_list)
        with self.assertRaises(NotImplementedError):
            registry.queryDataIds(["instrument", "detector"], datasets="bias", collections=coll_list)

        # A chained collection will skip.
        datasets = list(registry.queryDatasets("bias", collections=chain))
        self.assertGreater(len(datasets), 0)

        dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias",
                                             collections=chain))
        self.assertGreater(len(dataIds), 0)

        # A glob pattern will skip too.
        datasets = list(registry.queryDatasets("bias", collections="*d*"))
        self.assertGreater(len(datasets), 0)

        # A regular expression will skip too.
        pattern = re.compile(".*")
        datasets = list(registry.queryDatasets("bias", collections=pattern))
        self.assertGreater(len(datasets), 0)

        # Ellipsis should work as usual.
        datasets = list(registry.queryDatasets("bias", collections=...))
        self.assertGreater(len(datasets), 0)

        # A few tests with findFirst.
        datasets = list(registry.queryDatasets("bias", collections=chain, findFirst=True))
        self.assertGreater(len(datasets), 0)

        with self.assertRaises(NotImplementedError):
            registry.queryDatasets("bias", collections=coll_list, findFirst=True)
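
    # Summary of the behavior pinned down above: naming a CALIBRATION
    # collection explicitly in queryDatasets/queryDataIds raises
    # NotImplementedError, while searches that merely *match* it (a CHAINED
    # collection, a glob string, a compiled regular expression, or `...`)
    # skip it and still return results.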

    def testIngestTimeQuery(self):
        """Test queries against the dataset ingest_date field.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        dt0 = datetime.utcnow()
        self.loadData(registry, "datasets.yaml")
        dt1 = datetime.utcnow()

        datasets = list(registry.queryDatasets(..., collections=...))
        len0 = len(datasets)
        self.assertGreater(len0, 0)

        where = "ingest_date > T'2000-01-01'"
        datasets = list(registry.queryDatasets(..., collections=..., where=where))
        len1 = len(datasets)
        self.assertEqual(len0, len1)

        # No one will ever use this piece of software in 30 years.
        where = "ingest_date > T'2050-01-01'"
        datasets = list(registry.queryDatasets(..., collections=..., where=where))
        len2 = len(datasets)
        self.assertEqual(len2, 0)

        # Check more exact timing to make sure there is no 37-second offset
        # (after fixing DM-30124). SQLite time precision is 1 second, so make
        # sure that we don't test with higher precision.
        tests = [
            # format: (timestamp, operator, expected_len)
            (dt0 - timedelta(seconds=1), ">", len0),
            (dt0 - timedelta(seconds=1), "<", 0),
            (dt1 + timedelta(seconds=1), "<", len0),
            (dt1 + timedelta(seconds=1), ">", 0),
        ]
        for dt, op, expect_len in tests:
            dt_str = dt.isoformat(sep=" ")

            where = f"ingest_date {op} T'{dt_str}'"
            datasets = list(registry.queryDatasets(..., collections=..., where=where))
            self.assertEqual(len(datasets), expect_len)

            # Same with bind, using a datetime or astropy Time.
            where = f"ingest_date {op} ingest_time"
            datasets = list(registry.queryDatasets(..., collections=..., where=where,
                                                   bind={"ingest_time": dt}))
            self.assertEqual(len(datasets), expect_len)

            dt_astropy = astropy.time.Time(dt, format="datetime")
            datasets = list(registry.queryDatasets(..., collections=..., where=where,
                                                   bind={"ingest_time": dt_astropy}))
            self.assertEqual(len(datasets), expect_len)
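
    # The two ingest_date expression forms used above, side by side
    # (illustrative only):
    #
    #     # 1. A time literal, using the T'...' syntax:
    #     registry.queryDatasets(..., collections=...,
    #                            where="ingest_date > T'2000-01-01'")
    #     # 2. A bound value, which may be a datetime or astropy.time.Time:
    #     registry.queryDatasets(..., collections=...,
    #                            where="ingest_date > ingest_time",
    #                            bind={"ingest_time": dt})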

    def testTimespanQueries(self):
        """Test query expressions involving timespans.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")
        # All visits in the database; mapping from ID to timespan.
        visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")}
        # Just those IDs, sorted (which is also temporal sorting, because HSC
        # visit IDs are monotonically increasing).
        ids = sorted(visits.keys())
        self.assertGreater(len(ids), 20)
        # Pick some quasi-random indexes into `ids` to play with.
        i1 = int(len(ids)*0.1)
        i2 = int(len(ids)*0.3)
        i3 = int(len(ids)*0.6)
        i4 = int(len(ids)*0.8)
        # Extract some times from those: just before the beginning of i1
        # (which should be after the end of the previous visit), exactly the
        # beginning of i2, just after the beginning of i3 (and before its
        # end), and the exact end of i4.
        t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec")
        self.assertGreater(t1, visits[ids[i1 - 1]].end)
        t2 = visits[ids[i2]].begin
        t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec")
        self.assertLess(t3, visits[ids[i3]].end)
        t4 = visits[ids[i4]].end
        # Make sure those are actually in order.
        self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1]))

        bind = {
            "t1": t1,
            "t2": t2,
            "t3": t3,
            "t4": t4,
            "ts23": Timespan(t2, t3),
        }

        def query(where):
            """Helper function that queries for visit data IDs and returns
            results as a sorted, deduplicated list of visit IDs.
            """
            return sorted(
                {dataId["visit"] for dataId in registry.queryDataIds("visit",
                                                                     instrument="HSC",
                                                                     bind=bind,
                                                                     where=where)}
            )

        # Try a bunch of timespan queries, mixing up the bounds themselves,
        # where they appear in the expression, and how we get the timespan
        # into the expression.

        # t1 is before the start of i1, so this should not include i1.
        self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)"))
        # t2 is exactly at the start of i2, but ends are exclusive, so these
        # should not include i2.
        self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan"))
        self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)"))
        # t3 is in the middle of i3, so this should include i3.
        self.assertEqual(ids[i2:i3 + 1], query("visit.timespan OVERLAPS ts23"))
        # This one should not include i3, by the same reasoning.
        self.assertEqual(ids[i3 + 1:], query("visit.timespan > (t1, t3)"))
        # t4 is exactly at the end of i4, so this should include i4.
        self.assertEqual(ids[i3:i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)"))
        # i4's upper bound of t4 is exclusive, so this should not include i4.
        self.assertEqual(ids[i4 + 1:], query("visit.timespan OVERLAPS (t4, NULL)"))

        # Now some timespan vs. time scalar queries.
        self.assertEqual(ids[:i2], query("visit.timespan < t2"))
        self.assertEqual(ids[:i2], query("t2 > visit.timespan"))
        self.assertEqual(ids[i3 + 1:], query("visit.timespan > t3"))
        self.assertEqual(ids[i3 + 1:], query("t3 < visit.timespan"))
        self.assertEqual(ids[i3:i3 + 1], query("visit.timespan OVERLAPS t3"))
        self.assertEqual(ids[i3:i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan"))

        # Empty timespans should not overlap anything.
        self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)"))
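
    # Cheat sheet for the timespan expression forms exercised above
    # (illustrative only):
    #
    #     visit.timespan OVERLAPS (t1, t2)   # range vs. range
    #     visit.timespan OVERLAPS ts23       # range vs. bound Timespan
    #     visit.timespan OVERLAPS t3         # range contains a time
    #     visit.timespan < t2                # entirely before a time
    #
    # All bounds are half-open [begin, end), which is why a visit starting
    # exactly at t2 does not overlap (t1, t2).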

    def testCollectionSummaries(self):
        """Test recording and retrieval of collection summaries.
        """
        self.maxDiff = None
        registry = self.makeRegistry()
        # Importing datasets from yaml should go through the code path where
        # we update collection summaries as we insert datasets.
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        flat = registry.getDatasetType("flat")
        expected1 = CollectionSummary.makeEmpty(registry.dimensions)
        expected1.datasetTypes.add(registry.getDatasetType("bias"))
        expected1.datasetTypes.add(flat)
        expected1.dimensions.update_extract(
            DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)
        )
        self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
        self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
        # Create a chained collection with both of the imported runs; the
        # summary should be the same, because it's a union with itself.
        chain = "chain"
        registry.registerCollection(chain, CollectionType.CHAINED)
        registry.setCollectionChain(chain, ["imported_r", "imported_g"])
        self.assertEqual(registry.getCollectionSummary(chain), expected1)
        # Associate flats only into a tagged collection and a calibration
        # collection to check summaries of those.
        tag = "tag"
        registry.registerCollection(tag, CollectionType.TAGGED)
        registry.associate(tag, registry.queryDatasets(flat, collections="imported_g"))
        calibs = "calibs"
        registry.registerCollection(calibs, CollectionType.CALIBRATION)
        registry.certify(calibs, registry.queryDatasets(flat, collections="imported_g"),
                         timespan=Timespan(None, None))
        expected2 = expected1.copy()
        expected2.datasetTypes.discard("bias")
        self.assertEqual(registry.getCollectionSummary(tag), expected2)
        self.assertEqual(registry.getCollectionSummary(calibs), expected2)
        # Explicitly calling Registry.refresh() should load those same
        # summaries, via a totally different code path.
        registry.refresh()
        self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
        self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
        self.assertEqual(registry.getCollectionSummary(tag), expected2)
        self.assertEqual(registry.getCollectionSummary(calibs), expected2)
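
    # What a summary records, as relied on above (sketch): the dataset types
    # and governor dimension values (here instrument="Cam1") present in a
    # collection. A CHAINED collection reports the union of its children's
    # summaries, and Registry.refresh() must reproduce the same summaries
    # when loading them back from the database.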

    def testUnrelatedDimensionQueries(self):
        """Test that WHERE expressions in queries can reference dimensions
        that are not in the result set.
        """
        registry = self.makeRegistry()
        # There is no data to back this query, but it should still return
        # zero records instead of raising.
        self.assertFalse(
            set(registry.queryDataIds(["visit", "detector"],
                                      where="instrument='Cam1' AND skymap='not_here' AND tract=0")),
        )

    def testBindInQueryDatasets(self):
        """Test that the bind parameter is correctly forwarded in
        queryDatasets recursion.
        """
        registry = self.makeRegistry()
        # Load some dimension records and datasets to query against.
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        self.assertEqual(
            set(registry.queryDatasets("flat", band="r", collections=...)),
            set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"},
                                       collections=...)),
        )
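
    # Bind-parameter sketch (illustrative only): identifiers in a `where`
    # string that are not dimension names are resolved through `bind`, so
    #
    #     registry.queryDatasets("flat", where="band=my_band",
    #                            bind={"my_band": "r"}, collections=...)
    #
    # is equivalent to passing band="r" as a keyword argument directly.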