Coverage for python/lsst/obs/base/gen2to3/convertRepo.py: 27%

265 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-12-08 14:44 -0800

1# This file is part of obs_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ["CalibRepo", "ConvertRepoConfig", "ConvertRepoTask", "ConvertRepoSkyMapConfig", "Rerun"] 

24 

25import fnmatch 

26import os 

27from dataclasses import dataclass 

28from multiprocessing import Pool 

29from typing import Iterable, List, Optional, Tuple 

30 

31from lsst.daf.butler import Butler as Butler3 

32from lsst.daf.butler import CollectionType, SkyPixDimension 

33from lsst.daf.butler.registry import DataIdError 

34from lsst.pex.config import Config, ConfigDictField, ConfigurableField, DictField, Field, ListField 

35from lsst.pipe.base import Task 

36from lsst.resources import ResourcePath 

37from lsst.skymap import BaseSkyMap, skyMapRegistry 

38 

39from .._instrument import Instrument 

40from ..defineVisits import DefineVisitsTask 

41from ..ingest import RawIngestTask 

42from .calibRepoConverter import CalibRepoConverter 

43from .repoConverter import ConversionSubset 

44from .rootRepoConverter import RootRepoConverter 

45from .standardRepoConverter import StandardRepoConverter 

46 

47 

@dataclass
class ConfiguredSkyMap:
    """Struct containing information about a skymap that may appear in a Gen2
    repository.
    """

    name: str
    """Name of the skymap used in Gen3 data IDs.
    """

    sha1: bytes
    """Hash computed by `BaseSkyMap.getSha1`.
    """

    instance: BaseSkyMap
    """The skymap object itself (`BaseSkyMap`).
    """

    used: bool = False
    """Whether this skymap has been found in at least one repository being
    converted.
    """

70 

71 

def _dropPrefix(s: str, prefix: str) -> Tuple[str, bool]:
    """Strip ``prefix`` from the front of ``s`` when it is present.

    Returns a pair: the (possibly shortened) string, and a flag that is
    `True` only when the prefix was found and removed.
    """
    matched = s.startswith(prefix)
    remainder = s[len(prefix) :] if matched else s
    return remainder, matched

79 

80 

@dataclass
class Rerun:
    """Description of one Gen2 processing-output repository to convert."""

    path: str
    """Path to the Gen2 repository, either absolute or relative to the root
    repository (`str`).
    """

    runName: Optional[str]
    """Name of the `~lsst.daf.butler.CollectionType.RUN` collection that
    datasets will be inserted into (`str` or `None`).

    When `None`, `guessCollectionNames` is used to pick a name.
    """

    chainName: Optional[str]
    """Name of a `~lsst.daf.butler.CollectionType.CHAINED` collection that
    combines this repository's datasets with those of its parent repositories
    (`str` or `None`).

    When `None`, `guessCollectionNames` is used to pick a name.
    """

    parents: List[str]
    """Collection names associated with parent repositories, used to define
    the chained collection (`list` [ `str` ]).

    Ignored if `chainName` is `None`.  Runs used in the root repo are
    automatically included.
    """

    def guessCollectionNames(self, instrument: Instrument, root: str) -> None:
        """Fill in `runName` and `chainName` following Gen3 naming
        conventions.

        An already-set `runName` is never touched (and in that case a `None`
        `chainName` is left as `None`, so no chained collection is created).
        Otherwise `chainName` is guessed from the path if it is `None`, and
        `runName` is then derived from `chainName`.

        Parameters
        ----------
        instrument : `Instrument`
            Instrument object for the repository being converted.
        root : `str`
            Path to the root repository.  When ``self.path`` is absolute it
            is made relative to this before generating the name.

        Raises
        ------
        ValueError
            Raised if the appropriate collection names cannot be inferred.
        """
        if self.runName is not None:
            # An explicit run name always wins; nothing to guess.
            return
        if self.chainName is None:
            if not os.path.isabs(self.path):
                guess = self.path
            else:
                guess = ResourcePath(self.path).relative_to(ResourcePath(root))
                if guess is None:
                    raise ValueError(
                        f"Cannot guess run name collection for rerun at '{self.path}': "
                        f"no clear relationship to root '{root}'."
                    )
            guess, _ = _dropPrefix(guess, "rerun/")
            guess, personal = _dropPrefix(guess, "private/")
            if personal:
                # Personal reruns map onto user ("u/") collections.
                guess = f"u/{guess}"
            else:
                guess, _ = _dropPrefix(guess, "shared/")
                guess = instrument.makeCollectionName("runs", guess)
            self.chainName = guess
        self.runName = f"{self.chainName}/direct"

158 

159 

@dataclass
class CalibRepo:
    """Description of one Gen2 calibration repository to convert."""

    path: Optional[str]
    """Path to the Gen2 repository, either absolute or relative to the root
    repository (`str` or `None`).

    With `None`, nothing is converted from Gen2 calibration datasets, but
    curated calibrations may still be written.
    """

    curated: bool = True
    """Whether to write curated calibrations into the associated
    ``CALIBRATION`` collection (`bool`).
    """

    labels: Tuple[str, ...] = ()
    """Extra strings inserted into collection names, both for the ``RUN``
    collections datasets are ingested directly into and for the
    ``CALIBRATION`` collection that associates them with validity ranges.

    An empty tuple populates the instrument's default calibration collection
    directly with the converted datasets, and cannot be combined with
    ``default=False``.  That suits test data repositories where only one
    ``CALIBRATION`` collection will ever exist.  Otherwise this should be a
    non-empty tuple, so the default calibration collection can be a
    ``CHAINED`` collection pointer to the currently recommended
    ``CALIBRATION`` collection.
    """

    default: bool = True
    """Whether the created ``CALIBRATION`` collection should be the default
    for this instrument.

    At most one converted calibration collection may have this set to `True`
    when several are passed to `ConvertRepoTask.run`.  The default is `True`
    because usually only one calibration collection is being converted.  With
    non-empty ``labels``, ``default=True`` causes a ``CHAINED`` collection
    pointing at the converted ``CALIBRATION`` collection to be defined.  With
    empty ``labels``, ``default`` *must* be `True` and no ``CHAINED``
    collection pointer is needed.
    """

    def __post_init__(self) -> None:
        # Unlabeled collections *are* the default collection, so they cannot
        # be flagged as non-default.
        if not (self.labels or self.default):
            raise ValueError("labels=() requires default=True")

207 

208 

class ConvertRepoSkyMapConfig(Config):
    """Sub-config holding the parameters of a single SkyMap.

    Notes
    -----
    The only reason this config exists is that a
    `~lsst.pex.config.RegistryField` cannot be placed directly inside a
    `~lsst.pex.config.ConfigDictField`.

    Its single field must be called "skyMap" to stay compatible with the
    configuration of `lsst.pipe.tasks.MakeSkyMapTask`, so that one config
    file in an obs package can configure both.

    That name repeats awkwardly alongside the field named "skymap" that holds
    it - "skyMap[name].skyMap" - but that seems unavoidable.
    """

    skyMap = skyMapRegistry.makeField(
        default="dodeca",
        doc="Type and parameters for the SkyMap itself.",
    )

231 

232 

class ConvertRepoConfig(Config):
    """Configuration for `ConvertRepoTask`.

    Notes
    -----
    The transfer mode is not a field of its own; the ``transfer`` property
    reads and writes ``raws.transfer``.
    """

    raws = ConfigurableField(
        "Configuration for subtask responsible for ingesting raws and adding exposure dimension entries.",
        target=RawIngestTask,
    )
    defineVisits = ConfigurableField(
        "Configuration for the subtask responsible for defining visits from exposures.",
        target=DefineVisitsTask,
    )
    skyMaps = ConfigDictField(
        "Mapping from Gen3 skymap name to the parameters used to construct a "
        "BaseSkyMap instance. This will be used to associate names with "
        "existing skymaps found in the Gen2 repo.",
        keytype=str,
        itemtype=ConvertRepoSkyMapConfig,
        default={},
    )
    rootSkyMapName = Field(
        "Name of a Gen3 skymap (an entry in ``self.skyMaps``) to assume for "
        "datasets in the root repository when no SkyMap is found there. ",
        dtype=str,
        optional=True,
        default=None,
    )
    runs = DictField(
        "A mapping from dataset type name to the RUN collection they should "
        "be inserted into. This must include all datasets that can be found "
        "in the root repository; other repositories will use per-repository "
        "runs.",
        keytype=str,
        itemtype=str,
        default={},
    )
    runsForced = DictField(
        "Like ``runs``, but is used even when the dataset is present in a "
        "non-root repository (i.e. rerun), overriding the non-root "
        "repository's main collection.",
        keytype=str,
        itemtype=str,
        default={
            "brightObjectMask": "masks",
        },
    )
    storageClasses = DictField(
        "Mapping from dataset type name or Gen2 policy entry (e.g. 'python' "
        "or 'persistable') to the Gen3 StorageClass name.",
        keytype=str,
        itemtype=str,
        default={
            "bias": "ExposureF",
            "dark": "ExposureF",
            "flat": "ExposureF",
            "defects": "Defects",
            "crosstalk": "CrosstalkCalib",
            "BaseSkyMap": "SkyMap",
            "BaseCatalog": "Catalog",
            "BackgroundList": "Background",
            "raw": "Exposure",
            "MultilevelParquetTable": "DataFrame",
            "ParquetTable": "DataFrame",
            "SkyWcs": "Wcs",
        },
    )
    formatterClasses = DictField(
        "Mapping from dataset type name to formatter class. "
        "By default these are derived from the formatters listed in the"
        " Gen3 datastore configuration.",
        keytype=str,
        itemtype=str,
        default={},
    )
    targetHandlerClasses = DictField(
        "Mapping from dataset type name to target handler class.", keytype=str, itemtype=str, default={}
    )
    doRegisterInstrument = Field(
        "If True (default), add dimension records for the Instrument and its "
        "filters and detectors to the registry instead of assuming they are "
        "already present.",
        dtype=bool,
        default=True,
    )
    refCats = ListField(
        "The names of reference catalogs (subdirectories under ref_cats) to be converted",
        dtype=str,
        default=[],
    )
    fileIgnorePatterns = ListField(
        "Filename globs that should be ignored instead of being treated as datasets.",
        dtype=str,
        default=[
            "README.txt",
            "*.*~*",
            "butler.yaml",
            "gen3.sqlite3",
            "registry.sqlite3",
            "calibRegistry.sqlite3",
            "_mapper",
            "_parent",
            "repositoryCfg.yaml",
        ],
    )
    rawDatasetType = Field(
        "Gen2 dataset type to use for raw data.",
        dtype=str,
        default="raw",
    )
    datasetIncludePatterns = ListField(
        "Glob-style patterns for dataset type names that should be converted.", dtype=str, default=["*"]
    )
    datasetIgnorePatterns = ListField(
        "Glob-style patterns for dataset type names that should not be "
        "converted despite matching a pattern in datasetIncludePatterns.",
        dtype=str,
        default=[],
    )
    datasetTemplateOverrides = DictField(
        "Overrides for Gen2 filename templates, keyed by dataset type. "
        "This can be used to support conversions of Gen2 repos whose mapper "
        "templates were modified in obs_* packages since the datasets were "
        "written.",
        keytype=str,
        itemtype=str,
        default={},
    )
    ccdKey = Field(
        "Key used for the Gen2 equivalent of 'detector' in data IDs.",
        dtype=str,
        default="ccd",
    )
    # The help text used to claim True was the default, contradicting the
    # actual ``default=False`` below; the text now matches the behavior.
    relatedOnly = Field(
        "If True (default is False), only convert datasets that are related "
        "to the ingested visits. Ignored unless a list of visits is passed "
        "to run().",
        dtype=bool,
        default=False,
    )
    doExpandDataIds = Field(
        "If True (default), expand data IDs to include extra metadata before "
        "ingesting them. "
        "This may be required in order to associate calibration datasets with "
        "validity ranges or populate file templates, so setting this to False "
        "is considered advanced usage (and it may not always work). When it "
        "does, it can provide a considerable speedup.",
        dtype=bool,
        default=True,
    )
    doMakeUmbrellaCollection = Field(
        "If True (default), define an '<instrument>/defaults' CHAINED "
        "collection that includes everything found in the root repo as well "
        "as the default calibration collection.",
        dtype=bool,
        default=True,
    )
    extraUmbrellaChildren = ListField(
        "Additional child collections to include in the umbrella collection. "
        "Ignored if doMakeUmbrellaCollection=False.",
        dtype=str,
        default=[],
    )

    @property
    def transfer(self):
        """Transfer mode, stored on the raw-ingest subtask configuration
        (``raws.transfer``).
        """
        return self.raws.transfer

    @transfer.setter
    def transfer(self, value):
        self.raws.transfer = value

    def setDefaults(self):
        """Set the default transfer mode to `None`."""
        self.transfer = None

    def validate(self):
        """Validate the configuration.

        Raises
        ------
        ValueError
            Raised if ``relatedOnly`` is enabled while ``doExpandDataIds`` is
            disabled.
        """
        super().validate()
        if self.relatedOnly and not self.doExpandDataIds:
            raise ValueError("relatedOnly requires doExpandDataIds.")

408 

409 

class ConvertRepoTask(Task):
    """A task that converts one or more related Gen2 data repositories to a
    single Gen3 data repository (with multiple collections).

    Parameters
    ----------
    config : `ConvertRepoConfig`, optional
        Configuration for this task.  If `None`, a default-constructed
        ``ConfigClass`` instance is used.
    butler3 : `lsst.daf.butler.Butler`
        A writeable Gen3 Butler instance that represents the data repository
        that datasets will be ingested into.  If the 'raw' dataset is
        configured to be included in the conversion, ``butler3.run`` should be
        set to the name of the collection raws should be ingested into, and
        ``butler3.collections`` should include a calibration collection from
        which the ``camera`` dataset can be loaded, unless a calibration repo
        is converted and curated calibrations are written (see
        `CalibRepo.curated`).
    instrument : `lsst.obs.base.Instrument`
        The Gen3 instrument that should be used for this conversion.
    dry_run : `bool`, optional
        If `True` (`False` is default), make no changes to the Gen3 data
        repository while running as many steps as possible.  This option is
        best used with a read-only ``butler3`` argument to ensure unexpected
        edge cases respect this argument (and fail rather than write if they
        do not).
    **kwargs
        Other keyword arguments are forwarded to the `Task` constructor.

    Notes
    -----
    Most of the work of converting repositories is delegated to instances of
    the `RepoConverter` hierarchy.  The `ConvertRepoTask` instance itself holds
    only state that is relevant for all Gen2 repositories being ingested, while
    each `RepoConverter` instance holds only state relevant for the conversion
    of a single Gen2 repository.  Both the task and the `RepoConverter`
    instances are single use; `ConvertRepoTask.run` and most `RepoConverter`
    methods may only be called once on a particular instance.
    """

    ConfigClass = ConvertRepoConfig

    _DefaultName = "convertRepo"

    def __init__(
        self, config=None, *, butler3: Butler3, instrument: Instrument, dry_run: bool = False, **kwargs
    ):
        # The signature advertises ``config=None``; build the default config
        # here rather than failing with AttributeError on ``None.validate()``.
        if config is None:
            config = self.ConfigClass()
        config.validate()  # Not a CmdlineTask nor PipelineTask, so have to validate the config here.
        super().__init__(config, **kwargs)
        # Make self.butler3 one that doesn't have any collections associated
        # with it - those are needed by RawIngestTask and DefineVisitsTask, but
        # we don't want them messing with converted datasets, because those
        # have their own logic for figuring out which collections to write to.
        self.butler3 = Butler3(butler=butler3)
        self.registry = self.butler3.registry
        self.universe = self.registry.dimensions
        if self.isDatasetTypeIncluded("raw"):
            self.makeSubtask("raws", butler=butler3)
            self.makeSubtask("defineVisits", butler=butler3)
        else:
            self.raws = None
            self.defineVisits = None
        self.instrument = instrument
        self._configuredSkyMapsBySha1 = {}
        self._configuredSkyMapsByName = {}
        # Loop variable deliberately does not reuse the name ``config``, which
        # is this method's parameter.
        for name, skyMapConfig in self.config.skyMaps.items():
            instance = skyMapConfig.skyMap.apply()
            self._populateSkyMapDicts(name, instance)
        self._usedSkyPix = set()
        self.translatorFactory = self.instrument.makeDataIdTranslatorFactory()
        self.translatorFactory.log = self.log.getChild("translators")
        self.dry_run = dry_run

    def _reduce_kwargs(self):
        # Add extra parameters to pickle.  ``dry_run`` must be included, or an
        # unpickled copy of a dry-run task would fall back to the constructor
        # default (False) and be allowed to write.
        return dict(
            **super()._reduce_kwargs(),
            butler3=self.butler3,
            instrument=self.instrument,
            dry_run=self.dry_run,
        )

    def _populateSkyMapDicts(self, name, instance):
        """Record a skymap in both lookup tables (by SHA1 and by name)."""
        struct = ConfiguredSkyMap(name=name, sha1=instance.getSha1(), instance=instance)
        self._configuredSkyMapsBySha1[struct.sha1] = struct
        self._configuredSkyMapsByName[struct.name] = struct

    def isDatasetTypeIncluded(self, datasetTypeName: str):
        """Return `True` if configuration indicates that the given dataset type
        should be converted.

        This method is intended to be called primarily by the
        `RepoConverter` instances used internally by the task.

        Parameters
        ----------
        datasetTypeName : `str`
            Name of the dataset type.

        Returns
        -------
        included : `bool`
            Whether the dataset should be included in the conversion.
        """
        return any(
            fnmatch.fnmatchcase(datasetTypeName, pattern) for pattern in self.config.datasetIncludePatterns
        ) and not any(
            fnmatch.fnmatchcase(datasetTypeName, pattern) for pattern in self.config.datasetIgnorePatterns
        )

    def useSkyMap(self, skyMap: BaseSkyMap, skyMapName: str) -> str:
        """Indicate that a repository uses the given SkyMap.

        This method is intended to be called primarily by the
        `RepoConverter` instances used internally by the task.

        Parameters
        ----------
        skyMap : `lsst.skymap.BaseSkyMap`
            SkyMap instance being used, typically retrieved from a Gen2
            data repository.
        skyMapName : `str`
            The name of the gen2 skymap.  If no configured skymap has the
            same SHA1 as ``skyMap``, the skymap is recorded under this name.

        Returns
        -------
        name : `str`
            The name of the skymap in Gen3 data IDs.
        """
        sha1 = skyMap.getSha1()
        if sha1 not in self._configuredSkyMapsBySha1:
            # Not configured: record it under its Gen2 name.  After this the
            # lookup below cannot fail, so no KeyError handling is needed.
            self._populateSkyMapDicts(skyMapName, skyMap)
        struct = self._configuredSkyMapsBySha1[sha1]
        struct.used = True
        return struct.name

    def registerUsedSkyMaps(self, subset: Optional[ConversionSubset]):
        """Register all skymaps that have been marked as used.

        This method is intended to be called primarily by the
        `RepoConverter` instances used internally by the task.

        Parameters
        ----------
        subset : `ConversionSubset`, optional
            Object that will be used to filter converted datasets by data ID.
            If given, it will be updated with the tracts of this skymap that
            overlap the visits in the subset.
        """
        for struct in self._configuredSkyMapsBySha1.values():
            if not struct.used:
                continue
            if not self.dry_run:
                try:
                    # If the skymap isn't registered, this will raise.
                    self.butler3.registry.expandDataId(skymap=struct.name)
                except DataIdError:
                    self.log.info("Registering skymap %s.", struct.name)
                    struct.instance.register(struct.name, self.butler3)
            if subset is not None and self.config.relatedOnly:
                subset.addSkyMap(self.registry, struct.name)

    def useSkyPix(self, dimension: SkyPixDimension):
        """Indicate that a repository uses the given SkyPix dimension.

        This method is intended to be called primarily by the
        `RepoConverter` instances used internally by the task.

        Parameters
        ----------
        dimension : `lsst.daf.butler.SkyPixDimension`
            Dimension representing a pixelization of the sky.
        """
        self._usedSkyPix.add(dimension)

    def registerUsedSkyPix(self, subset: Optional[ConversionSubset]):
        """Add all skypix dimensions that have been marked as used to the
        conversion subset.

        This method is intended to be called primarily by the
        `RepoConverter` instances used internally by the task.

        Parameters
        ----------
        subset : `ConversionSubset`, optional
            Object that will be used to filter converted datasets by data ID.
            If given, it will be updated with the pixelization IDs that
            overlap the visits in the subset.
        """
        if subset is not None and self.config.relatedOnly:
            for dimension in self._usedSkyPix:
                subset.addSkyPix(self.registry, dimension)

    def run(
        self,
        root: str,
        *,
        calibs: Optional[List[CalibRepo]] = None,
        reruns: Optional[List[Rerun]] = None,
        visits: Optional[Iterable[int]] = None,
        pool: Optional[Pool] = None,
        processes: int = 1,
    ):
        """Convert a group of related data repositories.

        Parameters
        ----------
        root : `str`
            Complete path to the root Gen2 data repository.  This should be
            a data repository that includes a Gen2 registry and any raw files
            and/or reference catalogs.
        calibs : `list` of `CalibRepo`
            Specifications for Gen2 calibration repos to convert.  If `None`
            (default), curated calibrations only will be written to the default
            calibration collection for this instrument; set to ``()``
            explicitly to disable this.
        reruns : `list` of `Rerun`
            Specifications for rerun (processing output) repos to convert.  If
            `None` (default), no reruns are converted.
        visits : iterable of `int`, optional
            The integer IDs of visits to convert.  If not provided, all visits
            in the Gen2 root repository will be converted.
        pool : `multiprocessing.Pool`, optional
            If not `None`, a process pool with which to parallelize some
            operations.  A pool passed in by the caller is never closed here.
        processes : `int`, optional
            The number of processes to use for conversion.  If greater than 1
            and ``pool`` is `None`, a pool is created for the duration of this
            call and shut down before returning.

        Raises
        ------
        ValueError
            Raised if calibration repos are given while
            ``config.doExpandDataIds`` is `False`, or if more than one
            `CalibRepo` is marked as default.
        """
        # Only a pool created here is ours to shut down.
        ownedPool = None
        if pool is None and processes > 1:
            pool = ownedPool = Pool(processes)
        try:
            self._convert(root, calibs=calibs, reruns=reruns, visits=visits, pool=pool)
        finally:
            if ownedPool is not None:
                ownedPool.close()
                ownedPool.join()

    def _convert(self, root, *, calibs, reruns, visits, pool):
        """Do the work of `run` with an already-resolved process pool."""
        if calibs is None:
            calibs = [CalibRepo(path=None)]
        elif calibs and not self.config.doExpandDataIds:
            raise ValueError("Cannot convert calib repos with config.doExpandDataIds=False.")
        # ``reruns`` is documented as optional and is iterated twice below, so
        # normalize None to an empty list and materialize any other iterable.
        reruns = [] if reruns is None else list(reruns)
        if visits is not None:
            subset = ConversionSubset(instrument=self.instrument.getName(), visits=frozenset(visits))
        else:
            if self.config.relatedOnly:
                self.log.warning(
                    "config.relatedOnly is True but all visits are being ingested; "
                    "no filtering will be done."
                )
            subset = None
        if not self.config.doExpandDataIds and self.butler3.datastore.needs_expanded_data_ids(
            self.config.transfer
        ):
            # The message needs a placeholder for the transfer mode argument;
            # without one the logging module reports a formatting error.
            self.log.warning(
                "config.doExpandDataIds=False but datastore reports that expanded data IDs may be needed "
                "for transfer mode %s.",
                self.config.transfer,
            )

        # Check that at most one CalibRepo is marked as default, to fail before
        # we actually write anything.
        defaultCalibRepos = [c.path for c in calibs if c.default]
        if len(defaultCalibRepos) > 1:
            raise ValueError(f"Multiple calib repos marked as default: {defaultCalibRepos}.")

        # Make converters for all Gen2 repos.
        converters = []
        # Start with the root repo, which must always be given even if we are
        # not configured to convert anything from it.
        rootConverter = RootRepoConverter(task=self, root=root, subset=subset, instrument=self.instrument)
        converters.append(rootConverter)
        # Calibration repos are next.
        for spec in calibs:
            calibRoot = spec.path
            if calibRoot is not None:
                if not os.path.isabs(calibRoot):
                    calibRoot = os.path.join(rootConverter.root, calibRoot)
                converter = CalibRepoConverter(
                    task=self,
                    root=calibRoot,
                    labels=spec.labels,
                    instrument=self.instrument,
                    mapper=rootConverter.mapper,
                    subset=rootConverter.subset,
                )
                converters.append(converter)
            # CalibRepo entries that don't have a path are just there for
            # curated calibs and maybe to set up a collection pointer; that's
            # handled further down (after we've done everything we can that
            # doesn't involve actually writing to the output Gen3 repo).
        # And now reruns.
        rerunConverters = {}
        for spec in reruns:
            runRoot = spec.path
            if not os.path.isabs(runRoot):
                runRoot = os.path.join(rootConverter.root, runRoot)
            spec.guessCollectionNames(self.instrument, rootConverter.root)
            converter = StandardRepoConverter(
                task=self,
                root=runRoot,
                run=spec.runName,
                instrument=self.instrument,
                subset=rootConverter.subset,
            )
            converters.append(converter)
            rerunConverters[spec.runName] = converter

        # Walk Gen2 repos to find datasets to convert.
        for converter in converters:
            converter.prep()

        # Register the instrument if we're configured to do so.
        if self.config.doRegisterInstrument and not self.dry_run:
            self.instrument.register(self.registry)

        # Run raw ingest (does nothing if we weren't configured to convert the
        # 'raw' dataset type).
        rootConverter.runRawIngest(pool=pool)

        # Write curated calibrations to all calibration collections where they
        # were requested (which may be implicit, by passing calibs=None).  Also
        # set up a CHAINED collection that points to the default CALIBRATION
        # collection if one is needed.
        if not self.dry_run:
            for spec in calibs:
                if spec.curated:
                    self.instrument.writeCuratedCalibrations(self.butler3, labels=spec.labels)
                if spec.default and spec.labels:
                    # This is guaranteed to be True at most once in the loop by
                    # logic at the top of this method.
                    defaultCalibName = self.instrument.makeCalibrationCollectionName()
                    self.butler3.registry.registerCollection(defaultCalibName, CollectionType.CHAINED)
                    recommendedCalibName = self.instrument.makeCalibrationCollectionName(*spec.labels)
                    self.butler3.registry.registerCollection(recommendedCalibName, CollectionType.CALIBRATION)
                    self.butler3.registry.setCollectionChain(defaultCalibName, [recommendedCalibName])

        # Define visits (also does nothing if we weren't configured to convert
        # the 'raw' dataset type).
        rootConverter.runDefineVisits()

        # Insert dimensions that are potentially shared by all Gen2
        # repositories (and are hence managed directly by the Task, rather
        # than a converter instance).
        # This also finishes setting up the (shared) converter.subsets object
        # that is used to filter data IDs for config.relatedOnly.
        self.registerUsedSkyMaps(rootConverter.subset)
        self.registerUsedSkyPix(rootConverter.subset)

        # Look for datasets, generally by scanning the filesystem.
        # This requires dimensions to have already been inserted so we can use
        # dimension information to identify related datasets.
        for converter in converters:
            converter.findDatasets()

        # Expand data IDs.
        if self.config.doExpandDataIds:
            for converter in converters:
                converter.expandDataIds()

        if self.dry_run:
            return

        # Actually ingest datasets.
        for converter in converters:
            converter.ingest()

        # Perform any post-ingest processing.
        for converter in converters:
            converter.finish()

        # Make the umbrella collection, if desired.
        if self.config.doMakeUmbrellaCollection:
            umbrella = self.instrument.makeUmbrellaCollectionName()
            self.registry.registerCollection(umbrella, CollectionType.CHAINED)
            children = list(self.registry.getCollectionChain(umbrella))
            children.extend(rootConverter.getCollectionChain())
            children.append(self.instrument.makeCalibrationCollectionName())
            if BaseSkyMap.SKYMAP_RUN_COLLECTION_NAME not in children:
                # Ensure the umbrella collection includes the global skymap
                # collection, even if it's currently empty.
                self.registry.registerRun(BaseSkyMap.SKYMAP_RUN_COLLECTION_NAME)
                children.append(BaseSkyMap.SKYMAP_RUN_COLLECTION_NAME)
            children.extend(self.config.extraUmbrellaChildren)
            self.log.info("Defining %s from chain %s.", umbrella, children)
            self.registry.setCollectionChain(umbrella, children)

        # Add chained collections for reruns.
        for spec in reruns:
            if spec.chainName is not None:
                self.butler3.registry.registerCollection(spec.chainName, type=CollectionType.CHAINED)
                chain = [spec.runName]
                chain.extend(rerunConverters[spec.runName].getCollectionChain())
                for parent in spec.parents:
                    chain.append(parent)
                    parentConverter = rerunConverters.get(parent)
                    if parentConverter is not None:
                        chain.extend(parentConverter.getCollectionChain())
                chain.extend(rootConverter.getCollectionChain())
                if len(calibs) == 1:
                    # Exactly one calibration repo being converted, so it's
                    # safe-ish to assume that's the one the rerun used.
                    chain.append(self.instrument.makeCalibrationCollectionName(*calibs[0].labels))
                self.log.info("Defining %s from chain %s.", spec.chainName, chain)
                self.butler3.registry.setCollectionChain(spec.chainName, chain, flatten=True)