Coverage for python/lsst/obs/base/gen2to3/convertRepo.py: 26% (252 statements)

# This file is part of obs_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["CalibRepo", "ConvertRepoConfig", "ConvertRepoTask", "ConvertRepoSkyMapConfig", "Rerun"]

import fnmatch
import os
from dataclasses import dataclass
from multiprocessing import Pool
from typing import Iterable, List, Optional, Tuple

from lsst.daf.butler import Butler as Butler3
from lsst.daf.butler import ButlerURI, CollectionType, SkyPixDimension
from lsst.pex.config import Config, ConfigDictField, ConfigurableField, DictField, Field, ListField
from lsst.pipe.base import Task
from lsst.skymap import BaseSkyMap, skyMapRegistry

from .._instrument import Instrument
from ..defineVisits import DefineVisitsTask
from ..ingest import RawIngestTask
from .calibRepoConverter import CalibRepoConverter
from .repoConverter import ConversionSubset
from .rootRepoConverter import RootRepoConverter
from .standardRepoConverter import StandardRepoConverter


@dataclass
class ConfiguredSkyMap:
    """Struct containing information about a skymap that may appear in a Gen2
    repository.
    """

    name: str
    """Name of the skymap used in Gen3 data IDs.
    """

    sha1: bytes
    """Hash computed by `BaseSkyMap.getSha1`.
    """

    instance: BaseSkyMap
    """The skymap instance itself.
    """

    used: bool = False
    """Whether this skymap has been found in at least one repository being
    converted.
    """


def _dropPrefix(s: str, prefix: str) -> Tuple[str, bool]:
    """If ``s`` starts with ``prefix``, return the rest of ``s`` and `True`.
    Otherwise return ``s`` and `False`.
    """
    if s.startswith(prefix):
        return s[len(prefix) :], True
    return s, False
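# Illustrative note (not part of the original module): `_dropPrefix` strips
# the prefix only when it matches at the very start of the string, e.g.
#
#   >>> _dropPrefix("rerun/private/alice", "rerun/")
#   ('private/alice', True)
#   >>> _dropPrefix("calib/2020A", "rerun/")
#   ('calib/2020A', False)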


@dataclass
class Rerun:
    """Specification for a Gen2 processing-output repository to convert."""

    path: str
    """Absolute or relative (to the root repository) path to the Gen2
    repository (`str`).
    """

    runName: Optional[str]
    """Name of the `~lsst.daf.butler.CollectionType.RUN` collection datasets
    will be inserted into (`str` or `None`).

    If `None`, a name will be guessed by calling `guessCollectionNames`.
    """

    chainName: Optional[str]
    """Name of a `~lsst.daf.butler.CollectionType.CHAINED` collection that will
    combine this repository's datasets with those of its parent repositories
    (`str` or `None`).

    If `None`, a name will be guessed by calling `guessCollectionNames`.
    """

    parents: List[str]
    """Collection names associated with parent repositories, used to define the
    chained collection (`list` [ `str` ]).

    Ignored if `chainName` is `None`. Runs used in the root repo are
    automatically included.
    """

    def guessCollectionNames(self, instrument: Instrument, root: str) -> None:
        """Update `runName` and `chainName` with guesses that match Gen3
        naming conventions.

        If `chainName` is not `None` and `runName` is, `runName` will be set
        from it. If `runName` is already set, nothing will be changed; in
        that case, if `chainName` is `None`, no chained collection will be
        created.

        Parameters
        ----------
        instrument : `Instrument`
            Instrument object for the repository being converted.
        root : `str`
            Path to the root repository. If this is present at the start of
            ``self.path``, it will be stripped as part of generating the run
            name.

        Raises
        ------
        ValueError
            Raised if the appropriate collection names cannot be inferred.
        """
        if self.runName is not None:
            return
        if self.chainName is None:
            if os.path.isabs(self.path):
                rerunURI = ButlerURI(self.path)
                rootURI = ButlerURI(root)
                chainName = rerunURI.relative_to(rootURI)
                if chainName is None:
                    raise ValueError(
                        f"Cannot guess run name collection for rerun at '{self.path}': "
                        f"no clear relationship to root '{root}'."
                    )
            else:
                chainName = self.path
            chainName, _ = _dropPrefix(chainName, "rerun/")
            chainName, isPersonal = _dropPrefix(chainName, "private/")
            if isPersonal:
                chainName = f"u/{chainName}"
            else:
                chainName, _ = _dropPrefix(chainName, "shared/")
            chainName = instrument.makeCollectionName("runs", chainName)
            self.chainName = chainName
        self.runName = f"{self.chainName}/direct"
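# Illustrative sketch (not part of the original module): how
# guessCollectionNames maps a Gen2 rerun path onto Gen3 collection names.
# ``MyCam`` stands in for any concrete `Instrument`; the exact prefix comes
# from that instrument's makeCollectionName(), so the values shown are only
# indicative.
#
#   >>> spec = Rerun(path="rerun/private/alice/cosmos", runName=None,
#   ...              chainName=None, parents=[])
#   >>> spec.guessCollectionNames(MyCam(), root="/repo/gen2")  # doctest: +SKIP
#   >>> spec.chainName  # doctest: +SKIP
#   'MyCam/runs/u/alice/cosmos'
#   >>> spec.runName  # doctest: +SKIP
#   'MyCam/runs/u/alice/cosmos/direct'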


@dataclass
class CalibRepo:
    """Specification for a Gen2 calibration repository to convert."""

    path: Optional[str]
    """Absolute or relative (to the root repository) path to the Gen2
    repository (`str` or `None`).

    If `None`, no calibration datasets will be converted from Gen2, but
    curated calibrations may still be written.
    """

    curated: bool = True
    """If `True`, write curated calibrations into the associated
    ``CALIBRATION`` collection (`bool`).
    """

    labels: Tuple[str, ...] = ()
    """Extra strings to insert into collection names, including both the
    ``RUN`` collections that datasets are ingested directly into and the
    ``CALIBRATION`` collection that associates them with validity ranges.

    An empty tuple will directly populate the default calibration collection
    for this instrument with the converted datasets, and is incompatible with
    ``default=False``. This is a good choice for test data repositories where
    only one ``CALIBRATION`` collection will ever exist. In other cases, this
    should be a non-empty tuple, so the default calibration collection can
    actually be a ``CHAINED`` collection pointer that points to the current
    recommended ``CALIBRATION`` collection.
    """

    default: bool = True
    """If `True`, the created ``CALIBRATION`` collection should be the default
    for this instrument.

    This field may only be `True` for one converted calibration collection if
    more than one is passed to `ConvertRepoTask.run`. It defaults to `True`
    because the vast majority of the time only one calibration collection is
    being converted. If ``labels`` is not empty, ``default=True`` will cause
    a ``CHAINED`` collection that points to the converted ``CALIBRATION``
    collection to be defined. If ``labels`` is empty, ``default`` *must* be
    `True` and no ``CHAINED`` collection pointer is necessary.
    """

    def __post_init__(self) -> None:
        if not self.labels and not self.default:
            raise ValueError("labels=() requires default=True")
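# Illustrative sketch (not part of the original module): typical CalibRepo
# specifications and the one combination __post_init__ rejects.
#
#   >>> spec = CalibRepo(path="CALIB")                        # converted + curated, default collection
#   >>> spec = CalibRepo(path="CALIB", labels=("DM-12345",))  # labeled collections + default pointer
#   >>> spec = CalibRepo(path=None)                           # curated calibrations only
#   >>> CalibRepo(path="CALIB", labels=(), default=False)
#   Traceback (most recent call last):
#       ...
#   ValueError: labels=() requires default=True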


class ConvertRepoSkyMapConfig(Config):
    """Sub-config used to hold the parameters of a SkyMap.

    Notes
    -----
    This config only needs to exist because we can't put a
    `~lsst.pex.config.RegistryField` directly inside a
    `~lsst.pex.config.ConfigDictField`.

    It needs to have its only field named "skyMap" for compatibility with the
    configuration of `lsst.pipe.tasks.MakeSkyMapTask`, which we want so we can
    use one config file in an obs package to configure both.

    This name leads to unfortunate repetition with the field named
    "skymap" that holds it - "skyMap[name].skyMap" - but that seems
    unavoidable.
    """

    skyMap = skyMapRegistry.makeField(
        doc="Type and parameters for the SkyMap itself.",
        default="dodeca",
    )
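# Illustrative sketch (not part of the original module): how an obs package's
# convertRepo config override might declare a named skymap so it can be
# matched against one found in the Gen2 repo. The skymap name, the "rings"
# registry entry, and the parameter value are examples only.
#
#   config.skyMaps["hsc_rings_v1"] = ConvertRepoSkyMapConfig()
#   config.skyMaps["hsc_rings_v1"].skyMap.name = "rings"
#   config.skyMaps["hsc_rings_v1"].skyMap["rings"].numRings = 120
#   config.rootSkyMapName = "hsc_rings_v1"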


class ConvertRepoConfig(Config):
    raws = ConfigurableField(
        "Configuration for subtask responsible for ingesting raws and adding exposure dimension entries.",
        target=RawIngestTask,
    )
    defineVisits = ConfigurableField(
        "Configuration for the subtask responsible for defining visits from exposures.",
        target=DefineVisitsTask,
    )
    skyMaps = ConfigDictField(
        "Mapping from Gen3 skymap name to the parameters used to construct a "
        "BaseSkyMap instance. This will be used to associate names with "
        "existing skymaps found in the Gen2 repo.",
        keytype=str,
        itemtype=ConvertRepoSkyMapConfig,
        default={},
    )
    rootSkyMapName = Field(
        "Name of a Gen3 skymap (an entry in ``self.skyMaps``) to assume for "
        "datasets in the root repository when no SkyMap is found there.",
        dtype=str,
        optional=True,
        default=None,
    )
    runs = DictField(
        "A mapping from dataset type name to the RUN collection its datasets "
        "should be inserted into. This must include all datasets that can be "
        "found in the root repository; other repositories will use "
        "per-repository runs.",
        keytype=str,
        itemtype=str,
        default={},
    )
    runsForced = DictField(
        "Like ``runs``, but is used even when the dataset is present in a "
        "non-root repository (i.e. rerun), overriding the non-root "
        "repository's main collection.",
        keytype=str,
        itemtype=str,
        default={
            "brightObjectMask": "masks",
        },
    )
    storageClasses = DictField(
        "Mapping from dataset type name or Gen2 policy entry (e.g. 'python' "
        "or 'persistable') to the Gen3 StorageClass name.",
        keytype=str,
        itemtype=str,
        default={
            "bias": "ExposureF",
            "dark": "ExposureF",
            "flat": "ExposureF",
            "defects": "Defects",
            "crosstalk": "CrosstalkCalib",
            "BaseSkyMap": "SkyMap",
            "BaseCatalog": "Catalog",
            "BackgroundList": "Background",
            "raw": "Exposure",
            "MultilevelParquetTable": "DataFrame",
            "ParquetTable": "DataFrame",
            "SkyWcs": "Wcs",
        },
    )
    formatterClasses = DictField(
        "Mapping from dataset type name to formatter class. "
        "By default these are derived from the formatters listed in the "
        "Gen3 datastore configuration.",
        keytype=str,
        itemtype=str,
        default={},
    )
    targetHandlerClasses = DictField(
        "Mapping from dataset type name to target handler class.", keytype=str, itemtype=str, default={}
    )
    doRegisterInstrument = Field(
        "If True (default), add dimension records for the Instrument and its "
        "filters and detectors to the registry instead of assuming they are "
        "already present.",
        dtype=bool,
        default=True,
    )
    refCats = ListField(
        "The names of reference catalogs (subdirectories under ref_cats) to be converted.",
        dtype=str,
        default=[],
    )
    fileIgnorePatterns = ListField(
        "Filename globs that should be ignored instead of being treated as datasets.",
        dtype=str,
        default=[
            "README.txt",
            "*.*~*",
            "butler.yaml",
            "gen3.sqlite3",
            "registry.sqlite3",
            "calibRegistry.sqlite3",
            "_mapper",
            "_parent",
            "repositoryCfg.yaml",
        ],
    )
    rawDatasetType = Field(
        "Gen2 dataset type to use for raw data.",
        dtype=str,
        default="raw",
    )
    datasetIncludePatterns = ListField(
        "Glob-style patterns for dataset type names that should be converted.", dtype=str, default=["*"]
    )
    datasetIgnorePatterns = ListField(
        "Glob-style patterns for dataset type names that should not be "
        "converted despite matching a pattern in datasetIncludePatterns.",
        dtype=str,
        default=[],
    )
    datasetTemplateOverrides = DictField(
        "Overrides for Gen2 filename templates, keyed by dataset type. "
        "This can be used to support conversions of Gen2 repos whose mapper "
        "templates were modified in obs_* packages since the datasets were "
        "written.",
        keytype=str,
        itemtype=str,
        default={},
    )
    ccdKey = Field(
        "Key used for the Gen2 equivalent of 'detector' in data IDs.",
        dtype=str,
        default="ccd",
    )
    relatedOnly = Field(
        "If True, only convert datasets that are related to the ingested "
        "visits. Ignored unless a list of visits is passed to run().",
        dtype=bool,
        default=False,
    )
    doExpandDataIds = Field(
        "If True (default), expand data IDs to include extra metadata before "
        "ingesting them. "
        "This may be required in order to associate calibration datasets with "
        "validity ranges or populate file templates, so setting this to False "
        "is considered advanced usage (and it may not always work). When it "
        "does, it can provide a considerable speedup.",
        dtype=bool,
        default=True,
    )
    doMakeUmbrellaCollection = Field(
        "If True (default), define an '<instrument>/defaults' CHAINED "
        "collection that includes everything found in the root repo as well "
        "as the default calibration collection.",
        dtype=bool,
        default=True,
    )
    extraUmbrellaChildren = ListField(
        "Additional child collections to include in the umbrella collection. "
        "Ignored if doMakeUmbrellaCollection=False.",
        dtype=str,
        default=[],
    )

    @property
    def transfer(self):
        return self.raws.transfer

    @transfer.setter
    def transfer(self, value):
        self.raws.transfer = value

    def setDefaults(self):
        self.transfer = None

    def validate(self):
        super().validate()
        if self.relatedOnly and not self.doExpandDataIds:
            raise ValueError("relatedOnly requires doExpandDataIds.")
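    # Illustrative sketch (not part of the original module): common overrides
    # for this config. The ``transfer`` property above forwards to the
    # raw-ingest subtask, so one setting controls how files enter the Gen3
    # datastore; the values shown are examples only.
    #
    #   config = ConvertRepoConfig()
    #   config.transfer = "symlink"                # forwarded to config.raws.transfer
    #   config.refCats = ["ps1_pv3_3pi_20170110"]  # ref_cats subdirectories to convert
    #   config.datasetIgnorePatterns = ["*_camera", "yBackground"]
    #   config.relatedOnly = True                  # needs doExpandDataIds=True; validate() enforces this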


class ConvertRepoTask(Task):
    """A task that converts one or more related Gen2 data repositories to a
    single Gen3 data repository (with multiple collections).

    Parameters
    ----------
    config : `ConvertRepoConfig`
        Configuration for this task.
    butler3 : `lsst.daf.butler.Butler`
        A writeable Gen3 Butler instance that represents the data repository
        that datasets will be ingested into. If the 'raw' dataset is
        configured to be included in the conversion, ``butler3.run`` should be
        set to the name of the collection raws should be ingested into, and
        ``butler3.collections`` should include a calibration collection from
        which the ``camera`` dataset can be loaded, unless a calibration repo
        is converted and curated calibrations are written as part of the
        conversion.
    instrument : `lsst.obs.base.Instrument`
        The Gen3 instrument that should be used for this conversion.
    dry_run : `bool`, optional
        If `True` (`False` is default), make no changes to the Gen3 data
        repository while running as many steps as possible. This option is
        best used with a read-only ``butler3`` argument to ensure unexpected
        edge cases respect this argument (and fail rather than write if they
        do not).
    **kwargs
        Other keyword arguments are forwarded to the `Task` constructor.

    Notes
    -----
    Most of the work of converting repositories is delegated to instances of
    the `RepoConverter` hierarchy. The `ConvertRepoTask` instance itself holds
    only state that is relevant for all Gen2 repositories being ingested, while
    each `RepoConverter` instance holds only state relevant for the conversion
    of a single Gen2 repository. Both the task and the `RepoConverter`
    instances are single use; `ConvertRepoTask.run` and most `RepoConverter`
    methods may only be called once on a particular instance.
    """

    ConfigClass = ConvertRepoConfig

    _DefaultName = "convertRepo"

    def __init__(
        self, config=None, *, butler3: Butler3, instrument: Instrument, dry_run: bool = False, **kwargs
    ):
        config.validate()  # Not a CmdlineTask nor PipelineTask, so have to validate the config here.
        super().__init__(config, **kwargs)
        # Make self.butler3 one that doesn't have any collections associated
        # with it - those are needed by RawIngestTask and DefineVisitsTask, but
        # we don't want them messing with converted datasets, because those
        # have their own logic for figuring out which collections to write to.
        self.butler3 = Butler3(butler=butler3)
        self.registry = self.butler3.registry
        self.universe = self.registry.dimensions
        if self.isDatasetTypeIncluded("raw"):
            self.makeSubtask("raws", butler=butler3)
            self.makeSubtask("defineVisits", butler=butler3)
        else:
            self.raws = None
            self.defineVisits = None
        self.instrument = instrument
        self._configuredSkyMapsBySha1 = {}
        self._configuredSkyMapsByName = {}
        for name, config in self.config.skyMaps.items():
            instance = config.skyMap.apply()
            self._populateSkyMapDicts(name, instance)
        self._usedSkyPix = set()
        self.translatorFactory = self.instrument.makeDataIdTranslatorFactory()
        self.translatorFactory.log = self.log.getChild("translators")
        self.dry_run = dry_run

    def _reduce_kwargs(self):
        # Add extra parameters to pickle.
        return dict(**super()._reduce_kwargs(), butler3=self.butler3, instrument=self.instrument)

    def _populateSkyMapDicts(self, name, instance):
        struct = ConfiguredSkyMap(name=name, sha1=instance.getSha1(), instance=instance)
        self._configuredSkyMapsBySha1[struct.sha1] = struct
        self._configuredSkyMapsByName[struct.name] = struct

    def isDatasetTypeIncluded(self, datasetTypeName: str):
        """Return `True` if configuration indicates that the given dataset
        type should be converted.

        This method is intended to be called primarily by the
        `RepoConverter` instances used internally by the task.

        Parameters
        ----------
        datasetTypeName : `str`
            Name of the dataset type.

        Returns
        -------
        included : `bool`
            Whether the dataset should be included in the conversion.
        """
        return any(
            fnmatch.fnmatchcase(datasetTypeName, pattern) for pattern in self.config.datasetIncludePatterns
        ) and not any(
            fnmatch.fnmatchcase(datasetTypeName, pattern) for pattern in self.config.datasetIgnorePatterns
        )
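    # Illustrative note (not part of the original module): the test above is
    # plain fnmatch-style glob matching, with ignore patterns taking
    # precedence. With datasetIncludePatterns=["*"] and
    # datasetIgnorePatterns=["*_camera"], for example:
    #
    #   >>> import fnmatch
    #   >>> fnmatch.fnmatchcase("calexp", "*")
    #   True
    #   >>> fnmatch.fnmatchcase("lsstCam_camera", "*_camera")
    #   True
    #
    # so "calexp" would be converted while "lsstCam_camera" would be ignored.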


    def useSkyMap(self, skyMap: BaseSkyMap, skyMapName: str) -> str:
        """Indicate that a repository uses the given SkyMap.

        This method is intended to be called primarily by the
        `RepoConverter` instances used internally by the task.

        Parameters
        ----------
        skyMap : `lsst.skymap.BaseSkyMap`
            SkyMap instance being used, typically retrieved from a Gen2
            data repository.
        skyMapName : `str`
            The name of the Gen2 skymap, for error reporting.

        Returns
        -------
        name : `str`
            The name of the skymap in Gen3 data IDs.

        Raises
        ------
        LookupError
            Raised if the specified skymap cannot be found.
        """
        sha1 = skyMap.getSha1()
        if sha1 not in self._configuredSkyMapsBySha1:
            self._populateSkyMapDicts(skyMapName, skyMap)
        try:
            struct = self._configuredSkyMapsBySha1[sha1]
        except KeyError as err:
            msg = f"SkyMap '{skyMapName}' with sha1={sha1} not included in configuration."
            raise LookupError(msg) from err
        struct.used = True
        return struct.name

    def registerUsedSkyMaps(self, subset: Optional[ConversionSubset]):
        """Register all skymaps that have been marked as used.

        This method is intended to be called primarily by the
        `RepoConverter` instances used internally by the task.

        Parameters
        ----------
        subset : `ConversionSubset`, optional
            Object that will be used to filter converted datasets by data ID.
            If given, it will be updated with the tracts of this skymap that
            overlap the visits in the subset.
        """
        for struct in self._configuredSkyMapsBySha1.values():
            if struct.used:
                if not self.dry_run:
                    try:
                        # If the skymap isn't registered, this will raise.
                        self.butler3.registry.expandDataId(skymap=struct.name)
                    except LookupError:
                        self.log.info("Registering skymap %s.", struct.name)
                        struct.instance.register(struct.name, self.butler3)
                if subset is not None and self.config.relatedOnly:
                    subset.addSkyMap(self.registry, struct.name)

    def useSkyPix(self, dimension: SkyPixDimension):
        """Indicate that a repository uses the given SkyPix dimension.

        This method is intended to be called primarily by the
        `RepoConverter` instances used internally by the task.

        Parameters
        ----------
        dimension : `lsst.daf.butler.SkyPixDimension`
            Dimension representing a pixelization of the sky.
        """
        self._usedSkyPix.add(dimension)

    def registerUsedSkyPix(self, subset: Optional[ConversionSubset]):
        """Register all sky pixelization dimensions that have been marked as
        used.

        This method is intended to be called primarily by the
        `RepoConverter` instances used internally by the task.

        Parameters
        ----------
        subset : `ConversionSubset`, optional
            Object that will be used to filter converted datasets by data ID.
            If given, it will be updated with the pixelization IDs that
            overlap the visits in the subset.
        """
        if subset is not None and self.config.relatedOnly:
            for dimension in self._usedSkyPix:
                subset.addSkyPix(self.registry, dimension)

601 def run( 

602 self, 

603 root: str, 

604 *, 

605 calibs: Optional[List[CalibRepo]] = None, 

606 reruns: Optional[List[Rerun]] = None, 

607 visits: Optional[Iterable[int]] = None, 

608 pool: Optional[Pool] = None, 

609 processes: int = 1, 

610 ): 

611 """Convert a group of related data repositories. 

612 

613 Parameters 

614 ---------- 

615 root : `str` 

616 Complete path to the root Gen2 data repository. This should be 

617 a data repository that includes a Gen2 registry and any raw files 

618 and/or reference catalogs. 

619 calibs : `list` of `CalibRepo` 

620 Specifications for Gen2 calibration repos to convert. If `None` 

621 (default), curated calibrations only will be written to the default 

622 calibration collection for this instrument; set to ``()`` explictly 

623 to disable this. 

624 reruns : `list` of `Rerun` 

625 Specifications for rerun (processing output) repos to convert. If 

626 `None` (default), no reruns are converted. 

627 visits : iterable of `int`, optional 

628 The integer IDs of visits to convert. If not provided, all visits 

629 in the Gen2 root repository will be converted. 

630 pool : `multiprocessing.Pool`, optional 

631 If not `None`, a process pool with which to parallelize some 

632 operations. 

633 processes : `int`, optional 

634 The number of processes to use for conversion. 

635 """ 

636 if pool is None and processes > 1: 

637 pool = Pool(processes) 

638 if calibs is None: 

639 calibs = [CalibRepo(path=None)] 

640 elif calibs and not self.config.doExpandDataIds: 

641 raise ValueError("Cannot convert calib repos with config.doExpandDataIds=False.") 

642 if visits is not None: 

643 subset = ConversionSubset(instrument=self.instrument.getName(), visits=frozenset(visits)) 

644 else: 

645 if self.config.relatedOnly: 

646 self.log.warning( 

647 "config.relatedOnly is True but all visits are being ingested; " 

648 "no filtering will be done." 

649 ) 

650 subset = None 

651 if not self.config.doExpandDataIds and self.butler3.datastore.needs_expanded_data_ids( 

652 self.config.transfer 

653 ): 

654 self.log.warning( 

655 "config.doExpandDataIds=False but datastore reports that expanded data IDs may be needed.", 

656 self.config.transfer, 

657 ) 

658 

659 # Check that at most one CalibRepo is marked as default, to fail before 

660 # we actually write anything. 

661 defaultCalibRepos = [c.path for c in calibs if c.default] 

662 if len(defaultCalibRepos) > 1: 

663 raise ValueError(f"Multiple calib repos marked as default: {defaultCalibRepos}.") 

664 

665 # Make converters for all Gen2 repos. 

666 converters = [] 

667 # Start with the root repo, which must always be given even if we are 

668 # not configured to convert anything from it. 

669 rootConverter = RootRepoConverter(task=self, root=root, subset=subset, instrument=self.instrument) 

670 converters.append(rootConverter) 

671 # Calibration repos are next. 

672 for spec in calibs: 

673 calibRoot = spec.path 

674 if calibRoot is not None: 

675 if not os.path.isabs(calibRoot): 

676 calibRoot = os.path.join(rootConverter.root, calibRoot) 

677 converter = CalibRepoConverter( 

678 task=self, 

679 root=calibRoot, 

680 labels=spec.labels, 

681 instrument=self.instrument, 

682 mapper=rootConverter.mapper, 

683 subset=rootConverter.subset, 

684 ) 

685 converters.append(converter) 

686 # CalibRepo entries that don't have a path are just there for 

687 # curated calibs and maybe to set up a collection pointer; that's 

688 # handled further down (after we've done everything we can that 

689 # doesn't involve actually writing to the output Gen3 repo). 

690 # And now reruns. 

691 rerunConverters = {} 

692 for spec in reruns: 

693 runRoot = spec.path 

694 if not os.path.isabs(runRoot): 

695 runRoot = os.path.join(rootConverter.root, runRoot) 

696 spec.guessCollectionNames(self.instrument, rootConverter.root) 

697 converter = StandardRepoConverter( 

698 task=self, 

699 root=runRoot, 

700 run=spec.runName, 

701 instrument=self.instrument, 

702 subset=rootConverter.subset, 

703 ) 

704 converters.append(converter) 

705 rerunConverters[spec.runName] = converter 

706 

707 # Walk Gen2 repos to find datasets to convert. 

708 for converter in converters: 

709 converter.prep() 

710 

711 # Register the instrument if we're configured to do so. 

712 if self.config.doRegisterInstrument and not self.dry_run: 

713 self.instrument.register(self.registry) 

714 

715 # Run raw ingest (does nothing if we weren't configured to convert the 

716 # 'raw' dataset type). 

717 rootConverter.runRawIngest(pool=pool) 

718 

719 # Write curated calibrations to all calibration collections where they 

720 # were requested (which may be implicit, by passing calibs=None). Also 

721 # set up a CHAINED collection that points to the default CALIBRATION 

722 # collection if one is needed. 

723 if not self.dry_run: 

724 for spec in calibs: 

725 if spec.curated: 

726 self.instrument.writeCuratedCalibrations(self.butler3, labels=spec.labels) 

727 if spec.default and spec.labels: 

728 # This is guaranteed to be True at most once in the loop by 

729 # logic at the top of this method. 

730 defaultCalibName = self.instrument.makeCalibrationCollectionName() 

731 self.butler3.registry.registerCollection(defaultCalibName, CollectionType.CHAINED) 

732 recommendedCalibName = self.instrument.makeCalibrationCollectionName(*spec.labels) 

733 self.butler3.registry.registerCollection(recommendedCalibName, CollectionType.CALIBRATION) 

734 self.butler3.registry.setCollectionChain(defaultCalibName, [recommendedCalibName]) 

735 

736 # Define visits (also does nothing if we weren't configurd to convert 

737 # the 'raw' dataset type). 

738 rootConverter.runDefineVisits(pool=pool) 

739 

740 # Insert dimensions that are potentially shared by all Gen2 

741 # repositories (and are hence managed directly by the Task, rather 

742 # than a converter instance). 

743 # This also finishes setting up the (shared) converter.subsets object 

744 # that is used to filter data IDs for config.relatedOnly. 

745 self.registerUsedSkyMaps(rootConverter.subset) 

746 self.registerUsedSkyPix(rootConverter.subset) 

747 

748 # Look for datasets, generally by scanning the filesystem. 

749 # This requires dimensions to have already been inserted so we can use 

750 # dimension information to identify related datasets. 

751 for converter in converters: 

752 converter.findDatasets() 

753 

754 # Expand data IDs. 

755 if self.config.doExpandDataIds: 

756 for converter in converters: 

757 converter.expandDataIds() 

758 

759 if self.dry_run: 

760 return 

761 

762 # Actually ingest datasets. 

763 for converter in converters: 

764 converter.ingest() 

765 

766 # Perform any post-ingest processing. 

767 for converter in converters: 

768 converter.finish() 

769 

770 # Make the umbrella collection, if desired. 

771 if self.config.doMakeUmbrellaCollection: 

772 umbrella = self.instrument.makeUmbrellaCollectionName() 

773 self.registry.registerCollection(umbrella, CollectionType.CHAINED) 

774 children = list(self.registry.getCollectionChain(umbrella)) 

775 children.extend(rootConverter.getCollectionChain()) 

776 children.append(self.instrument.makeCalibrationCollectionName()) 

777 if BaseSkyMap.SKYMAP_RUN_COLLECTION_NAME not in children: 

778 # Ensure the umbrella collection includes the global skymap 

779 # collection, even if it's currently empty. 

780 self.registry.registerRun(BaseSkyMap.SKYMAP_RUN_COLLECTION_NAME) 

781 children.append(BaseSkyMap.SKYMAP_RUN_COLLECTION_NAME) 

782 children.extend(self.config.extraUmbrellaChildren) 

783 self.log.info("Defining %s from chain %s.", umbrella, children) 

784 self.registry.setCollectionChain(umbrella, children) 

785 

786 # Add chained collections for reruns. 

787 for spec in reruns: 

788 if spec.chainName is not None: 

789 self.butler3.registry.registerCollection(spec.chainName, type=CollectionType.CHAINED) 

790 chain = [spec.runName] 

791 chain.extend(rerunConverters[spec.runName].getCollectionChain()) 

792 for parent in spec.parents: 

793 chain.append(parent) 

794 parentConverter = rerunConverters.get(parent) 

795 if parentConverter is not None: 

796 chain.extend(parentConverter.getCollectionChain()) 

797 chain.extend(rootConverter.getCollectionChain()) 

798 if len(calibs) == 1: 

799 # Exactly one calibration repo being converted, so it's 

800 # safe-ish to assume that's the one the rerun used. 

801 chain.append(self.instrument.makeCalibrationCollectionName(*calibs[0].labels)) 

802 self.log.info("Defining %s from chain %s.", spec.chainName, chain) 

803 self.butler3.registry.setCollectionChain(spec.chainName, chain, flatten=True)
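# Illustrative sketch (not part of the original module): putting the pieces
# together. ``MyCam`` stands in for a concrete `Instrument` subclass, and the
# repository paths, collection names, and labels are hypothetical; the
# constructor and run() signatures are the ones defined above.
#
#   butler3 = Butler3("/repo/gen3", run="MyCam/raw/all")
#   task = ConvertRepoTask(
#       config=ConvertRepoConfig(), butler3=butler3, instrument=MyCam()
#   )
#   task.run(
#       root="/repo/gen2",
#       calibs=[CalibRepo(path="CALIB", labels=("gen2",))],
#       reruns=[
#           Rerun(path="rerun/private/alice/cosmos", runName=None, chainName=None, parents=[])
#       ],
#       processes=4,
#   )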