Coverage for python/lsst/obs/base/gen2to3/convertRepo.py: 25%

251 statements  

coverage.py v7.1.0, created at 2023-02-05 18:01 -0800

1# This file is part of obs_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ["CalibRepo", "ConvertRepoConfig", "ConvertRepoTask", "ConvertRepoSkyMapConfig", "Rerun"] 

24 

25import os 

26import fnmatch 

27from dataclasses import dataclass 

28from multiprocessing import Pool 

29from typing import Iterable, Optional, List, Tuple 

30 

31from lsst.daf.butler import ( 

32 Butler as Butler3, 

33 ButlerURI, 

34 CollectionType, 

35 SkyPixDimension 

36) 

37from lsst.pex.config import Config, ConfigurableField, ConfigDictField, DictField, ListField, Field 

38from lsst.pipe.base import Task 

39from lsst.skymap import skyMapRegistry, BaseSkyMap 

40 

41from ..ingest import RawIngestTask 

42from ..defineVisits import DefineVisitsTask 

43from .repoConverter import ConversionSubset 

44from .rootRepoConverter import RootRepoConverter 

45from .calibRepoConverter import CalibRepoConverter 

46from .standardRepoConverter import StandardRepoConverter 

47from .._instrument import Instrument 

48 

49 

50@dataclass 

51class ConfiguredSkyMap: 

52 """Struct containing information about a skymap that may appear in a Gen2 

53 repository. 

54 """ 

55 

56 name: str 

57 """Name of the skymap used in Gen3 data IDs. 

58 """ 

59 

60 sha1: bytes 

61 """Hash computed by `BaseSkyMap.getSha1`. 

62 """ 

63 

64 instance: BaseSkyMap 

65 """Name of the skymap used in Gen3 data IDs. 

66 """ 

67 

68 used: bool = False 

69 """Whether this skymap has been found in at least one repository being 

70 converted. 

71 """ 

72 

73 

74def _dropPrefix(s: str, prefix: str) -> Tuple[str, bool]: 

75 """If ``s`` starts with ``prefix``, return the rest of ``s`` and `True`. 

76 Otherwise return ``s`` and `False`. 

77 """ 

78 if s.startswith(prefix): 

79 return s[len(prefix):], True 

80 return s, False 

81 

82 
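# A quick illustration of _dropPrefix (values below are hypothetical, for
# reference only):
#
#     _dropPrefix("rerun/private/jdoe", "rerun/")  -> ("private/jdoe", True)
#     _dropPrefix("shared/cosmos", "rerun/")       -> ("shared/cosmos", False)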

83@dataclass 

84class Rerun: 

85 """Specification for a Gen2 processing-output repository to convert. 

86 """ 

87 

88 path: str 

89 """Absolute or relative (to the root repository) path to the Gen2 

90 repository (`str`). 

91 """ 

92 

93 runName: Optional[str] 

94 """Name of the `~lsst.daf.butler.CollectionType.RUN` collection datasets 

95 will be inserted into (`str` or `None`). 

96 

97 If `None`, a name will be guessed by calling `guessCollectionNames`. 

98 """ 

99 

100 chainName: Optional[str] 

101 """Name of a `~lsst.daf.butler.CollectionType.CHAINED` collection that will 

102 combine this repository's datasets with those of its parent repositories 

103 (`str` or `None`). 

104 

105 If `None`, a name will be guessed by calling `guessCollectionNames`. 

106 """ 

107 

108 parents: List[str] 

109 """Collection names associated with parent repositories, used to define the 

110 chained collection (`list` [ `str` ]). 

111 

112 Ignored if `chainName` is `None`. Runs used in the root repo are 

113 automatically included. 

114 """ 

115 

116 def guessCollectionNames(self, instrument: Instrument, root: str) -> None: 

117 """Update `runName` and `chainName` with guesses that match Gen3 naming 

118 conventions. 

119 

120 If `runName` is already set, nothing is changed. Otherwise `runName` is 

121 derived from `chainName`, and if `chainName` is also `None`, it is first 

122 guessed from ``self.path`` (relative to ``root``). 

123 

124 Parameters 

125 ---------- 

126 instrument : `Instrument` 

127 Instrument object for the repository being converted. 

128 root : `str` 

129 Path to the root repository. If this is present at the start of 

130 ``self.path``, it will be stripped as part of generating the run 

131 name. 

132 

133 Raises 

134 ------ 

135 ValueError 

136 Raised if the appropriate collection names cannot be inferred. 

137 """ 

138 if self.runName is not None: 

139 return 

140 if self.chainName is None: 

141 if os.path.isabs(self.path): 

142 rerunURI = ButlerURI(self.path) 

143 rootURI = ButlerURI(root) 

144 chainName = rerunURI.relative_to(rootURI) 

145 if chainName is None: 

146 raise ValueError( 

147 f"Cannot guess run name collection for rerun at '{self.path}': " 

148 f"no clear relationship to root '{root}'." 

149 ) 

150 else: 

151 chainName = self.path 

152 chainName, _ = _dropPrefix(chainName, "rerun/") 

153 chainName, isPersonal = _dropPrefix(chainName, "private/") 

154 if isPersonal: 

155 chainName = f"u/{chainName}" 

156 else: 

157 chainName, _ = _dropPrefix(chainName, "shared/") 

158 chainName = instrument.makeCollectionName("runs", chainName) 

159 self.chainName = chainName 

160 self.runName = f"{self.chainName}/direct" 

161 

162 
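# A minimal sketch of how guessCollectionNames maps a Gen2 rerun path to Gen3
# collection names. The instrument object and the "HSC" collection prefix
# below are illustrative assumptions, not part of this module:
#
#     spec = Rerun("rerun/private/jdoe/cosmos", runName=None,
#                  chainName=None, parents=[])
#     spec.guessCollectionNames(hscInstrument, root="/path/to/gen2/root")
#     # "rerun/" is stripped, "private/" becomes "u/", and the instrument's
#     # makeCollectionName adds its prefix:
#     spec.chainName  # -> "HSC/runs/u/jdoe/cosmos"
#     spec.runName    # -> "HSC/runs/u/jdoe/cosmos/direct"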

163@dataclass 

164class CalibRepo: 

165 """Specification for a Gen2 calibration repository to convert. 

166 """ 

167 

168 path: Optional[str] 

169 """Absolute or relative (to the root repository) path to the Gen2 

170 repository (`str` or `None`). 

171 

172 If `None`, no calibration datasets will be converted from Gen2, but 

173 curated calibrations may still be written. 

174 """ 

175 

176 curated: bool = True 

177 """If `True`, write curated calibrations into the associated 

178 ``CALIBRATION`` collection (`bool`). 

179 """ 

180 

181 labels: Tuple[str, ...] = () 

182 """Extra strings to insert into collection names, including both the 

183 ``RUN`` collections that datasets are ingested directly into and the 

184 ``CALIBRATION`` collection that associates them with validity ranges. 

185 

186 An empty tuple will directly populate the default calibration collection 

187 for this instrument with the converted datasets, and is incompatible with 

188 ``default=False``. This is a good choice for test data repositories where 

189 only one ``CALIBRATION`` collection will ever exist. In other cases, this 

190 should be a non-empty tuple, so the default calibration collection can 

191 actually be a ``CHAINED`` collection pointer that points to the current 

192 recommended ``CALIBRATION`` collection. 

193 """ 

194 

195 default: bool = True 

196 """If `True`, the created ``CALIBRATION`` collection should be the default 

197 for this instrument. 

198 

199 This field may only be `True` for one converted calibration collection if 

200 more than one is passed to `ConvertRepoTask.run`. It defaults to `True` 

201 because the vast majority of the time only one calibration collection is 

202 being converted. If ``labels`` is not empty, ``default=True`` will cause 

203 a ``CHAINED`` collection that points to the converted ``CALIBRATION`` 

204 collection to be defined. If ``labels`` is empty, ``default`` *must* be 

205 `True` and no ``CHAINED`` collection pointer is necessary. 

206 """ 

207 

208 def __post_init__(self) -> None: 

209 if not self.labels and not self.default: 

210 raise ValueError("labels=() requires default=True") 

211 

212 
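# Illustrative (hypothetical) CalibRepo specifications for ConvertRepoTask.run:
#
#     # Convert a Gen2 calib repo, tagging its collections with a ticket label
#     # and leaving it as the instrument's default calibration collection:
#     CalibRepo(path="CALIB", labels=("DM-12345",))
#
#     # Write only curated calibrations into the default calibration
#     # collection (no Gen2 calibs converted); labels=() is only allowed
#     # together with default=True:
#     CalibRepo(path=None)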

213class ConvertRepoSkyMapConfig(Config): 

214 """Sub-config used to hold the parameters of a SkyMap. 

215 

216 Notes 

217 ----- 

218 This config only needs to exist because we can't put a 

219 `~lsst.pex.config.RegistryField` directly inside a 

220 `~lsst.pex.config.ConfigDictField`. 

221 

222 It needs to have its only field named "skyMap" for compatibility with the 

223 configuration of `lsst.pipe.tasks.MakeSkyMapTask`, which we want so we can 

224 use one config file in an obs package to configure both. 

225 

226 This name leads to unfortunate repetition with the field named 

227 "skymap" that holds it - "skyMap[name].skyMap" - but that seems 

228 unavoidable. 

229 """ 

230 skyMap = skyMapRegistry.makeField( 

231 doc="Type and parameters for the SkyMap itself.", 

232 default="dodeca", 

233 ) 

234 

235 
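# A sketch of how a ConvertRepoSkyMapConfig entry might be filled in from an
# obs-package config override file. The skymap name and parameter values are
# hypothetical, and the nested access follows the usual
# lsst.pex.config RegistryField pattern:
#
#     config.skyMaps["example_rings_v1"] = ConvertRepoSkyMapConfig()
#     config.skyMaps["example_rings_v1"].skyMap.name = "rings"
#     config.skyMaps["example_rings_v1"].skyMap["rings"].numRings = 120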

236class ConvertRepoConfig(Config): 

237 raws = ConfigurableField( 

238 "Configuration for subtask responsible for ingesting raws and adding " 

239 "exposure dimension entries.", 

240 target=RawIngestTask, 

241 ) 

242 defineVisits = ConfigurableField( 

243 "Configuration for the subtask responsible for defining visits from " 

244 "exposures.", 

245 target=DefineVisitsTask, 

246 ) 

247 skyMaps = ConfigDictField( 

248 "Mapping from Gen3 skymap name to the parameters used to construct a " 

249 "BaseSkyMap instance. This will be used to associate names with " 

250 "existing skymaps found in the Gen2 repo.", 

251 keytype=str, 

252 itemtype=ConvertRepoSkyMapConfig, 

253 default={} 

254 ) 

255 rootSkyMapName = Field( 

256 "Name of a Gen3 skymap (an entry in ``self.skyMaps``) to assume for " 

257 "datasets in the root repository when no SkyMap is found there. ", 

258 dtype=str, 

259 optional=True, 

260 default=None, 

261 ) 

262 runs = DictField( 

263 "A mapping from dataset type name to the RUN collection they should " 

264 "be inserted into. This must include all datasets that can be found " 

265 "in the root repository; other repositories will use per-repository " 

266 "runs.", 

267 keytype=str, 

268 itemtype=str, 

269 default={}, 

270 ) 

271 runsForced = DictField( 

272 "Like ``runs``, but is used even when the dataset is present in a " 

273 "non-root repository (i.e. rerun), overriding the non-root " 

274 "repository's main collection.", 

275 keytype=str, 

276 itemtype=str, 

277 default={ 

278 "brightObjectMask": "masks", 

279 } 

280 ) 

281 storageClasses = DictField( 

282 "Mapping from dataset type name or Gen2 policy entry (e.g. 'python' " 

283 "or 'persistable') to the Gen3 StorageClass name.", 

284 keytype=str, 

285 itemtype=str, 

286 default={ 

287 "bias": "ExposureF", 

288 "dark": "ExposureF", 

289 "flat": "ExposureF", 

290 "defects": "Defects", 

291 "crosstalk": "CrosstalkCalib", 

292 "BaseSkyMap": "SkyMap", 

293 "BaseCatalog": "Catalog", 

294 "BackgroundList": "Background", 

295 "raw": "Exposure", 

296 "MultilevelParquetTable": "DataFrame", 

297 "ParquetTable": "DataFrame", 

298 "SkyWcs": "Wcs", 

299 } 

300 ) 

301 formatterClasses = DictField( 

302 "Mapping from dataset type name to formatter class. " 

303 "By default these are derived from the formatters listed in the" 

304 " Gen3 datastore configuration.", 

305 keytype=str, 

306 itemtype=str, 

307 default={} 

308 ) 

309 targetHandlerClasses = DictField( 

310 "Mapping from dataset type name to target handler class.", 

311 keytype=str, 

312 itemtype=str, 

313 default={} 

314 ) 

315 doRegisterInstrument = Field( 

316 "If True (default), add dimension records for the Instrument and its " 

317 "filters and detectors to the registry instead of assuming they are " 

318 "already present.", 

319 dtype=bool, 

320 default=True, 

321 ) 

322 refCats = ListField( 

323 "The names of reference catalogs (subdirectories under ref_cats) to " 

324 "be converted", 

325 dtype=str, 

326 default=[] 

327 ) 

328 fileIgnorePatterns = ListField( 

329 "Filename globs that should be ignored instead of being treated as " 

330 "datasets.", 

331 dtype=str, 

332 default=["README.txt", "*.*~*", "butler.yaml", "gen3.sqlite3", 

333 "registry.sqlite3", "calibRegistry.sqlite3", "_mapper", 

334 "_parent", "repositoryCfg.yaml"] 

335 ) 

336 rawDatasetType = Field( 

337 "Gen2 dataset type to use for raw data.", 

338 dtype=str, 

339 default="raw", 

340 ) 

341 datasetIncludePatterns = ListField( 

342 "Glob-style patterns for dataset type names that should be converted.", 

343 dtype=str, 

344 default=["*"] 

345 ) 

346 datasetIgnorePatterns = ListField( 

347 "Glob-style patterns for dataset type names that should not be " 

348 "converted despite matching a pattern in datasetIncludePatterns.", 

349 dtype=str, 

350 default=[] 

351 ) 

352 datasetTemplateOverrides = DictField( 

353 "Overrides for Gen2 filename templates, keyed by dataset type. " 

354 "This can be used to support conversions of Gen2 repos whose mapper " 

355 "templates were modified in obs_* packages since the datasets were " 

356 "written.", 

357 keytype=str, 

358 itemtype=str, 

359 default={}, 

360 ) 

361 ccdKey = Field( 

362 "Key used for the Gen2 equivalent of 'detector' in data IDs.", 

363 dtype=str, 

364 default="ccd", 

365 ) 

366 relatedOnly = Field( 

367 "If True (default), only convert datasets that are related to the " 

368 "ingested visits. Ignored unless a list of visits is passed to " 

369 "run().", 

370 dtype=bool, 

371 default=False, 

372 ) 

373 doExpandDataIds = Field( 

374 "If True (default), expand data IDs to include extra metadata before " 

375 "ingesting them. " 

376 "This may be required in order to associate calibration datasets with " 

377 "validity ranges or populate file templates, so setting this to False " 

378 "is considered advanced usage (and it may not always work). When it " 

379 "does, it can provide a considerable speedup.", 

380 dtype=bool, 

381 default=True, 

382 ) 

383 doMakeUmbrellaCollection = Field( 

384 "If True (default), define an '<instrument>/defaults' CHAINED " 

385 "collection that includes everything found in the root repo as well " 

386 "as the default calibration collection.", 

387 dtype=bool, 

388 default=True, 

389 ) 

390 extraUmbrellaChildren = ListField( 

391 "Additional child collections to include in the umbrella collection. " 

392 "Ignored if doMakeUmbrellaCollection=False.", 

393 dtype=str, 

394 default=[] 

395 ) 

396 

397 @property 

398 def transfer(self): 

399 return self.raws.transfer 

400 

401 @transfer.setter 

402 def transfer(self, value): 

403 self.raws.transfer = value 

404 

405 def setDefaults(self): 

406 self.transfer = None 

407 

408 def validate(self): 

409 super().validate() 

410 if self.relatedOnly and not self.doExpandDataIds: 

411 raise ValueError("relatedOnly requires doExpandDataIds.") 

412 

413 
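# A hedged sketch of typical ConvertRepoConfig overrides. The dataset type
# names and values below are examples only, not defaults of this module:
#
#     config = ConvertRepoTask.ConfigClass()
#     config.rootSkyMapName = "discrete"         # must name a skyMaps entry
#     config.refCats = ["ps1_pv3_3pi_20170110"]  # a ref_cats subdirectory
#     config.runs["ref_cat"] = "refcats"         # RUN for root-repo datasets
#     config.transfer = "symlink"                # forwarded to raws.transfer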

414class ConvertRepoTask(Task): 

415 """A task that converts one or more related Gen2 data repositories to a 

416 single Gen3 data repository (with multiple collections). 

417 

418 Parameters 

419 ---------- 

420 config : `ConvertRepoConfig` 

421 Configuration for this task. 

422 butler3 : `lsst.daf.butler.Butler` 

423 A writeable Gen3 Butler instance that represents the data repository 

424 that datasets will be ingested into. If the 'raw' dataset is 

425 configured to be included in the conversion, ``butler3.run`` should be 

426 set to the name of the collection raws should be ingested into, and 

427 ``butler3.collections`` should include a calibration collection from 

428 which the ``camera`` dataset can be loaded, unless a calibration repo 

429 is converted with ``curated=True`` (see `CalibRepo`). 

430 instrument : `lsst.obs.base.Instrument` 

431 The Gen3 instrument that should be used for this conversion. 

432 dry_run : `bool`, optional 

433 If `True` (`False` is default), make no changes to the Gen3 data 

434 repository while running as many steps as possible. This option is 

435 best used with a read-only ``butler3`` argument to ensure unexpected 

436 edge cases respect this argument (and fail rather than write if they 

437 do not). 

438 **kwargs 

439 Other keyword arguments are forwarded to the `Task` constructor. 

440 

441 Notes 

442 ----- 

443 Most of the work of converting repositories is delegated to instances of 

444 the `RepoConverter` hierarchy. The `ConvertRepoTask` instance itself holds 

445 only state that is relevant for all Gen2 repositories being ingested, while 

446 each `RepoConverter` instance holds only state relevant for the conversion 

447 of a single Gen2 repository. Both the task and the `RepoConverter` 

448 instances are single use; `ConvertRepoTask.run` and most `RepoConverter` 

449 methods may only be called once on a particular instance. 

450 """ 

451 

452 ConfigClass = ConvertRepoConfig 

453 

454 _DefaultName = "convertRepo" 

455 

456 def __init__(self, config=None, *, butler3: Butler3, instrument: Instrument, dry_run: bool = False, 

457 **kwargs): 

458 config.validate() # Not a CmdlineTask nor PipelineTask, so have to validate the config here. 

459 super().__init__(config, **kwargs) 

460 # Make self.butler3 one that doesn't have any collections associated 

461 # with it - those are needed by RawIngestTask and DefineVisitsTask, but 

462 # we don't want them messing with converted datasets, because those 

463 # have their own logic for figuring out which collections to write to. 

464 self.butler3 = Butler3(butler=butler3) 

465 self.registry = self.butler3.registry 

466 self.universe = self.registry.dimensions 

467 if self.isDatasetTypeIncluded("raw"): 

468 self.makeSubtask("raws", butler=butler3) 

469 self.makeSubtask("defineVisits", butler=butler3) 

470 else: 

471 self.raws = None 

472 self.defineVisits = None 

473 self.instrument = instrument 

474 self._configuredSkyMapsBySha1 = {} 

475 self._configuredSkyMapsByName = {} 

476 for name, config in self.config.skyMaps.items(): 

477 instance = config.skyMap.apply() 

478 self._populateSkyMapDicts(name, instance) 

479 self._usedSkyPix = set() 

480 self.translatorFactory = self.instrument.makeDataIdTranslatorFactory() 

481 self.translatorFactory.log = self.log.getChild("translators") 

482 self.dry_run = dry_run 

483 

484 def _reduce_kwargs(self): 

485 # Add extra parameters to pickle 

486 return dict(**super()._reduce_kwargs(), butler3=self.butler3, instrument=self.instrument) 

487 

488 def _populateSkyMapDicts(self, name, instance): 

489 struct = ConfiguredSkyMap(name=name, sha1=instance.getSha1(), instance=instance) 

490 self._configuredSkyMapsBySha1[struct.sha1] = struct 

491 self._configuredSkyMapsByName[struct.name] = struct 

492 

493 def isDatasetTypeIncluded(self, datasetTypeName: str): 

494 """Return `True` if configuration indicates that the given dataset type 

495 should be converted. 

496 

497 This method is intended to be called primarily by the 

498 `RepoConverter` instances used internally by the task. 

499 

500 Parameters 

501 ---------- 

502 datasetTypeName : `str` 

503 Name of the dataset type. 

504 

505 Returns 

506 ------- 

507 included : `bool` 

508 Whether the dataset should be included in the conversion. 

509 """ 

510 return ( 

511 any(fnmatch.fnmatchcase(datasetTypeName, pattern) 

512 for pattern in self.config.datasetIncludePatterns) 

513 and not any(fnmatch.fnmatchcase(datasetTypeName, pattern) 

514 for pattern in self.config.datasetIgnorePatterns) 

515 ) 

516 
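# Example of how the include/ignore patterns interact (the pattern values and
# dataset type names below are illustrative, not defaults):
#
#     config.datasetIncludePatterns = ["*"]
#     config.datasetIgnorePatterns = ["*_camera", "deepCoadd_skyMap"]
#     # With a task built from that config:
#     task.isDatasetTypeIncluded("calexp")            # -> True
#     task.isDatasetTypeIncluded("deepCoadd_skyMap")  # -> False (ignored)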

517 def useSkyMap(self, skyMap: BaseSkyMap, skyMapName: str) -> str: 

518 """Indicate that a repository uses the given SkyMap. 

519 

520 This method is intended to be called primarily by the 

521 `RepoConverter` instances used internally by the task. 

522 

523 Parameters 

524 ---------- 

525 skyMap : `lsst.skymap.BaseSkyMap` 

526 SkyMap instance being used, typically retrieved from a Gen2 

527 data repository. 

528 skyMapName : `str` 

529 The name of the Gen2 skymap, for error reporting. 

530 

531 Returns 

532 ------- 

533 name : `str` 

534 The name of the skymap in Gen3 data IDs. 

535 

536 Raises 

537 ------ 

538 LookupError 

539 Raised if the specified skymap cannot be found. 

540 """ 

541 sha1 = skyMap.getSha1() 

542 if sha1 not in self._configuredSkyMapsBySha1: 

543 self._populateSkyMapDicts(skyMapName, skyMap) 

544 try: 

545 struct = self._configuredSkyMapsBySha1[sha1] 

546 except KeyError as err: 

547 msg = f"SkyMap '{skyMapName}' with sha1={sha1} not included in configuration." 

548 raise LookupError(msg) from err 

549 struct.used = True 

550 return struct.name 

551 

552 def registerUsedSkyMaps(self, subset: Optional[ConversionSubset]): 

553 """Register all skymaps that have been marked as used. 

554 

555 This method is intended to be called primarily by the 

556 `RepoConverter` instances used internally by the task. 

557 

558 Parameters 

559 ---------- 

560 subset : `ConversionSubset`, optional 

561 Object that will be used to filter converted datasets by data ID. 

562 If given, it will be updated with the tracts of this skymap that 

563 overlap the visits in the subset. 

564 """ 

565 for struct in self._configuredSkyMapsBySha1.values(): 

566 if struct.used: 

567 if not self.dry_run: 

568 try: 

569 # If the skymap isn't registered, this will raise. 

570 self.butler3.registry.expandDataId(skymap=struct.name) 

571 except LookupError: 

572 self.log.info("Registering skymap %s.", struct.name) 

573 struct.instance.register(struct.name, self.butler3) 

574 if subset is not None and self.config.relatedOnly: 

575 subset.addSkyMap(self.registry, struct.name) 

576 

577 def useSkyPix(self, dimension: SkyPixDimension): 

578 """Indicate that a repository uses the given SkyPix dimension. 

579 

580 This method is intended to be called primarily by the 

581 `RepoConverter` instances used internally by the task. 

582 

583 Parameters 

584 ---------- 

585 dimension : `lsst.daf.butler.SkyPixDimension` 

586 Dimension representing a pixelization of the sky. 

587 """ 

588 self._usedSkyPix.add(dimension) 

589 

590 def registerUsedSkyPix(self, subset: Optional[ConversionSubset]): 

591 """Register all skymaps that have been marked as used. 

592 

593 This method is intended to be called primarily by the 

594 `RepoConverter` instances used internally by the task. 

595 

596 Parameters 

597 ---------- 

598 subset : `ConversionSubset`, optional 

599 Object that will be used to filter converted datasets by data ID. 

600 If given, it will be updated with the pixelization IDs that 

601 overlap the visits in the subset. 

602 """ 

603 if subset is not None and self.config.relatedOnly: 

604 for dimension in self._usedSkyPix: 

605 subset.addSkyPix(self.registry, dimension) 

606 

607 def run(self, root: str, *, 

608 calibs: Optional[List[CalibRepo]] = None, 

609 reruns: Optional[List[Rerun]] = None, 

610 visits: Optional[Iterable[int]] = None, 

611 pool: Optional[Pool] = None, 

612 processes: int = 1): 

613 """Convert a group of related data repositories. 

614 

615 Parameters 

616 ---------- 

617 root : `str` 

618 Complete path to the root Gen2 data repository. This should be 

619 a data repository that includes a Gen2 registry and any raw files 

620 and/or reference catalogs. 

621 calibs : `list` of `CalibRepo` 

622 Specifications for Gen2 calibration repos to convert. If `None` 

623 (default), curated calibrations only will be written to the default 

624 calibration collection for this instrument; set to ``()`` explicitly 

625 to disable this. 

626 reruns : `list` of `Rerun` 

627 Specifications for rerun (processing output) repos to convert. If 

628 `None` (default), no reruns are converted. 

629 visits : iterable of `int`, optional 

630 The integer IDs of visits to convert. If not provided, all visits 

631 in the Gen2 root repository will be converted. 

632 pool : `multiprocessing.Pool`, optional 

633 If not `None`, a process pool with which to parallelize some 

634 operations. 

635 processes : `int`, optional 

636 The number of processes to use for conversion. 

637 """ 

638 if pool is None and processes > 1: 

639 pool = Pool(processes) 

640 if calibs is None: 

641 calibs = [CalibRepo(path=None)] 

642 elif calibs and not self.config.doExpandDataIds: 

643 raise ValueError("Cannot convert calib repos with config.doExpandDataIds=False.") 

# Treat reruns=None as "no reruns to convert", per the docstring.

if reruns is None:

reruns = []

644 if visits is not None: 

645 subset = ConversionSubset(instrument=self.instrument.getName(), visits=frozenset(visits)) 

646 else: 

647 if self.config.relatedOnly: 

648 self.log.warning("config.relatedOnly is True but all visits are being ingested; " 

649 "no filtering will be done.") 

650 subset = None 

651 if (not self.config.doExpandDataIds 

652 and self.butler3.datastore.needs_expanded_data_ids(self.config.transfer)): 

653 self.log.warning("config.doExpandDataIds=False but datastore reports that expanded data " 

654 "IDs may be needed for transfer mode %s.", 

655 self.config.transfer) 

656 

657 # Check that at most one CalibRepo is marked as default, to fail before 

658 # we actually write anything. 

659 defaultCalibRepos = [c.path for c in calibs if c.default] 

660 if len(defaultCalibRepos) > 1: 

661 raise ValueError(f"Multiple calib repos marked as default: {defaultCalibRepos}.") 

662 

663 # Make converters for all Gen2 repos. 

664 converters = [] 

665 # Start with the root repo, which must always be given even if we are 

666 # not configured to convert anything from it. 

667 rootConverter = RootRepoConverter(task=self, root=root, subset=subset, instrument=self.instrument) 

668 converters.append(rootConverter) 

669 # Calibration repos are next. 

670 for spec in calibs: 

671 calibRoot = spec.path 

672 if calibRoot is not None: 

673 if not os.path.isabs(calibRoot): 

674 calibRoot = os.path.join(rootConverter.root, calibRoot) 

675 converter = CalibRepoConverter(task=self, root=calibRoot, 

676 labels=spec.labels, 

677 instrument=self.instrument, 

678 mapper=rootConverter.mapper, 

679 subset=rootConverter.subset) 

680 converters.append(converter) 

681 # CalibRepo entries that don't have a path are just there for 

682 # curated calibs and maybe to set up a collection pointer; that's 

683 # handled further down (after we've done everything we can that 

684 # doesn't involve actually writing to the output Gen3 repo). 

685 # And now reruns. 

686 rerunConverters = {} 

687 for spec in reruns: 

688 runRoot = spec.path 

689 if not os.path.isabs(runRoot): 

690 runRoot = os.path.join(rootConverter.root, runRoot) 

691 spec.guessCollectionNames(self.instrument, rootConverter.root) 

692 converter = StandardRepoConverter(task=self, root=runRoot, run=spec.runName, 

693 instrument=self.instrument, subset=rootConverter.subset) 

694 converters.append(converter) 

695 rerunConverters[spec.runName] = converter 

696 

697 # Walk Gen2 repos to find datasets to convert. 

698 for converter in converters: 

699 converter.prep() 

700 

701 # Register the instrument if we're configured to do so. 

702 if self.config.doRegisterInstrument and not self.dry_run: 

703 self.instrument.register(self.registry) 

704 

705 # Run raw ingest (does nothing if we weren't configured to convert the 

706 # 'raw' dataset type). 

707 rootConverter.runRawIngest(pool=pool) 

708 

709 # Write curated calibrations to all calibration collections where they 

710 # were requested (which may be implicit, by passing calibs=None). Also 

711 # set up a CHAINED collection that points to the default CALIBRATION 

712 # collection if one is needed. 

713 if not self.dry_run: 

714 for spec in calibs: 

715 if spec.curated: 

716 self.instrument.writeCuratedCalibrations(self.butler3, labels=spec.labels) 

717 if spec.default and spec.labels: 

718 # This is guaranteed to be True at most once in the loop by 

719 # logic at the top of this method. 

720 defaultCalibName = self.instrument.makeCalibrationCollectionName() 

721 self.butler3.registry.registerCollection(defaultCalibName, CollectionType.CHAINED) 

722 recommendedCalibName = self.instrument.makeCalibrationCollectionName(*spec.labels) 

723 self.butler3.registry.registerCollection(recommendedCalibName, CollectionType.CALIBRATION) 

724 self.butler3.registry.setCollectionChain(defaultCalibName, [recommendedCalibName]) 

725 

726 # Define visits (also does nothing if we weren't configured to convert 

727 # the 'raw' dataset type). 

728 rootConverter.runDefineVisits(pool=pool) 

729 

730 # Insert dimensions that are potentially shared by all Gen2 

731 # repositories (and are hence managed directly by the Task, rather 

732 # than a converter instance). 

733 # This also finishes setting up the (shared) converter.subsets object 

734 # that is used to filter data IDs for config.relatedOnly. 

735 self.registerUsedSkyMaps(rootConverter.subset) 

736 self.registerUsedSkyPix(rootConverter.subset) 

737 

738 # Look for datasets, generally by scanning the filesystem. 

739 # This requires dimensions to have already been inserted so we can use 

740 # dimension information to identify related datasets. 

741 for converter in converters: 

742 converter.findDatasets() 

743 

744 # Expand data IDs. 

745 if self.config.doExpandDataIds: 

746 for converter in converters: 

747 converter.expandDataIds() 

748 

749 if self.dry_run: 

750 return 

751 

752 # Actually ingest datasets. 

753 for converter in converters: 

754 converter.ingest() 

755 

756 # Perform any post-ingest processing. 

757 for converter in converters: 

758 converter.finish() 

759 

760 # Make the umbrella collection, if desired. 

761 if self.config.doMakeUmbrellaCollection: 

762 umbrella = self.instrument.makeUmbrellaCollectionName() 

763 self.registry.registerCollection(umbrella, CollectionType.CHAINED) 

764 children = list(self.registry.getCollectionChain(umbrella)) 

765 children.extend(rootConverter.getCollectionChain()) 

766 children.append(self.instrument.makeCalibrationCollectionName()) 

767 if BaseSkyMap.SKYMAP_RUN_COLLECTION_NAME not in children: 

768 # Ensure the umbrella collection includes the global skymap 

769 # collection, even if it's currently empty. 

770 self.registry.registerRun(BaseSkyMap.SKYMAP_RUN_COLLECTION_NAME) 

771 children.append(BaseSkyMap.SKYMAP_RUN_COLLECTION_NAME) 

772 children.extend(self.config.extraUmbrellaChildren) 

773 self.log.info("Defining %s from chain %s.", umbrella, children) 

774 self.registry.setCollectionChain(umbrella, children) 

775 

776 # Add chained collections for reruns. 

777 for spec in reruns: 

778 if spec.chainName is not None: 

779 self.butler3.registry.registerCollection(spec.chainName, type=CollectionType.CHAINED) 

780 chain = [spec.runName] 

781 chain.extend(rerunConverters[spec.runName].getCollectionChain()) 

782 for parent in spec.parents: 

783 chain.append(parent) 

784 parentConverter = rerunConverters.get(parent) 

785 if parentConverter is not None: 

786 chain.extend(parentConverter.getCollectionChain()) 

787 chain.extend(rootConverter.getCollectionChain()) 

788 if len(calibs) == 1: 

789 # Exactly one calibration repo being converted, so it's 

790 # safe-ish to assume that's the one the rerun used. 

791 chain.append(self.instrument.makeCalibrationCollectionName(*calibs[0].labels)) 

792 self.log.info("Defining %s from chain %s.", spec.chainName, chain) 

793 self.butler3.registry.setCollectionChain(spec.chainName, chain, flatten=True)