Coverage for python/lsst/obs/base/gen2to3/convertRepo.py: 26%


253 statements  

1# This file is part of obs_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ["CalibRepo", "ConvertRepoConfig", "ConvertRepoTask", "ConvertRepoSkyMapConfig", "Rerun"] 

24 

25import fnmatch 

26import os 

27from dataclasses import dataclass 

28from multiprocessing import Pool 

29from typing import Iterable, List, Optional, Tuple 

30 

31from lsst.daf.butler import Butler as Butler3 

32from lsst.daf.butler import CollectionType, SkyPixDimension 

33from lsst.pex.config import Config, ConfigDictField, ConfigurableField, DictField, Field, ListField 

34from lsst.pipe.base import Task 

35from lsst.resources import ResourcePath 

36from lsst.skymap import BaseSkyMap, skyMapRegistry 

37 

38from .._instrument import Instrument 

39from ..defineVisits import DefineVisitsTask 

40from ..ingest import RawIngestTask 

41from .calibRepoConverter import CalibRepoConverter 

42from .repoConverter import ConversionSubset 

43from .rootRepoConverter import RootRepoConverter 

44from .standardRepoConverter import StandardRepoConverter 

45 

46 

47@dataclass 

48class ConfiguredSkyMap: 

49 """Struct containing information about a skymap that may appear in a Gen2 

50 repository. 

51 """ 

52 

53 name: str 

54 """Name of the skymap used in Gen3 data IDs. 

55 """ 

56 

57 sha1: bytes 

58 """Hash computed by `BaseSkyMap.getSha1`. 

59 """ 

60 

61 instance: BaseSkyMap 

62 """Instance of the skymap itself. 

63 """ 

64 

65 used: bool = False 

66 """Whether this skymap has been found in at least one repository being 

67 converted. 

68 """ 

69 

70 

71def _dropPrefix(s: str, prefix: str) -> Tuple[str, bool]: 

72 """If ``s`` starts with ``prefix``, return the rest of ``s`` and `True`. 

73 Otherwise return ``s`` and `False`. 

74 """ 

75 if s.startswith(prefix): 

76 return s[len(prefix) :], True 

77 return s, False 

78 

79 

80@dataclass 

81class Rerun: 

82 """Specification for a Gen2 processing-output repository to convert.""" 

83 

84 path: str 

85 """Absolute or relative (to the root repository) path to the Gen2 

86 repository (`str`). 

87 """ 

88 

89 runName: Optional[str] 

90 """Name of the `~lsst.daf.butler.CollectionType.RUN` collection datasets 

91 will be inserted into (`str` or `None`). 

92 

93 If `None`, a name will be guessed by calling `guessCollectionNames`. 

94 """ 

95 

96 chainName: Optional[str] 

97 """Name of a `~lsst.daf.butler.CollectionType.CHAINED` collection that will 

98 combine this repository's datasets with those of its parent repositories 

99 (`str` or `None`). 

100 

101 If `None`, a name will be guessed by calling `guessCollectionNames`. 

102 """ 

103 

104 parents: List[str] 

105 """Collection names associated with parent repositories, used to define the 

106 chained collection (`list` [ `str` ]). 

107 

108 Ignored if `chainName` is `None`. Runs used in the root repo are 

109 automatically included. 

110 """ 

111 

112 def guessCollectionNames(self, instrument: Instrument, root: str) -> None: 

113 """Update `runName` and `chainName` with guesses that match Gen3 naming 

114 conventions. 

115 

116 If `chainName` is not `None`, and `runName` is, `runName` will be set 

117 from it. If `runName` is already set, nothing will be changed, and 

118 if `chainName` is `None`, no chained collection will be created. 

119 

120 Parameters 

121 ---------- 

122 instrument : `Instrument` 

123 Instrument object for the repository being converted. 

124 root : `str` 

125 Path to the root repository. If this is present at the start of 

126 ``self.path``, it will be stripped as part of generating the run 

127 name. 

128 

129 Raises 

130 ------ 

131 ValueError 

132 Raised if the appropriate collection names cannot be inferred. 

133 """ 

134 if self.runName is not None: 

135 return 

136 if self.chainName is None: 

137 if os.path.isabs(self.path): 

138 rerunURI = ResourcePath(self.path) 

139 rootURI = ResourcePath(root) 

140 chainName = rerunURI.relative_to(rootURI) 

141 if chainName is None: 

142 raise ValueError( 

143 f"Cannot guess run name collection for rerun at '{self.path}': " 

144 f"no clear relationship to root '{root}'." 

145 ) 

146 else: 

147 chainName = self.path 

148 chainName, _ = _dropPrefix(chainName, "rerun/") 

149 chainName, isPersonal = _dropPrefix(chainName, "private/") 

150 if isPersonal: 

151 chainName = f"u/{chainName}" 

152 else: 

153 chainName, _ = _dropPrefix(chainName, "shared/") 

154 chainName = instrument.makeCollectionName("runs", chainName) 

155 self.chainName = chainName 

156 self.runName = f"{self.chainName}/direct" 

157 

158 
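The spec above is normally built by driver scripts; as a minimal sketch of how it might be used (the rerun path, Gen2 root, and user name are invented and do not appear in the original module):

# Illustrative only -- not part of convertRepo.py; names and paths are made up.
def _example_rerun_spec(instrument: Instrument) -> Rerun:
    spec = Rerun(
        path="rerun/private/jdoe/w_2021_30",  # hypothetical Gen2 rerun path
        runName=None,     # let guessCollectionNames fill these in
        chainName=None,
        parents=[],
    )
    spec.guessCollectionNames(instrument, root="/data/gen2root")  # hypothetical root
    # A path under rerun/private/ maps to a personal ("u/...") collection:
    #   spec.chainName == instrument.makeCollectionName("runs", "u/jdoe/w_2021_30")
    #   spec.runName == f"{spec.chainName}/direct"
    return spec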

159@dataclass 

160class CalibRepo: 

161 """Specification for a Gen2 calibration repository to convert.""" 

162 

163 path: Optional[str] 

164 """Absolute or relative (to the root repository) path to the Gen2 

165 repository (`str` or `None`). 

166 

167 If `None`, no calibration datasets will be converted from Gen2, but 

168 curated calibrations may still be written. 

169 """ 

170 

171 curated: bool = True 

172 """If `True`, write curated calibrations into the associated 

173 ``CALIBRATION`` collection (`bool`). 

174 """ 

175 

176 labels: Tuple[str, ...] = () 

177 """Extra strings to insert into collection names, including both the 

178 ``RUN`` collections that datasets are ingested directly into and the 

179 ``CALIBRATION`` collection that associates them with validity ranges. 

180 

181 An empty tuple will directly populate the default calibration collection 

182 for this instrument with the converted datasets, and is incompatible with 

183 ``default=False``. This is a good choice for test data repositories where 

184 only one ``CALIBRATION`` collection will ever exist. In other cases, this 

185 should be a non-empty tuple, so the default calibration collection can 

186 actually be a ``CHAINED`` collection pointer that points to the current 

187 recommended ``CALIBRATION`` collection. 

188 """ 

189 

190 default: bool = True 

191 """If `True`, the created ``CALIBRATION`` collection should be the default 

192 for this instrument. 

193 

194 This field may only be `True` for one converted calibration collection if 

195 more than one is passed to `ConvertRepoTask.run`. It defaults to `True` 

196 because the vast majority of the time only one calibration collection is 

197 being converted. If ``labels`` is not empty, ``default=True`` will cause 

198 a ``CHAINED`` collection that points to the converted ``CALIBRATION`` 

199 collection to be defined. If ``labels`` is empty, ``default`` *must* be 

200 `True` and no ``CHAINED`` collection pointer is necessary. 

201 """ 

202 

203 def __post_init__(self) -> None: 

204 if not self.labels and not self.default: 

205 raise ValueError("labels=() requires default=True") 

206 

207 
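As a hedged illustration of the constraints documented above, two hypothetical `CalibRepo` specs (the path and labels are invented):

# Illustrative only -- not part of convertRepo.py.
example_calibs = [
    # Convert a Gen2 calib repo into a labeled CALIBRATION collection and make
    # the instrument's default calibration collection a CHAINED pointer to it.
    CalibRepo(path="CALIB", labels=("DM-12345",), curated=True, default=True),
    # No Gen2 calibs to convert: only write curated calibrations. Because only
    # one spec passed to ConvertRepoTask.run may have default=True, this one
    # opts out; labels=() with default=False would raise in __post_init__.
    CalibRepo(path=None, labels=("curated",), curated=True, default=False),
]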

208class ConvertRepoSkyMapConfig(Config): 

209 """Sub-config used to hold the parameters of a SkyMap. 

210 

211 Notes 

212 ----- 

213 This config only needs to exist because we can't put a 

214 `~lsst.pex.config.RegistryField` directly inside a 

215 `~lsst.pex.config.ConfigDictField`. 

216 

217 It needs to have its only field named "skyMap" for compatibility with the 

218 configuration of `lsst.pipe.tasks.MakeSkyMapTask`, which we want so we can 

219 use one config file in an obs package to configure both. 

220 

221 This name leads to unfortunate repetition with the field named 

222 "skyMaps" that holds it - "skyMaps[name].skyMap" - but that seems 

223 unavoidable. 

224 """ 

225 

226 skyMap = skyMapRegistry.makeField( 

227 doc="Type and parameters for the SkyMap itself.", 

228 default="dodeca", 

229 ) 

230 

231 
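A sketch of how a skymap entry might be declared through this sub-config in a ConvertRepoTask config override file (the entry name "example" and the rings parameter are hypothetical; `config` is assumed to be the ConvertRepoConfig being overridden):

# Illustrative config override -- not part of convertRepo.py.
config.skyMaps["example"] = ConvertRepoSkyMapConfig()
# Select the skymap type registered under "rings" and set one of its
# parameters; converted datasets matching this skymap get skymap="example".
config.skyMaps["example"].skyMap.name = "rings"
config.skyMaps["example"].skyMap["rings"].numRings = 120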

232class ConvertRepoConfig(Config): 

233 raws = ConfigurableField( 

234 "Configuration for subtask responsible for ingesting raws and adding exposure dimension entries.", 

235 target=RawIngestTask, 

236 ) 

237 defineVisits = ConfigurableField( 

238 "Configuration for the subtask responsible for defining visits from exposures.", 

239 target=DefineVisitsTask, 

240 ) 

241 skyMaps = ConfigDictField( 

242 "Mapping from Gen3 skymap name to the parameters used to construct a " 

243 "BaseSkyMap instance. This will be used to associate names with " 

244 "existing skymaps found in the Gen2 repo.", 

245 keytype=str, 

246 itemtype=ConvertRepoSkyMapConfig, 

247 default={}, 

248 ) 

249 rootSkyMapName = Field( 

250 "Name of a Gen3 skymap (an entry in ``self.skyMaps``) to assume for " 

251 "datasets in the root repository when no SkyMap is found there. ", 

252 dtype=str, 

253 optional=True, 

254 default=None, 

255 ) 

256 runs = DictField( 

257 "A mapping from dataset type name to the RUN collection they should " 

258 "be inserted into. This must include all datasets that can be found " 

259 "in the root repository; other repositories will use per-repository " 

260 "runs.", 

261 keytype=str, 

262 itemtype=str, 

263 default={}, 

264 ) 

265 runsForced = DictField( 

266 "Like ``runs``, but is used even when the dataset is present in a " 

267 "non-root repository (i.e. rerun), overriding the non-root " 

268 "repository's main collection.", 

269 keytype=str, 

270 itemtype=str, 

271 default={ 

272 "brightObjectMask": "masks", 

273 }, 

274 ) 

275 storageClasses = DictField( 

276 "Mapping from dataset type name or Gen2 policy entry (e.g. 'python' " 

277 "or 'persistable') to the Gen3 StorageClass name.", 

278 keytype=str, 

279 itemtype=str, 

280 default={ 

281 "bias": "ExposureF", 

282 "dark": "ExposureF", 

283 "flat": "ExposureF", 

284 "defects": "Defects", 

285 "crosstalk": "CrosstalkCalib", 

286 "BaseSkyMap": "SkyMap", 

287 "BaseCatalog": "Catalog", 

288 "BackgroundList": "Background", 

289 "raw": "Exposure", 

290 "MultilevelParquetTable": "DataFrame", 

291 "ParquetTable": "DataFrame", 

292 "SkyWcs": "Wcs", 

293 }, 

294 ) 

295 formatterClasses = DictField( 

296 "Mapping from dataset type name to formatter class. " 

297 "By default these are derived from the formatters listed in the" 

298 " Gen3 datastore configuration.", 

299 keytype=str, 

300 itemtype=str, 

301 default={}, 

302 ) 

303 targetHandlerClasses = DictField( 

304 "Mapping from dataset type name to target handler class.", keytype=str, itemtype=str, default={} 

305 ) 

306 doRegisterInstrument = Field( 

307 "If True (default), add dimension records for the Instrument and its " 

308 "filters and detectors to the registry instead of assuming they are " 

309 "already present.", 

310 dtype=bool, 

311 default=True, 

312 ) 

313 refCats = ListField( 

314 "The names of reference catalogs (subdirectories under ref_cats) to be converted", 

315 dtype=str, 

316 default=[], 

317 ) 

318 fileIgnorePatterns = ListField( 

319 "Filename globs that should be ignored instead of being treated as datasets.", 

320 dtype=str, 

321 default=[ 

322 "README.txt", 

323 "*.*~*", 

324 "butler.yaml", 

325 "gen3.sqlite3", 

326 "registry.sqlite3", 

327 "calibRegistry.sqlite3", 

328 "_mapper", 

329 "_parent", 

330 "repositoryCfg.yaml", 

331 ], 

332 ) 

333 rawDatasetType = Field( 

334 "Gen2 dataset type to use for raw data.", 

335 dtype=str, 

336 default="raw", 

337 ) 

338 datasetIncludePatterns = ListField( 

339 "Glob-style patterns for dataset type names that should be converted.", dtype=str, default=["*"] 

340 ) 

341 datasetIgnorePatterns = ListField( 

342 "Glob-style patterns for dataset type names that should not be " 

343 "converted despite matching a pattern in datasetIncludePatterns.", 

344 dtype=str, 

345 default=[], 

346 ) 

347 datasetTemplateOverrides = DictField( 

348 "Overrides for Gen2 filename templates, keyed by dataset type. " 

349 "This can be used to support conversions of Gen2 repos whose mapper " 

350 "templates were modified in obs_* packages since the datasets were " 

351 "written.", 

352 keytype=str, 

353 itemtype=str, 

354 default={}, 

355 ) 

356 ccdKey = Field( 

357 "Key used for the Gen2 equivalent of 'detector' in data IDs.", 

358 dtype=str, 

359 default="ccd", 

360 ) 

361 relatedOnly = Field( 

362 "If True, only convert datasets that are related to the " 

363 "ingested visits. Ignored unless a list of visits is passed to " 

364 "run().", 

365 dtype=bool, 

366 default=False, 

367 ) 

368 doExpandDataIds = Field( 

369 "If True (default), expand data IDs to include extra metadata before " 

370 "ingesting them. " 

371 "This may be required in order to associate calibration datasets with " 

372 "validity ranges or populate file templates, so setting this to False " 

373 "is considered advanced usage (and it may not always work). When it " 

374 "does, it can provide a considerable speedup.", 

375 dtype=bool, 

376 default=True, 

377 ) 

378 doMakeUmbrellaCollection = Field( 

379 "If True (default), define an '<instrument>/defaults' CHAINED " 

380 "collection that includes everything found in the root repo as well " 

381 "as the default calibration collection.", 

382 dtype=bool, 

383 default=True, 

384 ) 

385 extraUmbrellaChildren = ListField( 

386 "Additional child collections to include in the umbrella collection. " 

387 "Ignored if doMakeUmbrellaCollection=False.", 

388 dtype=str, 

389 default=[], 

390 ) 

391 

392 @property 

393 def transfer(self): 

394 return self.raws.transfer 

395 

396 @transfer.setter 

397 def transfer(self, value): 

398 self.raws.transfer = value 

399 

400 def setDefaults(self): 

401 self.transfer = None 

402 

403 def validate(self): 

404 super().validate() 

405 if self.relatedOnly and not self.doExpandDataIds: 

406 raise ValueError("relatedOnly requires doExpandDataIds.") 

407 

408 
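To make the relationships above concrete, a short illustrative snippet (the values are invented): `transfer` is only a convenience property forwarding to the raw-ingest subtask config, and `validate()` rejects `relatedOnly=True` combined with `doExpandDataIds=False`.

# Illustrative only -- not part of convertRepo.py.
config = ConvertRepoConfig()
config.transfer = "symlink"                      # stored as config.raws.transfer
config.refCats = ["gaia_dr2_20200414"]           # hypothetical ref_cats subdirectory
config.datasetIgnorePatterns.append("*_camera")  # skip a dataset type by glob
config.relatedOnly = True
config.doExpandDataIds = False
try:
    config.validate()
except ValueError:
    pass  # relatedOnly requires doExpandDataIds, so this combination is rejected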

409class ConvertRepoTask(Task): 

410 """A task that converts one or more related Gen2 data repositories to a 

411 single Gen3 data repository (with multiple collections). 

412 

413 Parameters 

414 ---------- 

415 config : `ConvertRepoConfig` 

416 Configuration for this task. 

417 butler3 : `lsst.daf.butler.Butler` 

418 A writeable Gen3 Butler instance that represents the data repository 

419 that datasets will be ingested into. If the 'raw' dataset is 

420 configured to be included in the conversion, ``butler3.run`` should be 

421 set to the name of the collection raws should be ingested into, and 

422 ``butler3.collections`` should include a calibration collection from 

423 which the ``camera`` dataset can be loaded, unless a calibration repo 

424 is converted with ``curated=True``. 

425 instrument : `lsst.obs.base.Instrument` 

426 The Gen3 instrument that should be used for this conversion. 

427 dry_run : `bool`, optional 

428 If `True` (`False` is default), make no changes to the Gen3 data 

429 repository while running as many steps as possible. This option is 

430 best used with a read-only ``butler3`` argument to ensure unexpected 

431 edge cases respect this argument (and fail rather than write if they 

432 do not). 

433 **kwargs 

434 Other keyword arguments are forwarded to the `Task` constructor. 

435 

436 Notes 

437 ----- 

438 Most of the work of converting repositories is delegated to instances of 

439 the `RepoConverter` hierarchy. The `ConvertRepoTask` instance itself holds 

440 only state that is relevant for all Gen2 repositories being ingested, while 

441 each `RepoConverter` instance holds only state relevant for the conversion 

442 of a single Gen2 repository. Both the task and the `RepoConverter` 

443 instances are single use; `ConvertRepoTask.run` and most `RepoConverter` 

444 methods may only be called once on a particular instance. 

445 """ 

446 

447 ConfigClass = ConvertRepoConfig 

448 

449 _DefaultName = "convertRepo" 

450 

451 def __init__( 

452 self, config=None, *, butler3: Butler3, instrument: Instrument, dry_run: bool = False, **kwargs 

453 ): 

454 config.validate() # Not a CmdlineTask nor PipelineTask, so have to validate the config here. 

455 super().__init__(config, **kwargs) 

456 # Make self.butler3 one that doesn't have any collections associated 

457 # with it - those are needed by RawIngestTask and DefineVisitsTask, but 

458 # we don't want them messing with converted datasets, because those 

459 # have their own logic for figuring out which collections to write to. 

460 self.butler3 = Butler3(butler=butler3) 

461 self.registry = self.butler3.registry 

462 self.universe = self.registry.dimensions 

463 if self.isDatasetTypeIncluded("raw"): 

464 self.makeSubtask("raws", butler=butler3) 

465 self.makeSubtask("defineVisits", butler=butler3) 

466 else: 

467 self.raws = None 

468 self.defineVisits = None 

469 self.instrument = instrument 

470 self._configuredSkyMapsBySha1 = {} 

471 self._configuredSkyMapsByName = {} 

472 for name, config in self.config.skyMaps.items(): 

473 instance = config.skyMap.apply() 

474 self._populateSkyMapDicts(name, instance) 

475 self._usedSkyPix = set() 

476 self.translatorFactory = self.instrument.makeDataIdTranslatorFactory() 

477 self.translatorFactory.log = self.log.getChild("translators") 

478 self.dry_run = dry_run 

479 

480 def _reduce_kwargs(self): 

481 # Add extra parameters to pickle 

482 return dict(**super()._reduce_kwargs(), butler3=self.butler3, instrument=self.instrument) 

483 

484 def _populateSkyMapDicts(self, name, instance): 

485 struct = ConfiguredSkyMap(name=name, sha1=instance.getSha1(), instance=instance) 

486 self._configuredSkyMapsBySha1[struct.sha1] = struct 

487 self._configuredSkyMapsByName[struct.name] = struct 

488 

489 def isDatasetTypeIncluded(self, datasetTypeName: str): 

490 """Return `True` if configuration indicates that the given dataset type 

491 should be converted. 

492 

493 This method is intended to be called primarily by the 

494 `RepoConverter` instances used internally by the task. 

495 

496 Parameters 

497 ---------- 

498 datasetTypeName : `str` 

499 Name of the dataset type. 

500 

501 Returns 

502 ------- 

503 included : `bool` 

504 Whether the dataset should be included in the conversion. 

505 """ 

506 return any( 

507 fnmatch.fnmatchcase(datasetTypeName, pattern) for pattern in self.config.datasetIncludePatterns 

508 ) and not any( 

509 fnmatch.fnmatchcase(datasetTypeName, pattern) for pattern in self.config.datasetIgnorePatterns 

510 ) 

511 
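For illustration, the selection logic above is plain `fnmatch` glob matching; a self-contained sketch with hypothetical patterns:

# Illustrative only -- mirrors isDatasetTypeIncluded with made-up patterns.
import fnmatch

include = ["*"]            # cf. config.datasetIncludePatterns
ignore = ["*_metadata"]    # cf. config.datasetIgnorePatterns

def _included(name: str) -> bool:
    return any(fnmatch.fnmatchcase(name, p) for p in include) and not any(
        fnmatch.fnmatchcase(name, p) for p in ignore
    )

assert _included("calexp") is True
assert _included("calexp_metadata") is False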

512 def useSkyMap(self, skyMap: BaseSkyMap, skyMapName: str) -> str: 

513 """Indicate that a repository uses the given SkyMap. 

514 

515 This method is intended to be called primarily by the 

516 `RepoConverter` instances used internally by the task. 

517 

518 Parameters 

519 ---------- 

520 skyMap : `lsst.skymap.BaseSkyMap` 

521 SkyMap instance being used, typically retrieved from a Gen2 

522 data repository. 

523 skyMapName : `str` 

524 The name of the Gen2 skymap, for error reporting. 

525 

526 Returns 

527 ------- 

528 name : `str` 

529 The name of the skymap in Gen3 data IDs. 

530 

531 Raises 

532 ------ 

533 LookupError 

534 Raised if the specified skymap cannot be found. 

535 """ 

536 sha1 = skyMap.getSha1() 

537 if sha1 not in self._configuredSkyMapsBySha1: 

538 self._populateSkyMapDicts(skyMapName, skyMap) 

539 try: 

540 struct = self._configuredSkyMapsBySha1[sha1] 

541 except KeyError as err: 

542 msg = f"SkyMap '{skyMapName}' with sha1={sha1} not included in configuration." 

543 raise LookupError(msg) from err 

544 struct.used = True 

545 return struct.name 

546 

547 def registerUsedSkyMaps(self, subset: Optional[ConversionSubset]): 

548 """Register all skymaps that have been marked as used. 

549 

550 This method is intended to be called primarily by the 

551 `RepoConverter` instances used internally by the task. 

552 

553 Parameters 

554 ---------- 

555 subset : `ConversionSubset`, optional 

556 Object that will be used to filter converted datasets by data ID. 

557 If given, it will be updated with the tracts of this skymap that 

558 overlap the visits in the subset. 

559 """ 

560 for struct in self._configuredSkyMapsBySha1.values(): 

561 if struct.used: 

562 if not self.dry_run: 

563 try: 

564 # If the skymap isn't registered, this will raise. 

565 self.butler3.registry.expandDataId(skymap=struct.name) 

566 except LookupError: 

567 self.log.info("Registering skymap %s.", struct.name) 

568 struct.instance.register(struct.name, self.butler3) 

569 if subset is not None and self.config.relatedOnly: 

570 subset.addSkyMap(self.registry, struct.name) 

571 

572 def useSkyPix(self, dimension: SkyPixDimension): 

573 """Indicate that a repository uses the given SkyPix dimension. 

574 

575 This method is intended to be called primarily by the 

576 `RepoConverter` instances used internally by the task. 

577 

578 Parameters 

579 ---------- 

580 dimension : `lsst.daf.butler.SkyPixDimension` 

581 Dimension representing a pixelization of the sky. 

582 """ 

583 self._usedSkyPix.add(dimension) 

584 

585 def registerUsedSkyPix(self, subset: Optional[ConversionSubset]): 

586 """Register all skypix dimensions that have been marked as used. 

587 

588 This method is intended to be called primarily by the 

589 `RepoConverter` instances used internally by the task. 

590 

591 Parameters 

592 ---------- 

593 subset : `ConversionSubset`, optional 

594 Object that will be used to filter converted datasets by data ID. 

595 If given, it will be updated with the pixelization IDs that 

596 overlap the visits in the subset. 

597 """ 

598 if subset is not None and self.config.relatedOnly: 

599 for dimension in self._usedSkyPix: 

600 subset.addSkyPix(self.registry, dimension) 

601 

602 def run( 

603 self, 

604 root: str, 

605 *, 

606 calibs: Optional[List[CalibRepo]] = None, 

607 reruns: Optional[List[Rerun]] = None, 

608 visits: Optional[Iterable[int]] = None, 

609 pool: Optional[Pool] = None, 

610 processes: int = 1, 

611 ): 

612 """Convert a group of related data repositories. 

613 

614 Parameters 

615 ---------- 

616 root : `str` 

617 Complete path to the root Gen2 data repository. This should be 

618 a data repository that includes a Gen2 registry and any raw files 

619 and/or reference catalogs. 

620 calibs : `list` of `CalibRepo` 

621 Specifications for Gen2 calibration repos to convert. If `None` 

622 (default), only curated calibrations will be written to the default 

623 calibration collection for this instrument; set to ``()`` explicitly 

624 to disable this. 

625 reruns : `list` of `Rerun` 

626 Specifications for rerun (processing output) repos to convert. If 

627 `None` (default), no reruns are converted. 

628 visits : iterable of `int`, optional 

629 The integer IDs of visits to convert. If not provided, all visits 

630 in the Gen2 root repository will be converted. 

631 pool : `multiprocessing.Pool`, optional 

632 If not `None`, a process pool with which to parallelize some 

633 operations. 

634 processes : `int`, optional 

635 The number of processes to use for conversion. 

636 """ 

637 if pool is None and processes > 1: 

638 pool = Pool(processes) 

639 if calibs is None: 

640 calibs = [CalibRepo(path=None)] 

641 elif calibs and not self.config.doExpandDataIds: 

642 raise ValueError("Cannot convert calib repos with config.doExpandDataIds=False.") 

643 if visits is not None: 

644 subset = ConversionSubset(instrument=self.instrument.getName(), visits=frozenset(visits)) 

645 else: 

646 if self.config.relatedOnly: 

647 self.log.warning( 

648 "config.relatedOnly is True but all visits are being ingested; " 

649 "no filtering will be done." 

650 ) 

651 subset = None 

652 if not self.config.doExpandDataIds and self.butler3.datastore.needs_expanded_data_ids( 

653 self.config.transfer 

654 ): 

655 self.log.warning( 

656 "config.doExpandDataIds=False but datastore reports that expanded data IDs may be needed for transfer mode %s.", 

657 self.config.transfer, 

658 ) 

659 

660 # Check that at most one CalibRepo is marked as default, to fail before 

661 # we actually write anything. 

662 defaultCalibRepos = [c.path for c in calibs if c.default] 

663 if len(defaultCalibRepos) > 1: 

664 raise ValueError(f"Multiple calib repos marked as default: {defaultCalibRepos}.") 

665 

666 # Make converters for all Gen2 repos. 

667 converters = [] 

668 # Start with the root repo, which must always be given even if we are 

669 # not configured to convert anything from it. 

670 rootConverter = RootRepoConverter(task=self, root=root, subset=subset, instrument=self.instrument) 

671 converters.append(rootConverter) 

672 # Calibration repos are next. 

673 for spec in calibs: 

674 calibRoot = spec.path 

675 if calibRoot is not None: 

676 if not os.path.isabs(calibRoot): 

677 calibRoot = os.path.join(rootConverter.root, calibRoot) 

678 converter = CalibRepoConverter( 

679 task=self, 

680 root=calibRoot, 

681 labels=spec.labels, 

682 instrument=self.instrument, 

683 mapper=rootConverter.mapper, 

684 subset=rootConverter.subset, 

685 ) 

686 converters.append(converter) 

687 # CalibRepo entries that don't have a path are just there for 

688 # curated calibs and maybe to set up a collection pointer; that's 

689 # handled further down (after we've done everything we can that 

690 # doesn't involve actually writing to the output Gen3 repo). 

691 # And now reruns. 

692 rerunConverters = {} 

693 for spec in reruns: 

694 runRoot = spec.path 

695 if not os.path.isabs(runRoot): 

696 runRoot = os.path.join(rootConverter.root, runRoot) 

697 spec.guessCollectionNames(self.instrument, rootConverter.root) 

698 converter = StandardRepoConverter( 

699 task=self, 

700 root=runRoot, 

701 run=spec.runName, 

702 instrument=self.instrument, 

703 subset=rootConverter.subset, 

704 ) 

705 converters.append(converter) 

706 rerunConverters[spec.runName] = converter 

707 

708 # Walk Gen2 repos to find datasets to convert. 

709 for converter in converters: 

710 converter.prep() 

711 

712 # Register the instrument if we're configured to do so. 

713 if self.config.doRegisterInstrument and not self.dry_run: 

714 self.instrument.register(self.registry) 

715 

716 # Run raw ingest (does nothing if we weren't configured to convert the 

717 # 'raw' dataset type). 

718 rootConverter.runRawIngest(pool=pool) 

719 

720 # Write curated calibrations to all calibration collections where they 

721 # were requested (which may be implicit, by passing calibs=None). Also 

722 # set up a CHAINED collection that points to the default CALIBRATION 

723 # collection if one is needed. 

724 if not self.dry_run: 

725 for spec in calibs: 

726 if spec.curated: 

727 self.instrument.writeCuratedCalibrations(self.butler3, labels=spec.labels) 

728 if spec.default and spec.labels: 

729 # This is guaranteed to be True at most once in the loop by 

730 # logic at the top of this method. 

731 defaultCalibName = self.instrument.makeCalibrationCollectionName() 

732 self.butler3.registry.registerCollection(defaultCalibName, CollectionType.CHAINED) 

733 recommendedCalibName = self.instrument.makeCalibrationCollectionName(*spec.labels) 

734 self.butler3.registry.registerCollection(recommendedCalibName, CollectionType.CALIBRATION) 

735 self.butler3.registry.setCollectionChain(defaultCalibName, [recommendedCalibName]) 

736 

737 # Define visits (also does nothing if we weren't configured to convert 

738 # the 'raw' dataset type). 

739 rootConverter.runDefineVisits() 

740 

741 # Insert dimensions that are potentially shared by all Gen2 

742 # repositories (and are hence managed directly by the Task, rather 

743 # than a converter instance). 

744 # This also finishes setting up the (shared) converter.subsets object 

745 # that is used to filter data IDs for config.relatedOnly. 

746 self.registerUsedSkyMaps(rootConverter.subset) 

747 self.registerUsedSkyPix(rootConverter.subset) 

748 

749 # Look for datasets, generally by scanning the filesystem. 

750 # This requires dimensions to have already been inserted so we can use 

751 # dimension information to identify related datasets. 

752 for converter in converters: 

753 converter.findDatasets() 

754 

755 # Expand data IDs. 

756 if self.config.doExpandDataIds: 

757 for converter in converters: 

758 converter.expandDataIds() 

759 

760 if self.dry_run: 

761 return 

762 

763 # Actually ingest datasets. 

764 for converter in converters: 

765 converter.ingest() 

766 

767 # Perform any post-ingest processing. 

768 for converter in converters: 

769 converter.finish() 

770 

771 # Make the umbrella collection, if desired. 

772 if self.config.doMakeUmbrellaCollection: 

773 umbrella = self.instrument.makeUmbrellaCollectionName() 

774 self.registry.registerCollection(umbrella, CollectionType.CHAINED) 

775 children = list(self.registry.getCollectionChain(umbrella)) 

776 children.extend(rootConverter.getCollectionChain()) 

777 children.append(self.instrument.makeCalibrationCollectionName()) 

778 if BaseSkyMap.SKYMAP_RUN_COLLECTION_NAME not in children: 

779 # Ensure the umbrella collection includes the global skymap 

780 # collection, even if it's currently empty. 

781 self.registry.registerRun(BaseSkyMap.SKYMAP_RUN_COLLECTION_NAME) 

782 children.append(BaseSkyMap.SKYMAP_RUN_COLLECTION_NAME) 

783 children.extend(self.config.extraUmbrellaChildren) 

784 self.log.info("Defining %s from chain %s.", umbrella, children) 

785 self.registry.setCollectionChain(umbrella, children) 

786 

787 # Add chained collections for reruns. 

788 for spec in reruns: 

789 if spec.chainName is not None: 

790 self.butler3.registry.registerCollection(spec.chainName, type=CollectionType.CHAINED) 

791 chain = [spec.runName] 

792 chain.extend(rerunConverters[spec.runName].getCollectionChain()) 

793 for parent in spec.parents: 

794 chain.append(parent) 

795 parentConverter = rerunConverters.get(parent) 

796 if parentConverter is not None: 

797 chain.extend(parentConverter.getCollectionChain()) 

798 chain.extend(rootConverter.getCollectionChain()) 

799 if len(calibs) == 1: 

800 # Exactly one calibration repo being converted, so it's 

801 # safe-ish to assume that's the one the rerun used. 

802 chain.append(self.instrument.makeCalibrationCollectionName(*calibs[0].labels)) 

803 self.log.info("Defining %s from chain %s.", spec.chainName, chain) 

804 self.butler3.registry.setCollectionChain(spec.chainName, chain, flatten=True)
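
Finally, a hedged end-to-end sketch of driving a conversion with this task; the repository paths, instrument package, and raw run collection name are all hypothetical, and a writeable Gen3 butler repo is assumed to already exist:

# Illustrative only -- not part of convertRepo.py.
from lsst.daf.butler import Butler
from lsst.obs.subaru import HyperSuprimeCam  # any Instrument subclass would do

instrument = HyperSuprimeCam()
butler3 = Butler(
    "/repo/gen3",        # pre-existing Gen3 repository (hypothetical path)
    run="HSC/raw/all",   # collection raws will be ingested into (hypothetical)
)
# In practice config.runs etc. would come from an obs-package config override.
task = ConvertRepoTask(config=ConvertRepoConfig(), butler3=butler3, instrument=instrument)
task.run(
    root="/data/gen2root",                 # Gen2 root repo with registry and raws
    calibs=[CalibRepo(path="CALIB")],      # Gen2 calib repo, relative to the root
    reruns=[                               # pass a list; run() iterates reruns directly
        Rerun(path="rerun/shared/w_2021_30", runName=None, chainName=None, parents=[]),
    ],
    processes=4,
)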