Coverage for python/lsst/obs/base/gen2to3/convertRepo.py: 29%
265 statements
coverage.py v6.4.1, created at 2022-06-11 02:58 -0700
1# This file is part of obs_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ["CalibRepo", "ConvertRepoConfig", "ConvertRepoTask", "ConvertRepoSkyMapConfig", "Rerun"]
25import fnmatch
26import os
27from dataclasses import dataclass
28from multiprocessing import Pool
29from typing import Iterable, List, Optional, Tuple
31from lsst.daf.butler import Butler as Butler3
32from lsst.daf.butler import CollectionType, SkyPixDimension
33from lsst.daf.butler.registry import DataIdError
34from lsst.pex.config import Config, ConfigDictField, ConfigurableField, DictField, Field, ListField
35from lsst.pipe.base import Task
36from lsst.resources import ResourcePath
37from lsst.skymap import BaseSkyMap, skyMapRegistry
39from .._instrument import Instrument
40from ..defineVisits import DefineVisitsTask
41from ..ingest import RawIngestTask
42from .calibRepoConverter import CalibRepoConverter
43from .repoConverter import ConversionSubset
44from .rootRepoConverter import RootRepoConverter
45from .standardRepoConverter import StandardRepoConverter
48@dataclass
49class ConfiguredSkyMap:
50 """Struct containing information about a skymap that may appear in a Gen2
51 repository.
52 """
54 name: str
55 """Name of the skymap used in Gen3 data IDs.
56 """
58 sha1: bytes
59 """Hash computed by `BaseSkyMap.getSha1`.
60 """
62 instance: BaseSkyMap
63 """Name of the skymap used in Gen3 data IDs.
64 """
66 used: bool = False
67 """Whether this skymap has been found in at least one repository being
68 converted.
69 """
72def _dropPrefix(s: str, prefix: str) -> Tuple[str, bool]:
73 """If ``s`` starts with ``prefix``, return the rest of ``s`` and `True`.
74 Otherwise return ``s`` and `False`.
75 """
76 if s.startswith(prefix):
77 return s[len(prefix) :], True
78 return s, False
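# Illustrative sketch (hypothetical path): the prefix-stripping sequence that
# guessCollectionNames applies below.
#
#     _dropPrefix("rerun/private/jdoe/w40", "rerun/")    -> ("private/jdoe/w40", True)
#     _dropPrefix("private/jdoe/w40", "private/")        -> ("jdoe/w40", True)
#     _dropPrefix("jdoe/w40", "shared/")                 -> ("jdoe/w40", False)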
81@dataclass
82class Rerun:
83 """Specification for a Gen2 processing-output repository to convert."""
85 path: str
86 """Absolute or relative (to the root repository) path to the Gen2
87 repository (`str`).
88 """
90 runName: Optional[str]
91 """Name of the `~lsst.daf.butler.CollectionType.RUN` collection datasets
92 will be inserted into (`str` or `None`).
94 If `None`, a name will be guessed by calling `guessCollectionNames`.
95 """
97 chainName: Optional[str]
98 """Name of a `~lsst.daf.butler.CollectionType.CHAINED` collection that will
99 combine this repository's datasets with those of its parent repositories
100 (`str` or `None`).
102 If `None`, a name will be guessed by calling `guessCollectionNames`.
103 """
105 parents: List[str]
106 """Collection names associated with parent repositories, used to define the
107 chained collection (`list` [ `str` ]).
109 Ignored if `chainName` is `None`. Runs used in the root repo are
110 automatically included.
111 """
113 def guessCollectionNames(self, instrument: Instrument, root: str) -> None:
114 """Update `runName` and `chainName` with guesses that match Gen3 naming
115 conventions.
117 If ``runName`` is already set, nothing will be changed. Otherwise, if
118 ``chainName`` is set, ``runName`` will be derived from it; if both are
119 `None`, both will be guessed from the repository path.
121 Parameters
122 ----------
123 instrument : `Instrument`
124 Instrument object for the repository being converted.
125 root : `str`
126 Path to the root repository. If this is present at the start of
127 ``self.path``, it will be stripped as part of generating the run
128 name.
130 Raises
131 ------
132 ValueError
133 Raised if the appropriate collection names cannot be inferred.
134 """
135 if self.runName is not None:
136 return
137 if self.chainName is None:
138 if os.path.isabs(self.path):
139 rerunURI = ResourcePath(self.path)
140 rootURI = ResourcePath(root)
141 chainName = rerunURI.relative_to(rootURI)
142 if chainName is None:
143 raise ValueError(
144 f"Cannot guess run name collection for rerun at '{self.path}': "
145 f"no clear relationship to root '{root}'."
146 )
147 else:
148 chainName = self.path
149 chainName, _ = _dropPrefix(chainName, "rerun/")
150 chainName, isPersonal = _dropPrefix(chainName, "private/")
151 if isPersonal:
152 chainName = f"u/{chainName}"
153 else:
154 chainName, _ = _dropPrefix(chainName, "shared/")
155 chainName = instrument.makeCollectionName("runs", chainName)
156 self.chainName = chainName
157 self.runName = f"{self.chainName}/direct"
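# Illustrative sketch (hypothetical paths and instrument): a Rerun spec whose
# collection names are guessed from its path.
#
#     spec = Rerun(path="rerun/private/jdoe/w40", runName=None, chainName=None, parents=[])
#     spec.guessCollectionNames(instrument, root="/repo/gen2")
#     # For an instrument named "HSC", the logic above yields
#     #   spec.chainName == "HSC/runs/u/jdoe/w40"
#     #   spec.runName == "HSC/runs/u/jdoe/w40/direct"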
160@dataclass
161class CalibRepo:
162 """Specification for a Gen2 calibration repository to convert."""
164 path: Optional[str]
165 """Absolute or relative (to the root repository) path to the Gen2
166 repository (`str` or `None`).
168 If `None`, no calibration datasets will be converted from Gen2, but
169 curated calibrations may still be written.
170 """
172 curated: bool = True
173 """If `True`, write curated calibrations into the associated
174 ``CALIBRATION`` collection (`bool`).
175 """
177 labels: Tuple[str, ...] = ()
178 """Extra strings to insert into collection names, including both the
179 ``RUN`` collections that datasets are ingested directly into and the
180 ``CALIBRATION`` collection that associates them with validity ranges.
182 An empty tuple will directly populate the default calibration collection
183 for this instrument with the converted datasets, and is incompatible with
184 ``default=False``. This is a good choice for test data repositories where
185 only one ``CALIBRATION`` collection will ever exist. In other cases, this
186 should be a non-empty tuple, so the default calibration collection can
187 actually be a ``CHAINED`` collection pointer that points to the current
188 recommended ``CALIBRATION`` collection.
189 """
191 default: bool = True
192 """If `True`, the created ``CALIBRATION`` collection should be the default
193 for this instrument.
195 This field may only be `True` for one converted calibration collection if
196 more than one is passed to `ConvertRepoTask.run`. It defaults to `True`
197 because the vast majority of the time only one calibration collection is
198 being converted. If ``labels`` is not empty, ``default=True`` will cause
199 a ``CHAINED`` collection that points to the converted ``CALIBRATION``
200 collection to be defined. If ``labels`` is empty, ``default`` *must* be
201 `True` and no ``CHAINED`` collection pointer is necessary.
202 """
204 def __post_init__(self) -> None:
205 if not self.labels and not self.default:
206 raise ValueError("labels=() requires default=True")
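# Illustrative sketch (paths and labels hypothetical): common CalibRepo
# specifications accepted by ConvertRepoTask.run.
#
#     CalibRepo(path="CALIB")                        # convert Gen2 calibs into the default collection
#     CalibRepo(path="CALIB", labels=("DM-28636",))  # convert into a labeled CALIBRATION collection
#                                                    # and point the default CHAINED collection at it
#     CalibRepo(path=None)                           # no Gen2 calibs; write curated calibrations only
#     CalibRepo(path=None, default=False)            # ValueError: labels=() requires default=True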
209class ConvertRepoSkyMapConfig(Config):
210 """Sub-config used to hold the parameters of a SkyMap.
212 Notes
213 -----
214 This config only needs to exist because we can't put a
215 `~lsst.pex.config.RegistryField` directly inside a
216 `~lsst.pex.config.ConfigDictField`.
218 It needs to have its only field named "skyMap" for compatibility with the
219 configuration of `lsst.pipe.tasks.MakeSkyMapTask`, which we want so we can
220 use one config file in an obs package to configure both.
222 This name leads to unfortunate repetition with the field named
223 "skymap" that holds it - "skyMap[name].skyMap" - but that seems
224 unavoidable.
225 """
227 skyMap = skyMapRegistry.makeField(
228 doc="Type and parameters for the SkyMap itself.",
229 default="dodeca",
230 )
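# Illustrative config-override sketch (skymap name and parameters are
# hypothetical): how an obs package might declare a skymap so it can be matched
# against one found in a Gen2 repository.
#
#     config.skyMaps["hsc_rings_v1"] = ConvertRepoSkyMapConfig()
#     config.skyMaps["hsc_rings_v1"].skyMap.name = "rings"
#     config.skyMaps["hsc_rings_v1"].skyMap["rings"].numRings = 120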
233class ConvertRepoConfig(Config):
234 raws = ConfigurableField(
235 "Configuration for subtask responsible for ingesting raws and adding exposure dimension entries.",
236 target=RawIngestTask,
237 )
238 defineVisits = ConfigurableField(
239 "Configuration for the subtask responsible for defining visits from exposures.",
240 target=DefineVisitsTask,
241 )
242 skyMaps = ConfigDictField(
243 "Mapping from Gen3 skymap name to the parameters used to construct a "
244 "BaseSkyMap instance. This will be used to associate names with "
245 "existing skymaps found in the Gen2 repo.",
246 keytype=str,
247 itemtype=ConvertRepoSkyMapConfig,
248 default={},
249 )
250 rootSkyMapName = Field(
251 "Name of a Gen3 skymap (an entry in ``self.skyMaps``) to assume for "
252 "datasets in the root repository when no SkyMap is found there. ",
253 dtype=str,
254 optional=True,
255 default=None,
256 )
257 runs = DictField(
258 "A mapping from dataset type name to the RUN collection they should "
259 "be inserted into. This must include all datasets that can be found "
260 "in the root repository; other repositories will use per-repository "
261 "runs.",
262 keytype=str,
263 itemtype=str,
264 default={},
265 )
266 runsForced = DictField(
267 "Like ``runs``, but is used even when the dataset is present in a "
268 "non-root repository (i.e. rerun), overriding the non-root "
269 "repository's main collection.",
270 keytype=str,
271 itemtype=str,
272 default={
273 "brightObjectMask": "masks",
274 },
275 )
276 storageClasses = DictField(
277 "Mapping from dataset type name or Gen2 policy entry (e.g. 'python' "
278 "or 'persistable') to the Gen3 StorageClass name.",
279 keytype=str,
280 itemtype=str,
281 default={
282 "bias": "ExposureF",
283 "dark": "ExposureF",
284 "flat": "ExposureF",
285 "defects": "Defects",
286 "crosstalk": "CrosstalkCalib",
287 "BaseSkyMap": "SkyMap",
288 "BaseCatalog": "Catalog",
289 "BackgroundList": "Background",
290 "raw": "Exposure",
291 "MultilevelParquetTable": "DataFrame",
292 "ParquetTable": "DataFrame",
293 "SkyWcs": "Wcs",
294 },
295 )
296 formatterClasses = DictField(
297 "Mapping from dataset type name to formatter class. "
298 "By default these are derived from the formatters listed in the"
299 " Gen3 datastore configuration.",
300 keytype=str,
301 itemtype=str,
302 default={},
303 )
304 targetHandlerClasses = DictField(
305 "Mapping from dataset type name to target handler class.", keytype=str, itemtype=str, default={}
306 )
307 doRegisterInstrument = Field(
308 "If True (default), add dimension records for the Instrument and its "
309 "filters and detectors to the registry instead of assuming they are "
310 "already present.",
311 dtype=bool,
312 default=True,
313 )
314 refCats = ListField(
315 "The names of reference catalogs (subdirectories under ref_cats) to be converted",
316 dtype=str,
317 default=[],
318 )
319 fileIgnorePatterns = ListField(
320 "Filename globs that should be ignored instead of being treated as datasets.",
321 dtype=str,
322 default=[
323 "README.txt",
324 "*.*~*",
325 "butler.yaml",
326 "gen3.sqlite3",
327 "registry.sqlite3",
328 "calibRegistry.sqlite3",
329 "_mapper",
330 "_parent",
331 "repositoryCfg.yaml",
332 ],
333 )
334 rawDatasetType = Field(
335 "Gen2 dataset type to use for raw data.",
336 dtype=str,
337 default="raw",
338 )
339 datasetIncludePatterns = ListField(
340 "Glob-style patterns for dataset type names that should be converted.", dtype=str, default=["*"]
341 )
342 datasetIgnorePatterns = ListField(
343 "Glob-style patterns for dataset type names that should not be "
344 "converted despite matching a pattern in datasetIncludePatterns.",
345 dtype=str,
346 default=[],
347 )
348 datasetTemplateOverrides = DictField(
349 "Overrides for Gen2 filename templates, keyed by dataset type. "
350 "This can be used to support conversions of Gen2 repos whose mapper "
351 "templates were modified in obs_* packages since the datasets were "
352 "written.",
353 keytype=str,
354 itemtype=str,
355 default={},
356 )
357 ccdKey = Field(
358 "Key used for the Gen2 equivalent of 'detector' in data IDs.",
359 dtype=str,
360 default="ccd",
361 )
362 relatedOnly = Field(
363 "If True (default), only convert datasets that are related to the "
364 "ingested visits. Ignored unless a list of visits is passed to "
365 "run().",
366 dtype=bool,
367 default=False,
368 )
369 doExpandDataIds = Field(
370 "If True (default), expand data IDs to include extra metadata before "
371 "ingesting them. "
372 "This may be required in order to associate calibration datasets with "
373 "validity ranges or populate file templates, so setting this to False "
374 "is considered advanced usage (and it may not always work). When it "
375 "does, it can provide a considerable speedup.",
376 dtype=bool,
377 default=True,
378 )
379 doMakeUmbrellaCollection = Field(
380 "If True (default), define an '<instrument>/defaults' CHAINED "
381 "collection that includes everything found in the root repo as well "
382 "as the default calibration collection.",
383 dtype=bool,
384 default=True,
385 )
386 extraUmbrellaChildren = ListField(
387 "Additional child collections to include in the umbrella collection. "
388 "Ignored if doMakeUmbrellaCollection=False.",
389 dtype=str,
390 default=[],
391 )
393 @property
394 def transfer(self):
395 return self.raws.transfer
397 @transfer.setter
398 def transfer(self, value):
399 self.raws.transfer = value
401 def setDefaults(self):
402 self.transfer = None
404 def validate(self):
405 super().validate()
406 if self.relatedOnly and not self.doExpandDataIds:
407 raise ValueError("relatedOnly requires doExpandDataIds.")
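# Illustrative config-override sketch (dataset type, collection, and refcat
# names hypothetical): typical per-obs-package settings for the fields above.
#
#     config.refCats.append("ps1_pv3_3pi_20170110")
#     config.runs["brightObjectMask"] = "HSC/masks"
#     config.storageClasses["deepCoadd_obj"] = "DataFrame"
#     config.datasetIgnorePatterns.append("*_camera")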
410class ConvertRepoTask(Task):
411 """A task that converts one or more related Gen2 data repositories to a
412 single Gen3 data repository (with multiple collections).
414 Parameters
415 ----------
416 config : `ConvertRepoConfig`
417 Configuration for this task.
418 butler3 : `lsst.daf.butler.Butler`
419 A writeable Gen3 Butler instance that represents the data repository
420 that datasets will be ingested into. If the 'raw' dataset is
421 configured to be included in the conversion, ``butler3.run`` should be
422 set to the name of the collection raws should be ingested into, and
423 ``butler3.collections`` should include a calibration collection from
424 which the ``camera`` dataset can be loaded, unless a calibration repo
425 is converted and ``doWriteCuratedCalibrations`` is `True`.
426 instrument : `lsst.obs.base.Instrument`
427 The Gen3 instrument that should be used for this conversion.
428 dry_run : `bool`, optional
429 If `True` (`False` is default), make no changes to the Gen3 data
430 repository while running as many steps as possible. This option is
431 best used with a read-only ``butler3`` argument to ensure unexpected
432 edge cases respect this argument (and fail rather than write if they
433 do not).
434 **kwargs
435 Other keyword arguments are forwarded to the `Task` constructor.
437 Notes
438 -----
439 Most of the work of converting repositories is delegated to instances of
440 the `RepoConverter` hierarchy. The `ConvertRepoTask` instance itself holds
441 only state that is relevant for all Gen2 repositories being ingested, while
442 each `RepoConverter` instance holds only state relevant for the conversion
443 of a single Gen2 repository. Both the task and the `RepoConverter`
444 instances are single use; `ConvertRepoTask.run` and most `RepoConverter`
445 methods may only be called once on a particular instance.
446 """
448 ConfigClass = ConvertRepoConfig
450 _DefaultName = "convertRepo"
452 def __init__(
453 self, config=None, *, butler3: Butler3, instrument: Instrument, dry_run: bool = False, **kwargs
454 ):
455 config.validate() # Not a CmdlineTask nor PipelineTask, so have to validate the config here.
456 super().__init__(config, **kwargs)
457 # Make self.butler3 one that doesn't have any collections associated
458 # with it - those are needed by RawIngestTask and DefineVisitsTask, but
459 # we don't want them messing with converted datasets, because those
460 # have their own logic for figuring out which collections to write to.
461 self.butler3 = Butler3(butler=butler3)
462 self.registry = self.butler3.registry
463 self.universe = self.registry.dimensions
464 if self.isDatasetTypeIncluded("raw"):
465 self.makeSubtask("raws", butler=butler3)
466 self.makeSubtask("defineVisits", butler=butler3)
467 else:
468 self.raws = None
469 self.defineVisits = None
470 self.instrument = instrument
471 self._configuredSkyMapsBySha1 = {}
472 self._configuredSkyMapsByName = {}
473 for name, config in self.config.skyMaps.items():
474 instance = config.skyMap.apply()
475 self._populateSkyMapDicts(name, instance)
476 self._usedSkyPix = set()
477 self.translatorFactory = self.instrument.makeDataIdTranslatorFactory()
478 self.translatorFactory.log = self.log.getChild("translators")
479 self.dry_run = dry_run
481 def _reduce_kwargs(self):
482 # Add extra parameters to pickle
483 return dict(**super()._reduce_kwargs(), butler3=self.butler3, instrument=self.instrument)
485 def _populateSkyMapDicts(self, name, instance):
486 struct = ConfiguredSkyMap(name=name, sha1=instance.getSha1(), instance=instance)
487 self._configuredSkyMapsBySha1[struct.sha1] = struct
488 self._configuredSkyMapsByName[struct.name] = struct
490 def isDatasetTypeIncluded(self, datasetTypeName: str):
491 """Return `True` if configuration indicates that the given dataset type
492 should be converted.
494 This method is intended to be called primarily by the
495 `RepoConverter` instances used internally by the task.
497 Parameters
498 ----------
499 datasetTypeName : `str`
500 Name of the dataset type.
502 Returns
503 -------
504 included : `bool`
505 Whether the dataset should be included in the conversion.
506 """
507 return any(
508 fnmatch.fnmatchcase(datasetTypeName, pattern) for pattern in self.config.datasetIncludePatterns
509 ) and not any(
510 fnmatch.fnmatchcase(datasetTypeName, pattern) for pattern in self.config.datasetIgnorePatterns
511 )
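# Illustrative sketch (hypothetical patterns): with
# datasetIncludePatterns=["*"] and datasetIgnorePatterns=["*_camera"],
#
#     task.isDatasetTypeIncluded("calexp")          -> True
#     task.isDatasetTypeIncluded("jointcal_camera") -> False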
513 def useSkyMap(self, skyMap: BaseSkyMap, skyMapName: str) -> str:
514 """Indicate that a repository uses the given SkyMap.
516 This method is intended to be called primarily by the
517 `RepoConverter` instances used internally by the task.
519 Parameters
520 ----------
521 skyMap : `lsst.skymap.BaseSkyMap`
522 SkyMap instance being used, typically retrieved from a Gen2
523 data repository.
524 skyMapName : `str`
525 The name of the Gen2 skymap, for error reporting.
527 Returns
528 -------
529 name : `str`
530 The name of the skymap in Gen3 data IDs.
532 Raises
533 ------
534 LookupError
535 Raised if the specified skymap cannot be found.
536 """
537 sha1 = skyMap.getSha1()
538 if sha1 not in self._configuredSkyMapsBySha1:
539 self._populateSkyMapDicts(skyMapName, skyMap)
540 try:
541 struct = self._configuredSkyMapsBySha1[sha1]
542 except KeyError as err:
543 msg = f"SkyMap '{skyMapName}' with sha1={sha1} not included in configuration."
544 raise LookupError(msg) from err
545 struct.used = True
546 return struct.name
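# Illustrative sketch (names hypothetical): a RepoConverter that finds a coadd
# dataset made with a Gen2 skymap would call
#
#     skyMapName = task.useSkyMap(gen2SkyMapInstance, "discrete")
#
# and use the returned name as the "skymap" key in the Gen3 data IDs it builds.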
548 def registerUsedSkyMaps(self, subset: Optional[ConversionSubset]):
549 """Register all skymaps that have been marked as used.
551 This method is intended to be called primarily by the
552 `RepoConverter` instances used internally by the task.
554 Parameters
555 ----------
556 subset : `ConversionSubset`, optional
557 Object that will be used to filter converted datasets by data ID.
558 If given, it will be updated with the tracts of this skymap that
559 overlap the visits in the subset.
560 """
561 for struct in self._configuredSkyMapsBySha1.values():
562 if struct.used:
563 if not self.dry_run:
564 try:
565 # If the skymap isn't registered, this will raise.
566 self.butler3.registry.expandDataId(skymap=struct.name)
567 except DataIdError:
568 self.log.info("Registering skymap %s.", struct.name)
569 struct.instance.register(struct.name, self.butler3)
570 if subset is not None and self.config.relatedOnly:
571 subset.addSkyMap(self.registry, struct.name)
573 def useSkyPix(self, dimension: SkyPixDimension):
574 """Indicate that a repository uses the given SkyPix dimension.
576 This method is intended to be called primarily by the
577 `RepoConverter` instances used internally by the task.
579 Parameters
580 ----------
581 dimension : `lsst.daf.butler.SkyPixDimension`
582 Dimension representing a pixelization of the sky.
583 """
584 self._usedSkyPix.add(dimension)
586 def registerUsedSkyPix(self, subset: Optional[ConversionSubset]):
587 """Register all skymaps that have been marked as used.
589 This method is intended to be called primarily by the
590 `RepoConverter` instances used internally by the task.
592 Parameters
593 ----------
594 subset : `ConversionSubset`, optional
595 Object that will be used to filter converted datasets by data ID.
596 If given, it will be updated with the pixelization IDs that
597 overlap the visits in the subset.
598 """
599 if subset is not None and self.config.relatedOnly:
600 for dimension in self._usedSkyPix:
601 subset.addSkyPix(self.registry, dimension)
603 def run(
604 self,
605 root: str,
606 *,
607 calibs: Optional[List[CalibRepo]] = None,
608 reruns: Optional[List[Rerun]] = None,
609 visits: Optional[Iterable[int]] = None,
610 pool: Optional[Pool] = None,
611 processes: int = 1,
612 ):
613 """Convert a group of related data repositories.
615 Parameters
616 ----------
617 root : `str`
618 Complete path to the root Gen2 data repository. This should be
619 a data repository that includes a Gen2 registry and any raw files
620 and/or reference catalogs.
621 calibs : `list` of `CalibRepo`
622 Specifications for Gen2 calibration repos to convert. If `None`
623 (default), only curated calibrations will be written to the default
624 calibration collection for this instrument; set to ``()`` explicitly
625 to disable this.
626 reruns : `list` of `Rerun`
627 Specifications for rerun (processing output) repos to convert. If
628 `None` (default), no reruns are converted.
629 visits : iterable of `int`, optional
630 The integer IDs of visits to convert. If not provided, all visits
631 in the Gen2 root repository will be converted.
632 pool : `multiprocessing.Pool`, optional
633 If not `None`, a process pool with which to parallelize some
634 operations.
635 processes : `int`, optional
636 The number of processes to use for conversion.
637 """
638 if pool is None and processes > 1:
639 pool = Pool(processes)
640 if calibs is None:
641 calibs = [CalibRepo(path=None)]
642 elif calibs and not self.config.doExpandDataIds:
643 raise ValueError("Cannot convert calib repos with config.doExpandDataIds=False.")
644 if visits is not None:
645 subset = ConversionSubset(instrument=self.instrument.getName(), visits=frozenset(visits))
646 else:
647 if self.config.relatedOnly:
648 self.log.warning(
649 "config.relatedOnly is True but all visits are being ingested; "
650 "no filtering will be done."
651 )
652 subset = None
653 if not self.config.doExpandDataIds and self.butler3.datastore.needs_expanded_data_ids(
654 self.config.transfer
655 ):
656 self.log.warning(
657 "config.doExpandDataIds=False but datastore reports that expanded data IDs may be needed.",
658 self.config.transfer,
659 )
661 # Check that at most one CalibRepo is marked as default, to fail before
662 # we actually write anything.
663 defaultCalibRepos = [c.path for c in calibs if c.default]
664 if len(defaultCalibRepos) > 1:
665 raise ValueError(f"Multiple calib repos marked as default: {defaultCalibRepos}.")
667 # Make converters for all Gen2 repos.
668 converters = []
669 # Start with the root repo, which must always be given even if we are
670 # not configured to convert anything from it.
671 rootConverter = RootRepoConverter(task=self, root=root, subset=subset, instrument=self.instrument)
672 converters.append(rootConverter)
673 # Calibration repos are next.
674 for spec in calibs:
675 calibRoot = spec.path
676 if calibRoot is not None:
677 if not os.path.isabs(calibRoot):
678 calibRoot = os.path.join(rootConverter.root, calibRoot)
679 converter = CalibRepoConverter(
680 task=self,
681 root=calibRoot,
682 labels=spec.labels,
683 instrument=self.instrument,
684 mapper=rootConverter.mapper,
685 subset=rootConverter.subset,
686 )
687 converters.append(converter)
688 # CalibRepo entries that don't have a path are just there for
689 # curated calibs and maybe to set up a collection pointer; that's
690 # handled further down (after we've done everything we can that
691 # doesn't involve actually writing to the output Gen3 repo).
692 # And now reruns.
693 rerunConverters = {}
694 for spec in reruns:
695 runRoot = spec.path
696 if not os.path.isabs(runRoot):
697 runRoot = os.path.join(rootConverter.root, runRoot)
698 spec.guessCollectionNames(self.instrument, rootConverter.root)
699 converter = StandardRepoConverter(
700 task=self,
701 root=runRoot,
702 run=spec.runName,
703 instrument=self.instrument,
704 subset=rootConverter.subset,
705 )
706 converters.append(converter)
707 rerunConverters[spec.runName] = converter
709 # Walk Gen2 repos to find datasets to convert.
710 for converter in converters:
711 converter.prep()
713 # Register the instrument if we're configured to do so.
714 if self.config.doRegisterInstrument and not self.dry_run:
715 self.instrument.register(self.registry)
717 # Run raw ingest (does nothing if we weren't configured to convert the
718 # 'raw' dataset type).
719 rootConverter.runRawIngest(pool=pool)
721 # Write curated calibrations to all calibration collections where they
722 # were requested (which may be implicit, by passing calibs=None). Also
723 # set up a CHAINED collection that points to the default CALIBRATION
724 # collection if one is needed.
725 if not self.dry_run:
726 for spec in calibs:
727 if spec.curated:
728 self.instrument.writeCuratedCalibrations(self.butler3, labels=spec.labels)
729 if spec.default and spec.labels:
730 # This is guaranteed to be True at most once in the loop by
731 # logic at the top of this method.
732 defaultCalibName = self.instrument.makeCalibrationCollectionName()
733 self.butler3.registry.registerCollection(defaultCalibName, CollectionType.CHAINED)
734 recommendedCalibName = self.instrument.makeCalibrationCollectionName(*spec.labels)
735 self.butler3.registry.registerCollection(recommendedCalibName, CollectionType.CALIBRATION)
736 self.butler3.registry.setCollectionChain(defaultCalibName, [recommendedCalibName])
738 # Define visits (also does nothing if we weren't configured to convert
739 # the 'raw' dataset type).
740 rootConverter.runDefineVisits()
742 # Insert dimensions that are potentially shared by all Gen2
743 # repositories (and are hence managed directly by the Task, rather
744 # than a converter instance).
745 # This also finishes setting up the (shared) converter.subsets object
746 # that is used to filter data IDs for config.relatedOnly.
747 self.registerUsedSkyMaps(rootConverter.subset)
748 self.registerUsedSkyPix(rootConverter.subset)
750 # Look for datasets, generally by scanning the filesystem.
751 # This requires dimensions to have already been inserted so we can use
752 # dimension information to identify related datasets.
753 for converter in converters:
754 converter.findDatasets()
756 # Expand data IDs.
757 if self.config.doExpandDataIds:
758 for converter in converters:
759 converter.expandDataIds()
761 if self.dry_run:
762 return
764 # Actually ingest datasets.
765 for converter in converters:
766 converter.ingest()
768 # Perform any post-ingest processing.
769 for converter in converters:
770 converter.finish()
772 # Make the umbrella collection, if desired.
773 if self.config.doMakeUmbrellaCollection:
774 umbrella = self.instrument.makeUmbrellaCollectionName()
775 self.registry.registerCollection(umbrella, CollectionType.CHAINED)
776 children = list(self.registry.getCollectionChain(umbrella))
777 children.extend(rootConverter.getCollectionChain())
778 children.append(self.instrument.makeCalibrationCollectionName())
779 if BaseSkyMap.SKYMAP_RUN_COLLECTION_NAME not in children:
780 # Ensure the umbrella collection includes the global skymap
781 # collection, even if it's currently empty.
782 self.registry.registerRun(BaseSkyMap.SKYMAP_RUN_COLLECTION_NAME)
783 children.append(BaseSkyMap.SKYMAP_RUN_COLLECTION_NAME)
784 children.extend(self.config.extraUmbrellaChildren)
785 self.log.info("Defining %s from chain %s.", umbrella, children)
786 self.registry.setCollectionChain(umbrella, children)
788 # Add chained collections for reruns.
789 for spec in reruns:
790 if spec.chainName is not None:
791 self.butler3.registry.registerCollection(spec.chainName, type=CollectionType.CHAINED)
792 chain = [spec.runName]
793 chain.extend(rerunConverters[spec.runName].getCollectionChain())
794 for parent in spec.parents:
795 chain.append(parent)
796 parentConverter = rerunConverters.get(parent)
797 if parentConverter is not None:
798 chain.extend(parentConverter.getCollectionChain())
799 chain.extend(rootConverter.getCollectionChain())
800 if len(calibs) == 1:
801 # Exactly one calibration repo being converted, so it's
802 # safe-ish to assume that's the one the rerun used.
803 chain.append(self.instrument.makeCalibrationCollectionName(*calibs[0].labels))
804 self.log.info("Defining %s from chain %s.", spec.chainName, chain)
805 self.butler3.registry.setCollectionChain(spec.chainName, chain, flatten=True)
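# Illustrative driver sketch (repository paths, instrument, and labels are
# hypothetical): how ConvertRepoTask is typically constructed and run.
#
#     from lsst.daf.butler import Butler
#     from lsst.obs.subaru import HyperSuprimeCam
#
#     instrument = HyperSuprimeCam()
#     butler3 = Butler("/repo/gen3", writeable=True, run=instrument.makeDefaultRawIngestRunName())
#     config = ConvertRepoConfig()
#     config.transfer = "symlink"
#     task = ConvertRepoTask(config=config, butler3=butler3, instrument=instrument)
#     task.run(
#         root="/repo/gen2",
#         calibs=[CalibRepo(path="CALIB", labels=("gen2",))],
#         reruns=[Rerun(path="rerun/private/jdoe/w40", runName=None, chainName=None, parents=[])],
#     )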