Coverage for python/lsst/obs/base/gen2to3/convertRepo.py : 25%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of obs_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ["CalibRepo", "ConvertRepoConfig", "ConvertRepoTask", "ConvertRepoSkyMapConfig", "Rerun"]
25import os
26import fnmatch
27from dataclasses import dataclass
28from multiprocessing import Pool
29from typing import Iterable, Optional, List, Tuple
31from lsst.daf.butler import (
32 Butler as Butler3,
33 ButlerURI,
34 CollectionType,
35 SkyPixDimension
36)
37from lsst.pex.config import Config, ConfigurableField, ConfigDictField, DictField, ListField, Field
38from lsst.pipe.base import Task
39from lsst.skymap import skyMapRegistry, BaseSkyMap
41from ..ingest import RawIngestTask
42from ..defineVisits import DefineVisitsTask
43from .repoConverter import ConversionSubset
44from .rootRepoConverter import RootRepoConverter
45from .calibRepoConverter import CalibRepoConverter
46from .standardRepoConverter import StandardRepoConverter
47from .._instrument import Instrument
@dataclass
class ConfiguredSkyMap:
    """Struct containing information about a skymap that may appear in a Gen2
    repository.
    """

    name: str
    """Name of the skymap used in Gen3 data IDs.
    """

    sha1: bytes
    """Hash computed by `BaseSkyMap.getSha1`.
    """

    instance: BaseSkyMap
    """The skymap object itself (`lsst.skymap.BaseSkyMap`).
    """

    used: bool = False
    """Whether this skymap has been found in at least one repository being
    converted.
    """
74def _dropPrefix(s: str, prefix: str) -> Tuple[str, bool]:
75 """If ``s`` starts with ``prefix``, return the rest of ``s`` and `True`.
76 Otherwise return ``s`` and `False`.
77 """
78 if s.startswith(prefix):
79 return s[len(prefix):], True
80 return s, False
@dataclass
class Rerun:
    """Description of one Gen2 processing-output ("rerun") repository that
    should be converted.
    """

    path: str
    """Path to the Gen2 repository, either absolute or relative to the root
    repository (`str`).
    """

    runName: Optional[str]
    """Name of the `~lsst.daf.butler.CollectionType.RUN` collection that
    datasets will be inserted into (`str` or `None`).

    If `None`, a name will be guessed by calling `guessCollectionNames`.
    """

    chainName: Optional[str]
    """Name of a `~lsst.daf.butler.CollectionType.CHAINED` collection that
    combines this repository's datasets with those of its parent repositories
    (`str` or `None`).

    If `None`, a name will be guessed by calling `guessCollectionNames`.
    """

    parents: List[str]
    """Collection names associated with parent repositories, used to define
    the chained collection (`list` [ `str` ]).

    Ignored if `chainName` is `None`.  Runs used in the root repo are
    automatically included.
    """

    def guessCollectionNames(self, instrument: Instrument, root: str) -> None:
        """Fill in `runName` and `chainName` with guesses that follow Gen3
        naming conventions.

        When `runName` is already set this method does nothing (so a `None`
        `chainName` stays `None` and no chained collection is created).
        Otherwise `chainName` is guessed from ``self.path`` if it is not
        already set, and `runName` is derived from `chainName`.

        Parameters
        ----------
        instrument : `Instrument`
            Instrument object for the repository being converted.
        root : `str`
            Path to the root repository.  If this is present at the start of
            ``self.path``, it will be stripped as part of generating the run
            name.

        Raises
        ------
        ValueError
            Raised if the appropriate collection names cannot be inferred.
        """
        if self.runName is not None:
            # An explicit run name means there is nothing to guess.
            return
        if self.chainName is None:
            if not os.path.isabs(self.path):
                # Relative paths are already expressed relative to the root.
                relative = self.path
            else:
                relative = ButlerURI(self.path).relative_to(ButlerURI(root))
                if relative is None:
                    raise ValueError(
                        f"Cannot guess run name collection for rerun at '{self.path}': "
                        f"no clear relationship to root '{root}'."
                    )
            relative, _ = _dropPrefix(relative, "rerun/")
            relative, isPersonal = _dropPrefix(relative, "private/")
            if isPersonal:
                guessed = f"u/{relative}"
            else:
                relative, _ = _dropPrefix(relative, "shared/")
                guessed = instrument.makeCollectionName("runs", relative)
            self.chainName = guessed
        self.runName = f"{self.chainName}/direct"
@dataclass
class CalibRepo:
    """Description of one Gen2 calibration repository that should be
    converted.
    """

    path: Optional[str]
    """Path to the Gen2 repository, either absolute or relative to the root
    repository (`str` or `None`).

    If `None`, no calibration datasets will be converted from Gen2, but
    curated calibrations may still be written.
    """

    curated: bool = True
    """If `True`, write curated calibrations into the associated
    ``CALIBRATION`` collection (`bool`).
    """

    labels: Tuple[str, ...] = ()
    """Extra strings to insert into collection names, including both the
    ``RUN`` collections that datasets are ingested directly into and the
    ``CALIBRATION`` collection that associates them with validity ranges.

    An empty tuple will directly populate the default calibration collection
    for this instrument with the converted datasets, and is incompatible with
    ``default=False``.  This is a good choice for test data repositories where
    only one ``CALIBRATION`` collection will ever exist.  In other cases, this
    should be a non-empty tuple, so the default calibration collection can
    actually be a ``CHAINED`` collection pointer that points to the current
    recommended ``CALIBRATION`` collection.
    """

    default: bool = True
    """If `True`, the created ``CALIBRATION`` collection should be the default
    for this instrument.

    This field may only be `True` for one converted calibration collection if
    more than one is passed to `ConvertRepoTask.run`.  It defaults to `True`
    because the vast majority of the time only one calibration collection is
    being converted.  If ``labels`` is not empty, ``default=True`` will cause
    a ``CHAINED`` collection that points to the converted ``CALIBRATION``
    collection to be defined.  If ``labels`` is empty, ``default`` *must* be
    `True` and no ``CHAINED`` collection pointer is necessary.
    """

    def __post_init__(self) -> None:
        # With no labels the datasets go straight into the default
        # calibration collection, so that combination cannot be non-default.
        if not (self.labels or self.default):
            raise ValueError("labels=() requires default=True")
class ConvertRepoSkyMapConfig(Config):
    """Sub-config that holds the parameters of a single SkyMap.

    Notes
    -----
    The only reason this config exists is that a
    `~lsst.pex.config.RegistryField` cannot be placed directly inside a
    `~lsst.pex.config.ConfigDictField`.

    Its one field must be named "skyMap" for compatibility with the
    configuration of `lsst.pipe.tasks.MakeSkyMapTask`, so that a single config
    file in an obs package can configure both.

    That name causes unfortunate repetition with the field named "skymap"
    that holds it - "skyMap[name].skyMap" - but that seems unavoidable.
    """

    skyMap = skyMapRegistry.makeField(
        default="dodeca",
        doc="Type and parameters for the SkyMap itself.",
    )
class ConvertRepoConfig(Config):
    """Configuration for `ConvertRepoTask`.

    Notes
    -----
    The ``transfer`` property is a convenience alias for
    ``self.raws.transfer``; `setDefaults` sets it to `None`.
    """

    raws = ConfigurableField(
        "Configuration for subtask responsible for ingesting raws and adding "
        "exposure dimension entries.",
        target=RawIngestTask,
    )
    defineVisits = ConfigurableField(
        "Configuration for the subtask responsible for defining visits from "
        "exposures.",
        target=DefineVisitsTask,
    )
    skyMaps = ConfigDictField(
        "Mapping from Gen3 skymap name to the parameters used to construct a "
        "BaseSkyMap instance. This will be used to associate names with "
        "existing skymaps found in the Gen2 repo.",
        keytype=str,
        itemtype=ConvertRepoSkyMapConfig,
        default={}
    )
    rootSkyMapName = Field(
        "Name of a Gen3 skymap (an entry in ``self.skyMaps``) to assume for "
        "datasets in the root repository when no SkyMap is found there. ",
        dtype=str,
        optional=True,
        default=None,
    )
    runs = DictField(
        "A mapping from dataset type name to the RUN collection they should "
        "be inserted into. This must include all datasets that can be found "
        "in the root repository; other repositories will use per-repository "
        "runs.",
        keytype=str,
        itemtype=str,
        default={},
    )
    runsForced = DictField(
        "Like ``runs``, but is used even when the dataset is present in a "
        "non-root repository (i.e. rerun), overriding the non-root "
        "repository's main collection.",
        keytype=str,
        itemtype=str,
        default={
            "brightObjectMask": "masks",
        }
    )
    storageClasses = DictField(
        "Mapping from dataset type name or Gen2 policy entry (e.g. 'python' "
        "or 'persistable') to the Gen3 StorageClass name.",
        keytype=str,
        itemtype=str,
        default={
            "bias": "ExposureF",
            "dark": "ExposureF",
            "flat": "ExposureF",
            "defects": "Defects",
            "crosstalk": "CrosstalkCalib",
            "BaseSkyMap": "SkyMap",
            "BaseCatalog": "Catalog",
            "BackgroundList": "Background",
            "raw": "Exposure",
            "MultilevelParquetTable": "DataFrame",
            "ParquetTable": "DataFrame",
            "SkyWcs": "Wcs",
        }
    )
    formatterClasses = DictField(
        "Mapping from dataset type name to formatter class. "
        "By default these are derived from the formatters listed in the"
        " Gen3 datastore configuration.",
        keytype=str,
        itemtype=str,
        default={}
    )
    targetHandlerClasses = DictField(
        "Mapping from dataset type name to target handler class.",
        keytype=str,
        itemtype=str,
        default={}
    )
    doRegisterInstrument = Field(
        "If True (default), add dimension records for the Instrument and its "
        "filters and detectors to the registry instead of assuming they are "
        "already present.",
        dtype=bool,
        default=True,
    )
    refCats = ListField(
        "The names of reference catalogs (subdirectories under ref_cats) to "
        "be converted",
        dtype=str,
        default=[]
    )
    fileIgnorePatterns = ListField(
        "Filename globs that should be ignored instead of being treated as "
        "datasets.",
        dtype=str,
        default=["README.txt", "*~?", "butler.yaml", "gen3.sqlite3",
                 "registry.sqlite3", "calibRegistry.sqlite3", "_mapper",
                 "_parent", "repositoryCfg.yaml"]
    )
    rawDatasetType = Field(
        "Gen2 dataset type to use for raw data.",
        dtype=str,
        default="raw",
    )
    datasetIncludePatterns = ListField(
        "Glob-style patterns for dataset type names that should be converted.",
        dtype=str,
        default=["*"]
    )
    datasetIgnorePatterns = ListField(
        "Glob-style patterns for dataset type names that should not be "
        "converted despite matching a pattern in datasetIncludePatterns.",
        dtype=str,
        default=[]
    )
    ccdKey = Field(
        "Key used for the Gen2 equivalent of 'detector' in data IDs.",
        dtype=str,
        default="ccd",
    )
    # The documentation previously claimed True was the default, which
    # contradicted ``default=False`` below; the default is unchanged.
    relatedOnly = Field(
        "If True, only convert datasets that are related to the "
        "ingested visits (default is False). Ignored unless a list of "
        "visits is passed to run().",
        dtype=bool,
        default=False,
    )
    doMakeUmbrellaCollection = Field(
        "If True (default), define an '<instrument>/defaults' CHAINED "
        "collection that includes everything found in the root repo as well "
        "as the default calibration collection.",
        dtype=bool,
        default=True,
    )
    extraUmbrellaChildren = ListField(
        "Additional child collections to include in the umbrella collection. "
        "Ignored if doMakeUmbrellaCollection=False.",
        dtype=str,
        default=[]
    )

    @property
    def transfer(self):
        """Transfer mode; an alias for ``self.raws.transfer``.
        """
        return self.raws.transfer

    @transfer.setter
    def transfer(self, value):
        self.raws.transfer = value

    def setDefaults(self):
        # Overrides the ``raws`` subtask's own transfer default with None.
        self.transfer = None
class ConvertRepoTask(Task):
    """A task that converts one or more related Gen2 data repositories to a
    single Gen3 data repository (with multiple collections).

    Parameters
    ----------
    config: `ConvertRepoConfig`
        Configuration for this task.  If `None`, a default-constructed
        `ConvertRepoConfig` is used.
    butler3: `lsst.daf.butler.Butler`
        A writeable Gen3 Butler instance that represents the data repository
        that datasets will be ingested into.  If the 'raw' dataset is
        configured to be included in the conversion, ``butler3.run`` should be
        set to the name of the collection raws should be ingested into, and
        ``butler3.collections`` should include a calibration collection from
        which the ``camera`` dataset can be loaded, unless curated
        calibrations are written as part of the conversion (see
        `CalibRepo.curated`).
    instrument : `lsst.obs.base.Instrument`
        The Gen3 instrument that should be used for this conversion.
    **kwargs
        Other keyword arguments are forwarded to the `Task` constructor.

    Notes
    -----
    Most of the work of converting repositories is delegated to instances of
    the `RepoConverter` hierarchy.  The `ConvertRepoTask` instance itself holds
    only state that is relevant for all Gen2 repositories being ingested, while
    each `RepoConverter` instance holds only state relevant for the conversion
    of a single Gen2 repository.  Both the task and the `RepoConverter`
    instances are single use; `ConvertRepoTask.run` and most `RepoConverter`
    methods may only be called once on a particular instance.
    """

    ConfigClass = ConvertRepoConfig

    _DefaultName = "convertRepo"

    def __init__(self, config=None, *, butler3: Butler3, instrument: Instrument, **kwargs):
        # ``config`` is optional in the signature, so build the default
        # before validating rather than calling a method on None.
        if config is None:
            config = self.ConfigClass()
        config.validate()  # Not a CmdlineTask nor PipelineTask, so have to validate the config here.
        super().__init__(config, **kwargs)
        # Make self.butler3 one that doesn't have any collections associated
        # with it - those are needed by RawIngestTask and DefineVisitsTask, but
        # we don't want them messing with converted datasets, because those
        # have their own logic for figuring out which collections to write to.
        self.butler3 = Butler3(butler=butler3)
        self.registry = self.butler3.registry
        self.universe = self.registry.dimensions
        if self.isDatasetTypeIncluded("raw"):
            self.makeSubtask("raws", butler=butler3)
            self.makeSubtask("defineVisits", butler=butler3)
        else:
            self.raws = None
            self.defineVisits = None
        self.instrument = instrument
        self._configuredSkyMapsBySha1 = {}
        self._configuredSkyMapsByName = {}
        # Use a distinct loop name so the ``config`` argument is not shadowed.
        for name, skyMapConfig in self.config.skyMaps.items():
            instance = skyMapConfig.skyMap.apply()
            self._populateSkyMapDicts(name, instance)
        self._usedSkyPix = set()
        self.translatorFactory = self.instrument.makeDataIdTranslatorFactory()
        self.translatorFactory.log = self.log.getChild("translators")

    def _reduce_kwargs(self):
        # Add extra parameters to pickle
        return dict(**super()._reduce_kwargs(), butler3=self.butler3, instrument=self.instrument)

    def _populateSkyMapDicts(self, name, instance):
        """Record a skymap in both the by-hash and by-name lookup dicts.
        """
        struct = ConfiguredSkyMap(name=name, sha1=instance.getSha1(), instance=instance)
        self._configuredSkyMapsBySha1[struct.sha1] = struct
        self._configuredSkyMapsByName[struct.name] = struct

    def isDatasetTypeIncluded(self, datasetTypeName: str):
        """Return `True` if configuration indicates that the given dataset type
        should be converted.

        This method is intended to be called primarily by the
        `RepoConverter` instances used internally by the task.

        Parameters
        ----------
        datasetTypeName: str
            Name of the dataset type.

        Returns
        -------
        included : `bool`
            Whether the dataset should be included in the conversion.
        """
        return (
            any(fnmatch.fnmatchcase(datasetTypeName, pattern)
                for pattern in self.config.datasetIncludePatterns)
            and not any(fnmatch.fnmatchcase(datasetTypeName, pattern)
                        for pattern in self.config.datasetIgnorePatterns)
        )

    def useSkyMap(self, skyMap: BaseSkyMap, skyMapName: str) -> str:
        """Indicate that a repository uses the given SkyMap.

        This method is intended to be called primarily by the
        `RepoConverter` instances used internally by the task.

        A skymap whose hash is not already known is recorded under
        ``skyMapName`` before being marked as used.

        Parameters
        ----------
        skyMap : `lsst.skymap.BaseSkyMap`
            SkyMap instance being used, typically retrieved from a Gen2
            data repository.
        skyMapName : `str`
            The name of the gen2 skymap, for error reporting.

        Returns
        -------
        name : `str`
            The name of the skymap in Gen3 data IDs.

        Raises
        ------
        LookupError
            Raised if the specified skymap cannot be found.
        """
        sha1 = skyMap.getSha1()
        if sha1 not in self._configuredSkyMapsBySha1:
            self._populateSkyMapDicts(skyMapName, skyMap)
        # NOTE(review): with the registration above, this lookup can only fail
        # if _populateSkyMapDicts does not store the struct under this hash
        # (e.g. in an override); the guard is kept as a defensive check.
        try:
            struct = self._configuredSkyMapsBySha1[sha1]
        except KeyError as err:
            msg = f"SkyMap '{skyMapName}' with sha1={sha1} not included in configuration."
            raise LookupError(msg) from err
        struct.used = True
        return struct.name

    def registerUsedSkyMaps(self, subset: Optional[ConversionSubset]):
        """Register all skymaps that have been marked as used.

        This method is intended to be called primarily by the
        `RepoConverter` instances used internally by the task.

        Parameters
        ----------
        subset : `ConversionSubset`, optional
            Object that will be used to filter converted datasets by data ID.
            If given (and ``config.relatedOnly`` is `True`), it will be
            updated with the tracts of this skymap that overlap the visits in
            the subset.
        """
        for struct in self._configuredSkyMapsBySha1.values():
            if struct.used:
                struct.instance.register(struct.name, self.butler3)
                if subset is not None and self.config.relatedOnly:
                    subset.addSkyMap(self.registry, struct.name)

    def useSkyPix(self, dimension: SkyPixDimension):
        """Indicate that a repository uses the given SkyPix dimension.

        This method is intended to be called primarily by the
        `RepoConverter` instances used internally by the task.

        Parameters
        ----------
        dimension : `lsst.daf.butler.SkyPixDimension`
            Dimension representing a pixelization of the sky.
        """
        self._usedSkyPix.add(dimension)

    def registerUsedSkyPix(self, subset: Optional[ConversionSubset]):
        """Add all SkyPix dimensions that have been marked as used to the
        given subset.

        This method is intended to be called primarily by the
        `RepoConverter` instances used internally by the task.

        Parameters
        ----------
        subset : `ConversionSubset`, optional
            Object that will be used to filter converted datasets by data ID.
            If given (and ``config.relatedOnly`` is `True`), it will be
            updated with the pixelization IDs that overlap the visits in the
            subset.
        """
        if subset is not None and self.config.relatedOnly:
            for dimension in self._usedSkyPix:
                subset.addSkyPix(self.registry, dimension)

    def run(self, root: str, *,
            calibs: Optional[List[CalibRepo]] = None,
            reruns: Optional[List[Rerun]] = None,
            visits: Optional[Iterable[int]] = None,
            pool: Optional[Pool] = None,
            processes: int = 1):
        """Convert a group of related data repositories.

        Parameters
        ----------
        root : `str`
            Complete path to the root Gen2 data repository.  This should be
            a data repository that includes a Gen2 registry and any raw files
            and/or reference catalogs.
        calibs : `list` of `CalibRepo`
            Specifications for Gen2 calibration repos to convert.  If `None`
            (default), curated calibrations only will be written to the default
            calibration collection for this instrument; set to ``()`` explicitly
            to disable this.
        reruns : `list` of `Rerun`
            Specifications for rerun (processing output) repos to convert.  If
            `None` (default), no reruns are converted.
        visits : iterable of `int`, optional
            The integer IDs of visits to convert.  If not provided, all visits
            in the Gen2 root repository will be converted.
        pool : `multiprocessing.Pool`, optional
            If not `None`, a process pool with which to parallelize some
            operations.  A pool passed in by the caller is never shut down
            by this method.
        processes : `int`, optional
            The number of processes to use for conversion.  Only used to
            create a pool when ``pool`` is `None` and this is greater than 1;
            a pool created that way is shut down before this method returns.

        Raises
        ------
        ValueError
            Raised if more than one entry in ``calibs`` is marked as the
            default.
        """
        if calibs is None:
            calibs = [CalibRepo(path=None)]
        if reruns is None:
            # The documented meaning of None is "convert no reruns"; the
            # loops over reruns below need a real (empty) sequence.
            reruns = []
        if visits is not None:
            subset = ConversionSubset(instrument=self.instrument.getName(), visits=frozenset(visits))
        else:
            if self.config.relatedOnly:
                self.log.warn("config.relatedOnly is True but all visits are being ingested; "
                              "no filtering will be done.")
            subset = None

        # Check that at most one CalibRepo is marked as default, to fail before
        # we actually write anything.
        defaultCalibRepos = [c.path for c in calibs if c.default]
        if len(defaultCalibRepos) > 1:
            raise ValueError(f"Multiple calib repos marked as default: {defaultCalibRepos}.")

        # Only a pool created here is ours to shut down.
        ownedPool = None
        if pool is None and processes > 1:
            pool = ownedPool = Pool(processes)
        try:
            self._convert(root, calibs=calibs, reruns=reruns, subset=subset, pool=pool)
        finally:
            if ownedPool is not None:
                # Same shutdown the Pool context manager performs.
                ownedPool.terminate()
                ownedPool.join()

    def _convert(self, root, *, calibs, reruns, subset, pool):
        """Do the conversion work for `run` once its arguments have been
        normalized (no `None` for ``calibs``/``reruns``).
        """
        # Make converters for all Gen2 repos.
        converters = []
        # Start with the root repo, which must always be given even if we are
        # not configured to convert anything from it.
        rootConverter = RootRepoConverter(task=self, root=root, subset=subset, instrument=self.instrument)
        converters.append(rootConverter)
        # Calibration repos are next.
        for spec in calibs:
            calibRoot = spec.path
            if calibRoot is not None:
                if not os.path.isabs(calibRoot):
                    calibRoot = os.path.join(rootConverter.root, calibRoot)
                converter = CalibRepoConverter(task=self, root=calibRoot,
                                               labels=spec.labels,
                                               instrument=self.instrument,
                                               mapper=rootConverter.mapper,
                                               subset=rootConverter.subset)
                converters.append(converter)
            # CalibRepo entries that don't have a path are just there for
            # curated calibs and maybe to set up a collection pointer; that's
            # handled further down (after we've done everything we can that
            # doesn't involve actually writing to the output Gen3 repo).
        # And now reruns.
        rerunConverters = {}
        for spec in reruns:
            runRoot = spec.path
            if not os.path.isabs(runRoot):
                runRoot = os.path.join(rootConverter.root, runRoot)
            spec.guessCollectionNames(self.instrument, rootConverter.root)
            converter = StandardRepoConverter(task=self, root=runRoot, run=spec.runName,
                                              instrument=self.instrument, subset=rootConverter.subset)
            converters.append(converter)
            rerunConverters[spec.runName] = converter

        # Register the instrument if we're configured to do so.
        if self.config.doRegisterInstrument:
            self.instrument.register(self.registry)

        # Run raw ingest (does nothing if we weren't configured to convert the
        # 'raw' dataset type).
        rootConverter.runRawIngest(pool=pool)

        # Write curated calibrations to all calibration collections where they
        # were requested (which may be implicit, by passing calibs=None).  Also
        # set up a CHAINED collection that points to the default CALIBRATION
        # collection if one is needed.
        for spec in calibs:
            if spec.curated:
                self.instrument.writeCuratedCalibrations(self.butler3, labels=spec.labels)
            if spec.default and spec.labels:
                # This is guaranteed to be True at most once in the loop by
                # the default-calib check done in `run`.
                defaultCalibName = self.instrument.makeCalibrationCollectionName()
                self.butler3.registry.registerCollection(defaultCalibName, CollectionType.CHAINED)
                recommendedCalibName = self.instrument.makeCalibrationCollectionName(*spec.labels)
                self.butler3.registry.registerCollection(recommendedCalibName, CollectionType.CALIBRATION)
                self.butler3.registry.setCollectionChain(defaultCalibName, [recommendedCalibName])

        # Define visits (also does nothing if we weren't configured to convert
        # the 'raw' dataset type).
        rootConverter.runDefineVisits(pool=pool)

        # Walk Gen2 repos to find datasets to convert.
        for converter in converters:
            converter.prep()

        # Insert dimensions that are potentially shared by all Gen2
        # repositories (and are hence managed directly by the Task, rather
        # than a converter instance).
        # This also finishes setting up the (shared) converter.subsets object
        # that is used to filter data IDs for config.relatedOnly.
        self.registerUsedSkyMaps(rootConverter.subset)
        self.registerUsedSkyPix(rootConverter.subset)

        # Look for datasets, generally by scanning the filesystem.
        # This requires dimensions to have already been inserted so we can use
        # dimension information to identify related datasets.
        for converter in converters:
            converter.findDatasets()

        # Expand data IDs.
        for converter in converters:
            converter.expandDataIds()

        # Actually ingest datasets.
        for converter in converters:
            converter.ingest()

        # Perform any post-ingest processing.
        for converter in converters:
            converter.finish()

        # Make the umbrella collection, if desired.
        if self.config.doMakeUmbrellaCollection:
            umbrella = self.instrument.makeUmbrellaCollectionName()
            self.registry.registerCollection(umbrella, CollectionType.CHAINED)
            children = list(self.registry.getCollectionChain(umbrella))
            children.extend(rootConverter.getCollectionChain())
            children.append(self.instrument.makeCalibrationCollectionName())
            if BaseSkyMap.SKYMAP_RUN_COLLECTION_NAME not in children:
                # Ensure the umbrella collection includes the global skymap
                # collection, even if it's currently empty.
                self.registry.registerRun(BaseSkyMap.SKYMAP_RUN_COLLECTION_NAME)
                children.append(BaseSkyMap.SKYMAP_RUN_COLLECTION_NAME)
            children.extend(self.config.extraUmbrellaChildren)
            self.log.info("Defining %s from chain %s.", umbrella, children)
            self.registry.setCollectionChain(umbrella, children)

        # Add chained collections for reruns.
        for spec in reruns:
            if spec.chainName is not None:
                self.butler3.registry.registerCollection(spec.chainName, type=CollectionType.CHAINED)
                chain = [spec.runName]
                chain.extend(rerunConverters[spec.runName].getCollectionChain())
                for parent in spec.parents:
                    chain.append(parent)
                    parentConverter = rerunConverters.get(parent)
                    if parentConverter is not None:
                        chain.extend(parentConverter.getCollectionChain())
                chain.extend(rootConverter.getCollectionChain())
                if len(calibs) == 1:
                    # Exactly one calibration repo being converted, so it's
                    # safe-ish to assume that's the one the rerun used.
                    chain.append(self.instrument.makeCalibrationCollectionName(*calibs[0].labels))
                self.log.info("Defining %s from chain %s.", spec.chainName, chain)
                self.butler3.registry.setCollectionChain(spec.chainName, chain)