Coverage for python/lsst/obs/base/gen2to3/convertRepo.py : 25%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of obs_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ["CalibRepo", "ConvertRepoConfig", "ConvertRepoTask", "ConvertRepoSkyMapConfig", "Rerun"]
25import os
26import fnmatch
27from dataclasses import dataclass
28from multiprocessing import Pool
29from typing import Iterable, Optional, List, Tuple
31from lsst.daf.butler import (
32 Butler as Butler3,
33 ButlerURI,
34 CollectionType,
35 SkyPixDimension
36)
37from lsst.pex.config import Config, ConfigurableField, ConfigDictField, DictField, ListField, Field
38from lsst.pipe.base import Task
39from lsst.skymap import skyMapRegistry, BaseSkyMap
41from ..ingest import RawIngestTask
42from ..defineVisits import DefineVisitsTask
43from .repoConverter import ConversionSubset
44from .rootRepoConverter import RootRepoConverter
45from .calibRepoConverter import CalibRepoConverter
46from .standardRepoConverter import StandardRepoConverter
47from .._instrument import Instrument
@dataclass
class ConfiguredSkyMap:
    """Struct containing information about a skymap that may appear in a Gen2
    repository.
    """

    name: str
    """Name of the skymap used in Gen3 data IDs.
    """

    sha1: bytes
    """Hash computed by `BaseSkyMap.getSha1`.
    """

    instance: BaseSkyMap
    """The skymap instance itself.
    """

    used: bool = False
    """Whether this skymap has been found in at least one repository being
    converted.
    """
74def _dropPrefix(s: str, prefix: str) -> Tuple[str, bool]:
75 """If ``s`` starts with ``prefix``, return the rest of ``s`` and `True`.
76 Otherwise return ``s`` and `False`.
77 """
78 if s.startswith(prefix):
79 return s[len(prefix):], True
80 return s, False
@dataclass
class Rerun:
    """Specification for a Gen2 processing-output repository to convert.
    """

    path: str
    """Path to the Gen2 repository, either absolute or relative to the root
    repository (`str`).
    """

    runName: Optional[str]
    """Name of the `~lsst.daf.butler.CollectionType.RUN` collection that will
    receive this repository's datasets (`str` or `None`).

    When `None`, `guessCollectionNames` is expected to fill it in.
    """

    chainName: Optional[str]
    """Name of a `~lsst.daf.butler.CollectionType.CHAINED` collection that
    combines this repository's datasets with those of its parents (`str` or
    `None`).

    When `None`, `guessCollectionNames` is expected to fill it in.
    """

    parents: List[str]
    """Collection names for parent repositories, used when building the
    chained collection (`list` [ `str` ]).

    Ignored if `chainName` is `None`.  Runs used in the root repo are
    automatically included.
    """

    def guessCollectionNames(self, instrument: Instrument, root: str) -> None:
        """Fill in `runName` and `chainName` with guesses following Gen3
        naming conventions.

        If `chainName` is already set but `runName` is not, `runName` is
        derived from it.  If `runName` is already set, this method is a
        no-op; if `chainName` stays `None`, no chained collection is made.

        Parameters
        ----------
        instrument : `Instrument`
            Instrument object for the repository being converted.
        root : `str`
            Path to the root repository.  If this is present at the start of
            ``self.path``, it will be stripped as part of generating the run
            name.

        Raises
        ------
        ValueError
            Raised if the appropriate collection names cannot be inferred.
        """
        if self.runName is not None:
            # Caller supplied an explicit run name; nothing to guess.
            return
        if self.chainName is None:
            if not os.path.isabs(self.path):
                relative = self.path
            else:
                # Strip the root path off via URI arithmetic; failure means
                # the rerun does not live under the root repo.
                relative = ButlerURI(self.path).relative_to(ButlerURI(root))
                if relative is None:
                    raise ValueError(
                        f"Cannot guess run name collection for rerun at '{self.path}': "
                        f"no clear relationship to root '{root}'."
                    )
            # Conventional Gen2 layouts: rerun/private/<user>/... becomes a
            # "u/<user>" collection; rerun/shared/... just drops the prefix.
            relative, _ = _dropPrefix(relative, "rerun/")
            relative, isPersonal = _dropPrefix(relative, "private/")
            if isPersonal:
                relative = f"u/{relative}"
            else:
                relative, _ = _dropPrefix(relative, "shared/")
            self.chainName = instrument.makeCollectionName("runs", relative)
        self.runName = f"{self.chainName}/direct"
@dataclass
class CalibRepo:
    """Specification for a Gen2 calibration repository to convert.
    """

    path: Optional[str]
    """Path to the Gen2 repository, either absolute or relative to the root
    repository (`str` or `None`).

    When `None`, no Gen2 calibration datasets are converted, though curated
    calibrations may still be written.
    """

    curated: bool = True
    """Whether to write curated calibrations into the associated
    ``CALIBRATION`` collection (`bool`).
    """

    labels: Tuple[str, ...] = ()
    """Extra strings spliced into collection names, for both the ``RUN``
    collections datasets land in directly and the ``CALIBRATION`` collection
    that attaches validity ranges.

    Leaving this empty populates the instrument's default calibration
    collection directly with the converted datasets, which is incompatible
    with ``default=False``.  That is a good fit for test data repositories
    with a single ``CALIBRATION`` collection ever; otherwise supply a
    non-empty tuple so the default calibration collection can instead be a
    ``CHAINED`` pointer at the currently recommended ``CALIBRATION``
    collection.
    """

    default: bool = True
    """Whether the created ``CALIBRATION`` collection should be the
    instrument's default.

    At most one of the `CalibRepo` specifications passed to
    `ConvertRepoTask.run` may set this to `True`; it defaults to `True`
    because converting a single calibration collection is by far the common
    case.  With non-empty ``labels`` this triggers creation of a ``CHAINED``
    pointer collection; with empty ``labels`` it *must* be `True` and no
    pointer is needed.
    """

    def __post_init__(self) -> None:
        # Empty labels write straight into the default collection, so they
        # cannot be combined with default=False.
        if not (self.labels or self.default):
            raise ValueError("labels=() requires default=True")
class ConvertRepoSkyMapConfig(Config):
    """Sub-config used to hold the parameters of a SkyMap.

    Notes
    -----
    This config only needs to exist because we can't put a
    `~lsst.pex.config.RegistryField` directly inside a
    `~lsst.pex.config.ConfigDictField`.

    It needs to have its only field named "skyMap" for compatibility with the
    configuration of `lsst.pipe.tasks.MakeSkyMapTask`, which we want so we can
    use one config file in an obs package to configure both.

    This name leads to unfortunate repetition with the field named
    "skymap" that holds it - "skyMap[name].skyMap" - but that seems
    unavoidable.
    """
    # Registry field: selects which BaseSkyMap subclass to construct
    # ("dodeca" by default) and holds that subclass's own configuration.
    skyMap = skyMapRegistry.makeField(
        doc="Type and parameters for the SkyMap itself.",
        default="dodeca",
    )
class ConvertRepoConfig(Config):
    """Configuration for `ConvertRepoTask`, covering subtask configuration,
    dataset selection, and collection layout for the Gen2-to-Gen3 conversion.
    """

    raws = ConfigurableField(
        "Configuration for subtask responsible for ingesting raws and adding "
        "exposure dimension entries.",
        target=RawIngestTask,
    )
    defineVisits = ConfigurableField(
        "Configuration for the subtask responsible for defining visits from "
        "exposures.",
        target=DefineVisitsTask,
    )
    skyMaps = ConfigDictField(
        "Mapping from Gen3 skymap name to the parameters used to construct a "
        "BaseSkyMap instance. This will be used to associate names with "
        "existing skymaps found in the Gen2 repo.",
        keytype=str,
        itemtype=ConvertRepoSkyMapConfig,
        default={}
    )
    rootSkyMapName = Field(
        "Name of a Gen3 skymap (an entry in ``self.skyMaps``) to assume for "
        "datasets in the root repository when no SkyMap is found there. ",
        dtype=str,
        optional=True,
        default=None,
    )
    runs = DictField(
        "A mapping from dataset type name to the RUN collection they should "
        "be inserted into. This must include all datasets that can be found "
        "in the root repository; other repositories will use per-repository "
        "runs.",
        keytype=str,
        itemtype=str,
        default={},
    )
    runsForced = DictField(
        "Like ``runs``, but is used even when the dataset is present in a "
        "non-root repository (i.e. rerun), overriding the non-root "
        "repository's main collection.",
        keytype=str,
        itemtype=str,
        default={
            "brightObjectMask": "masks",
        }
    )
    storageClasses = DictField(
        "Mapping from dataset type name or Gen2 policy entry (e.g. 'python' "
        "or 'persistable') to the Gen3 StorageClass name.",
        keytype=str,
        itemtype=str,
        default={
            "bias": "ExposureF",
            "dark": "ExposureF",
            "flat": "ExposureF",
            "defects": "Defects",
            "crosstalk": "CrosstalkCalib",
            "BaseSkyMap": "SkyMap",
            "BaseCatalog": "Catalog",
            "BackgroundList": "Background",
            "raw": "Exposure",
            "MultilevelParquetTable": "DataFrame",
            "ParquetTable": "DataFrame",
            "SkyWcs": "Wcs",
        }
    )
    formatterClasses = DictField(
        "Mapping from dataset type name to formatter class. "
        "By default these are derived from the formatters listed in the"
        " Gen3 datastore configuration.",
        keytype=str,
        itemtype=str,
        default={}
    )
    targetHandlerClasses = DictField(
        "Mapping from dataset type name to target handler class.",
        keytype=str,
        itemtype=str,
        default={}
    )
    doRegisterInstrument = Field(
        "If True (default), add dimension records for the Instrument and its "
        "filters and detectors to the registry instead of assuming they are "
        "already present.",
        dtype=bool,
        default=True,
    )
    refCats = ListField(
        "The names of reference catalogs (subdirectories under ref_cats) to "
        "be converted",
        dtype=str,
        default=[]
    )
    fileIgnorePatterns = ListField(
        "Filename globs that should be ignored instead of being treated as "
        "datasets.",
        dtype=str,
        default=["README.txt", "*.*~*", "butler.yaml", "gen3.sqlite3",
                 "registry.sqlite3", "calibRegistry.sqlite3", "_mapper",
                 "_parent", "repositoryCfg.yaml"]
    )
    rawDatasetType = Field(
        "Gen2 dataset type to use for raw data.",
        dtype=str,
        default="raw",
    )
    datasetIncludePatterns = ListField(
        "Glob-style patterns for dataset type names that should be converted.",
        dtype=str,
        default=["*"]
    )
    datasetIgnorePatterns = ListField(
        "Glob-style patterns for dataset type names that should not be "
        "converted despite matching a pattern in datasetIncludePatterns.",
        dtype=str,
        default=[]
    )
    datasetTemplateOverrides = DictField(
        "Overrides for Gen2 filename templates, keyed by dataset type. "
        "This can be used to support conversions of Gen2 repos whose mapper "
        "templates were modified in obs_* packages since the datasets were "
        "written.",
        keytype=str,
        itemtype=str,
        default={},
    )
    ccdKey = Field(
        "Key used for the Gen2 equivalent of 'detector' in data IDs.",
        dtype=str,
        default="ccd",
    )
    # NOTE: help text previously said "If True (default)" even though the
    # default is False; corrected to avoid misleading users.
    relatedOnly = Field(
        "If True, only convert datasets that are related to the ingested "
        "visits. Ignored unless a list of visits is passed to run().",
        dtype=bool,
        default=False,
    )
    doExpandDataIds = Field(
        "If True (default), expand data IDs to include extra metadata before "
        "ingesting them. "
        "This may be required in order to associate calibration datasets with "
        "validity ranges or populate file templates, so setting this to False "
        "is considered advanced usage (and it may not always work). When it "
        "does, it can provide a considerable speedup.",
        dtype=bool,
        default=True,
    )
    doMakeUmbrellaCollection = Field(
        "If True (default), define an '<instrument>/defaults' CHAINED "
        "collection that includes everything found in the root repo as well "
        "as the default calibration collection.",
        dtype=bool,
        default=True,
    )
    extraUmbrellaChildren = ListField(
        "Additional child collections to include in the umbrella collection. "
        "Ignored if doMakeUmbrellaCollection=False.",
        dtype=str,
        default=[]
    )

    @property
    def transfer(self):
        """Transfer mode used when ingesting raws; delegates to the ``raws``
        subtask config (`str` or `None`).
        """
        return self.raws.transfer

    @transfer.setter
    def transfer(self, value):
        self.raws.transfer = value

    def setDefaults(self):
        # Default to in-place ingest (no file transfer) for raws.
        self.transfer = None

    def validate(self):
        """Validate the config, enforcing cross-field constraints.

        Raises
        ------
        ValueError
            Raised if ``relatedOnly`` is set without ``doExpandDataIds``,
            since filtering by related data IDs requires expansion.
        """
        super().validate()
        if self.relatedOnly and not self.doExpandDataIds:
            raise ValueError("relatedOnly requires doExpandDataIds.")
class ConvertRepoTask(Task):
    """A task that converts one or more related Gen2 data repositories to a
    single Gen3 data repository (with multiple collections).

    Parameters
    ----------
    config: `ConvertRepoConfig`
        Configuration for this task.
    butler3: `lsst.daf.butler.Butler`
        A writeable Gen3 Butler instance that represents the data repository
        that datasets will be ingested into.  If the 'raw' dataset is
        configured to be included in the conversion, ``butler3.run`` should be
        set to the name of the collection raws should be ingested into, and
        ``butler3.collections`` should include a calibration collection from
        which the ``camera`` dataset can be loaded, unless a calibration repo
        is converted and ``doWriteCuratedCalibrations`` is `True`.
    instrument : `lsst.obs.base.Instrument`
        The Gen3 instrument that should be used for this conversion.
    dry_run : `bool`, optional
        If `True` (`False` is default), make no changes to the Gen3 data
        repository while running as many steps as possible.  This option is
        best used with a read-only ``butler3`` argument to ensure unexpected
        edge cases respect this argument (and fail rather than write if they
        do not).
    **kwargs
        Other keyword arguments are forwarded to the `Task` constructor.

    Notes
    -----
    Most of the work of converting repositories is delegated to instances of
    the `RepoConverter` hierarchy.  The `ConvertRepoTask` instance itself holds
    only state that is relevant for all Gen2 repositories being ingested, while
    each `RepoConverter` instance holds only state relevant for the conversion
    of a single Gen2 repository.  Both the task and the `RepoConverter`
    instances are single use; `ConvertRepoTask.run` and most `RepoConverter`
    methods may only be called once on a particular instance.
    """

    ConfigClass = ConvertRepoConfig

    _DefaultName = "convertRepo"

    def __init__(self, config=None, *, butler3: Butler3, instrument: Instrument, dry_run: bool = False,
                 **kwargs):
        config.validate()  # Not a CmdlineTask nor PipelineTask, so have to validate the config here.
        super().__init__(config, **kwargs)
        # Make self.butler3 one that doesn't have any collections associated
        # with it - those are needed by RawIngestTask and DefineVisitsTask, but
        # we don't want them messing with converted datasets, because those
        # have their own logic for figuring out which collections to write to.
        self.butler3 = Butler3(butler=butler3)
        self.registry = self.butler3.registry
        self.universe = self.registry.dimensions
        if self.isDatasetTypeIncluded("raw"):
            self.makeSubtask("raws", butler=butler3)
            self.makeSubtask("defineVisits", butler=butler3)
        else:
            self.raws = None
            self.defineVisits = None
        self.instrument = instrument
        self._configuredSkyMapsBySha1 = {}
        self._configuredSkyMapsByName = {}
        # Loop variable renamed from ``config`` to avoid shadowing the
        # constructor parameter of the same name.
        for name, skyMapConfig in self.config.skyMaps.items():
            instance = skyMapConfig.skyMap.apply()
            self._populateSkyMapDicts(name, instance)
        self._usedSkyPix = set()
        self.translatorFactory = self.instrument.makeDataIdTranslatorFactory()
        self.translatorFactory.log = self.log.getChild("translators")
        self.dry_run = dry_run

    def _reduce_kwargs(self):
        # Add extra parameters to pickle.  dry_run is included so that
        # unpickled copies (e.g. in multiprocessing workers) preserve the
        # dry-run state instead of silently reverting to the default.
        return dict(**super()._reduce_kwargs(), butler3=self.butler3, instrument=self.instrument,
                    dry_run=self.dry_run)

    def _populateSkyMapDicts(self, name, instance):
        # Index the skymap struct both by content hash (for matching skymaps
        # found in Gen2 repos) and by Gen3 name.
        struct = ConfiguredSkyMap(name=name, sha1=instance.getSha1(), instance=instance)
        self._configuredSkyMapsBySha1[struct.sha1] = struct
        self._configuredSkyMapsByName[struct.name] = struct

    def isDatasetTypeIncluded(self, datasetTypeName: str):
        """Return `True` if configuration indicates that the given dataset type
        should be converted.

        This method is intended to be called primarily by the
        `RepoConverter` instances used internally by the task.

        Parameters
        ----------
        datasetTypeName: str
            Name of the dataset type.

        Returns
        -------
        included : `bool`
            Whether the dataset should be included in the conversion.
        """
        return (
            any(fnmatch.fnmatchcase(datasetTypeName, pattern)
                for pattern in self.config.datasetIncludePatterns)
            and not any(fnmatch.fnmatchcase(datasetTypeName, pattern)
                        for pattern in self.config.datasetIgnorePatterns)
        )

    def useSkyMap(self, skyMap: BaseSkyMap, skyMapName: str) -> str:
        """Indicate that a repository uses the given SkyMap.

        This method is intended to be called primarily by the
        `RepoConverter` instances used internally by the task.

        Parameters
        ----------
        skyMap : `lsst.skymap.BaseSkyMap`
            SkyMap instance being used, typically retrieved from a Gen2
            data repository.
        skyMapName : `str`
            The name of the gen2 skymap, for error reporting.

        Returns
        -------
        name : `str`
            The name of the skymap in Gen3 data IDs.

        Raises
        ------
        LookupError
            Raised if the specified skymap cannot be found.
        """
        sha1 = skyMap.getSha1()
        if sha1 not in self._configuredSkyMapsBySha1:
            # Not in the configured set; register it under its Gen2 name.
            self._populateSkyMapDicts(skyMapName, skyMap)
        try:
            struct = self._configuredSkyMapsBySha1[sha1]
        except KeyError as err:
            msg = f"SkyMap '{skyMapName}' with sha1={sha1} not included in configuration."
            raise LookupError(msg) from err
        struct.used = True
        return struct.name

    def registerUsedSkyMaps(self, subset: Optional[ConversionSubset]):
        """Register all skymaps that have been marked as used.

        This method is intended to be called primarily by the
        `RepoConverter` instances used internally by the task.

        Parameters
        ----------
        subset : `ConversionSubset`, optional
            Object that will be used to filter converted datasets by data ID.
            If given, it will be updated with the tracts of this skymap that
            overlap the visits in the subset.
        """
        for struct in self._configuredSkyMapsBySha1.values():
            if struct.used:
                if not self.dry_run:
                    try:
                        # If the skymap isn't registered, this will raise.
                        self.butler3.registry.expandDataId(skymap=struct.name)
                    except LookupError:
                        self.log.info("Registering skymap %s.", struct.name)
                        struct.instance.register(struct.name, self.butler3)
                if subset is not None and self.config.relatedOnly:
                    subset.addSkyMap(self.registry, struct.name)

    def useSkyPix(self, dimension: SkyPixDimension):
        """Indicate that a repository uses the given SkyPix dimension.

        This method is intended to be called primarily by the
        `RepoConverter` instances used internally by the task.

        Parameters
        ----------
        dimension : `lsst.daf.butler.SkyPixDimension`
            Dimension representing a pixelization of the sky.
        """
        self._usedSkyPix.add(dimension)

    def registerUsedSkyPix(self, subset: Optional[ConversionSubset]):
        """Register all sky pixelization dimensions that have been marked as
        used.

        This method is intended to be called primarily by the
        `RepoConverter` instances used internally by the task.

        Parameters
        ----------
        subset : `ConversionSubset`, optional
            Object that will be used to filter converted datasets by data ID.
            If given, it will be updated with the pixelization IDs that
            overlap the visits in the subset.
        """
        if subset is not None and self.config.relatedOnly:
            for dimension in self._usedSkyPix:
                subset.addSkyPix(self.registry, dimension)

    def run(self, root: str, *,
            calibs: Optional[List[CalibRepo]] = None,
            reruns: Optional[List[Rerun]] = None,
            visits: Optional[Iterable[int]] = None,
            pool: Optional[Pool] = None,
            processes: int = 1):
        """Convert a group of related data repositories.

        Parameters
        ----------
        root : `str`
            Complete path to the root Gen2 data repository.  This should be
            a data repository that includes a Gen2 registry and any raw files
            and/or reference catalogs.
        calibs : `list` of `CalibRepo`
            Specifications for Gen2 calibration repos to convert.  If `None`
            (default), curated calibrations only will be written to the
            default calibration collection for this instrument; set to ``()``
            explicitly to disable this.
        reruns : `list` of `Rerun`
            Specifications for rerun (processing output) repos to convert.
            If `None` (default), no reruns are converted.
        visits : iterable of `int`, optional
            The integer IDs of visits to convert.  If not provided, all
            visits in the Gen2 root repository will be converted.
        pool : `multiprocessing.Pool`, optional
            If not `None`, a process pool with which to parallelize some
            operations.
        processes : `int`, optional
            The number of processes to use for conversion.
        """
        if pool is None and processes > 1:
            pool = Pool(processes)
        if calibs is None:
            calibs = [CalibRepo(path=None)]
        elif calibs and not self.config.doExpandDataIds:
            raise ValueError("Cannot convert calib repos with config.doExpandDataIds=False.")
        if reruns is None:
            # The documented default of "no reruns" previously crashed with a
            # TypeError when the loops below iterated over None.
            reruns = []
        if visits is not None:
            subset = ConversionSubset(instrument=self.instrument.getName(), visits=frozenset(visits))
        else:
            if self.config.relatedOnly:
                self.log.warn("config.relatedOnly is True but all visits are being ingested; "
                              "no filtering will be done.")
            subset = None
        if (not self.config.doExpandDataIds
                and self.butler3.datastore.needs_expanded_data_ids(self.config.transfer)):
            # The message has no format placeholder, so no extra log
            # arguments are passed (a stray argument here used to break
            # log-record formatting).
            self.log.warn("config.doExpandDataIds=False but datastore reports that expanded data "
                          "IDs may be needed.")

        # Check that at most one CalibRepo is marked as default, to fail before
        # we actually write anything.
        defaultCalibRepos = [c.path for c in calibs if c.default]
        if len(defaultCalibRepos) > 1:
            raise ValueError(f"Multiple calib repos marked as default: {defaultCalibRepos}.")

        # Make converters for all Gen2 repos.
        converters = []
        # Start with the root repo, which must always be given even if we are
        # not configured to convert anything from it.
        rootConverter = RootRepoConverter(task=self, root=root, subset=subset, instrument=self.instrument)
        converters.append(rootConverter)
        # Calibration repos are next.
        for spec in calibs:
            calibRoot = spec.path
            if calibRoot is not None:
                if not os.path.isabs(calibRoot):
                    calibRoot = os.path.join(rootConverter.root, calibRoot)
                converter = CalibRepoConverter(task=self, root=calibRoot,
                                               labels=spec.labels,
                                               instrument=self.instrument,
                                               mapper=rootConverter.mapper,
                                               subset=rootConverter.subset)
                converters.append(converter)
            # CalibRepo entries that don't have a path are just there for
            # curated calibs and maybe to set up a collection pointer; that's
            # handled further down (after we've done everything we can that
            # doesn't involve actually writing to the output Gen3 repo).
        # And now reruns.
        rerunConverters = {}
        for spec in reruns:
            runRoot = spec.path
            if not os.path.isabs(runRoot):
                runRoot = os.path.join(rootConverter.root, runRoot)
            spec.guessCollectionNames(self.instrument, rootConverter.root)
            converter = StandardRepoConverter(task=self, root=runRoot, run=spec.runName,
                                              instrument=self.instrument, subset=rootConverter.subset)
            converters.append(converter)
            rerunConverters[spec.runName] = converter

        # Walk Gen2 repos to find datasets to convert.
        for converter in converters:
            converter.prep()

        # Register the instrument if we're configured to do so.
        if self.config.doRegisterInstrument and not self.dry_run:
            self.instrument.register(self.registry)

        # Run raw ingest (does nothing if we weren't configured to convert the
        # 'raw' dataset type).
        rootConverter.runRawIngest(pool=pool)

        # Write curated calibrations to all calibration collections where they
        # were requested (which may be implicit, by passing calibs=None).  Also
        # set up a CHAINED collection that points to the default CALIBRATION
        # collection if one is needed.
        if not self.dry_run:
            for spec in calibs:
                if spec.curated:
                    self.instrument.writeCuratedCalibrations(self.butler3, labels=spec.labels)
                if spec.default and spec.labels:
                    # This is guaranteed to be True at most once in the loop by
                    # logic at the top of this method.
                    defaultCalibName = self.instrument.makeCalibrationCollectionName()
                    self.butler3.registry.registerCollection(defaultCalibName, CollectionType.CHAINED)
                    recommendedCalibName = self.instrument.makeCalibrationCollectionName(*spec.labels)
                    self.butler3.registry.registerCollection(recommendedCalibName,
                                                             CollectionType.CALIBRATION)
                    self.butler3.registry.setCollectionChain(defaultCalibName, [recommendedCalibName])

        # Define visits (also does nothing if we weren't configured to convert
        # the 'raw' dataset type).
        rootConverter.runDefineVisits(pool=pool)

        # Insert dimensions that are potentially shared by all Gen2
        # repositories (and are hence managed directly by the Task, rather
        # than a converter instance).
        # This also finishes setting up the (shared) converter.subsets object
        # that is used to filter data IDs for config.relatedOnly.
        self.registerUsedSkyMaps(rootConverter.subset)
        self.registerUsedSkyPix(rootConverter.subset)

        # Look for datasets, generally by scanning the filesystem.
        # This requires dimensions to have already been inserted so we can use
        # dimension information to identify related datasets.
        for converter in converters:
            converter.findDatasets()

        # Expand data IDs.
        if self.config.doExpandDataIds:
            for converter in converters:
                converter.expandDataIds()

        if self.dry_run:
            # Everything beyond this point writes to the Gen3 repo.
            return

        # Actually ingest datasets.
        for converter in converters:
            converter.ingest()

        # Perform any post-ingest processing.
        for converter in converters:
            converter.finish()

        # Make the umbrella collection, if desired.
        if self.config.doMakeUmbrellaCollection:
            umbrella = self.instrument.makeUmbrellaCollectionName()
            self.registry.registerCollection(umbrella, CollectionType.CHAINED)
            children = list(self.registry.getCollectionChain(umbrella))
            children.extend(rootConverter.getCollectionChain())
            children.append(self.instrument.makeCalibrationCollectionName())
            if BaseSkyMap.SKYMAP_RUN_COLLECTION_NAME not in children:
                # Ensure the umbrella collection includes the global skymap
                # collection, even if it's currently empty.
                self.registry.registerRun(BaseSkyMap.SKYMAP_RUN_COLLECTION_NAME)
                children.append(BaseSkyMap.SKYMAP_RUN_COLLECTION_NAME)
            children.extend(self.config.extraUmbrellaChildren)
            self.log.info("Defining %s from chain %s.", umbrella, children)
            self.registry.setCollectionChain(umbrella, children)

        # Add chained collections for reruns.
        for spec in reruns:
            if spec.chainName is not None:
                self.butler3.registry.registerCollection(spec.chainName, type=CollectionType.CHAINED)
                chain = [spec.runName]
                chain.extend(rerunConverters[spec.runName].getCollectionChain())
                for parent in spec.parents:
                    chain.append(parent)
                    parentConverter = rerunConverters.get(parent)
                    if parentConverter is not None:
                        chain.extend(parentConverter.getCollectionChain())
                chain.extend(rootConverter.getCollectionChain())
                if len(calibs) == 1:
                    # Exactly one calibration repo being converted, so it's
                    # safe-ish to assume that's the one the rerun used.
                    chain.append(self.instrument.makeCalibrationCollectionName(*calibs[0].labels))
                self.log.info("Defining %s from chain %s.", spec.chainName, chain)
                self.butler3.registry.setCollectionChain(spec.chainName, chain, flatten=True)