Coverage for python/lsst/obs/base/gen2to3/convertRepo.py: 26%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of obs_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ["CalibRepo", "ConvertRepoConfig", "ConvertRepoTask", "ConvertRepoSkyMapConfig", "Rerun"]
25import fnmatch
26import os
27from dataclasses import dataclass
28from multiprocessing import Pool
29from typing import Iterable, List, Optional, Tuple
31from lsst.daf.butler import Butler as Butler3
32from lsst.daf.butler import ButlerURI, CollectionType, SkyPixDimension
33from lsst.pex.config import Config, ConfigDictField, ConfigurableField, DictField, Field, ListField
34from lsst.pipe.base import Task
35from lsst.skymap import BaseSkyMap, skyMapRegistry
37from .._instrument import Instrument
38from ..defineVisits import DefineVisitsTask
39from ..ingest import RawIngestTask
40from .calibRepoConverter import CalibRepoConverter
41from .repoConverter import ConversionSubset
42from .rootRepoConverter import RootRepoConverter
43from .standardRepoConverter import StandardRepoConverter
@dataclass
class ConfiguredSkyMap:
    """Struct containing information about a skymap that may appear in a Gen2
    repository.
    """

    name: str
    """Name of the skymap used in Gen3 data IDs.
    """

    sha1: bytes
    """Hash computed by `BaseSkyMap.getSha1`.
    """

    instance: BaseSkyMap
    """The skymap instance itself.
    """

    used: bool = False
    """Whether this skymap has been found in at least one repository being
    converted.
    """
70def _dropPrefix(s: str, prefix: str) -> Tuple[str, bool]:
71 """If ``s`` starts with ``prefix``, return the rest of ``s`` and `True`.
72 Otherwise return ``s`` and `False`.
73 """
74 if s.startswith(prefix):
75 return s[len(prefix) :], True
76 return s, False
@dataclass
class Rerun:
    """Specification for a Gen2 processing-output repository to convert."""

    path: str
    """Path to the Gen2 repository, either absolute or relative to the root
    repository (`str`).
    """

    runName: Optional[str]
    """Name of the `~lsst.daf.butler.CollectionType.RUN` collection that
    converted datasets will be written into (`str` or `None`).

    When `None`, a name is guessed by calling `guessCollectionNames`.
    """

    chainName: Optional[str]
    """Name of a `~lsst.daf.butler.CollectionType.CHAINED` collection that
    groups this repository's datasets together with those of its parent
    repositories (`str` or `None`).

    When `None`, a name is guessed by calling `guessCollectionNames`.
    """

    parents: List[str]
    """Collection names associated with parent repositories, used to define
    the chained collection (`list` [ `str` ]).

    Ignored if `chainName` is `None`.  Runs used in the root repo are
    automatically included.
    """

    def guessCollectionNames(self, instrument: Instrument, root: str) -> None:
        """Fill in `runName` and `chainName` with guesses that follow Gen3
        naming conventions.

        If `chainName` is not `None` while `runName` is, `runName` is derived
        from it.  If `runName` is already set, nothing changes at all, and if
        `chainName` is `None`, no chained collection will be created.

        Parameters
        ----------
        instrument : `Instrument`
            Instrument object for the repository being converted.
        root : `str`
            Path to the root repository.  If ``self.path`` is an absolute
            path under this root, the root part is stripped as part of
            generating the run name.

        Raises
        ------
        ValueError
            Raised if the appropriate collection names cannot be inferred.
        """
        if self.runName is not None:
            # Fully specified already; nothing to guess.
            return
        if self.chainName is None:
            if os.path.isabs(self.path):
                # Express the rerun path relative to the root repo.
                relative = ButlerURI(self.path).relative_to(ButlerURI(root))
                if relative is None:
                    raise ValueError(
                        f"Cannot guess run name collection for rerun at '{self.path}': "
                        f"no clear relationship to root '{root}'."
                    )
                name = relative
            else:
                name = self.path
            # Translate conventional Gen2 rerun prefixes into Gen3-style
            # collection names: "private/<user>/..." becomes "u/<user>/...",
            # while leading "rerun/" and "shared/" are simply dropped.
            name, _ = _dropPrefix(name, "rerun/")
            name, isPersonal = _dropPrefix(name, "private/")
            if isPersonal:
                name = f"u/{name}"
            else:
                name, _ = _dropPrefix(name, "shared/")
            self.chainName = instrument.makeCollectionName("runs", name)
        self.runName = f"{self.chainName}/direct"
@dataclass
class CalibRepo:
    """Specification for a Gen2 calibration repository to convert."""

    path: Optional[str]
    """Path to the Gen2 repository, either absolute or relative to the root
    repository (`str` or `None`).

    When `None`, no calibration datasets are converted from Gen2, though
    curated calibrations may still be written.
    """

    curated: bool = True
    """Whether to write curated calibrations into the associated
    ``CALIBRATION`` collection (`bool`).
    """

    labels: Tuple[str, ...] = ()
    """Extra strings inserted into collection names, affecting both the
    ``RUN`` collections that datasets are ingested directly into and the
    ``CALIBRATION`` collection that associates them with validity ranges.

    Leaving this empty populates the instrument's default calibration
    collection directly with the converted datasets, and is incompatible
    with ``default=False``.  That is a good fit for test data repositories
    where only one ``CALIBRATION`` collection will ever exist.  Otherwise a
    non-empty tuple should be used, so the default calibration collection
    can instead be a ``CHAINED`` collection pointer to the currently
    recommended ``CALIBRATION`` collection.
    """

    default: bool = True
    """Whether the created ``CALIBRATION`` collection should be the default
    for this instrument (`bool`).

    At most one of the calibration repos passed to `ConvertRepoTask.run` may
    set this.  It defaults to `True` because converting more than one
    calibration collection is rare.  With non-empty ``labels``,
    ``default=True`` creates a ``CHAINED`` collection pointing at the
    converted ``CALIBRATION`` collection; with empty ``labels``, ``default``
    *must* be `True` and no pointer collection is needed.
    """

    def __post_init__(self) -> None:
        # An empty label tuple means datasets land directly in the default
        # calibration collection, so the repo cannot also be non-default.
        if not (self.labels or self.default):
            raise ValueError("labels=() requires default=True")
class ConvertRepoSkyMapConfig(Config):
    """Sub-config used to hold the parameters of a SkyMap.

    Notes
    -----
    This config only needs to exist because we can't put a
    `~lsst.pex.config.RegistryField` directly inside a
    `~lsst.pex.config.ConfigDictField`.

    It needs to have its only field named "skyMap" for compatibility with the
    configuration of `lsst.pipe.tasks.MakeSkyMapTask`, which we want so we can
    use one config file in an obs package to configure both.

    This name leads to unfortunate repetition with the field named
    "skymap" that holds it - "skyMap[name].skyMap" - but that seems
    unavoidable.
    """

    # Registry field selecting which skymap class to construct and holding
    # its parameters; "dodeca" is the registry key used as the default.
    skyMap = skyMapRegistry.makeField(
        doc="Type and parameters for the SkyMap itself.",
        default="dodeca",
    )
class ConvertRepoConfig(Config):
    """Configuration for `ConvertRepoTask`."""

    raws = ConfigurableField(
        "Configuration for subtask responsible for ingesting raws and adding exposure dimension entries.",
        target=RawIngestTask,
    )
    defineVisits = ConfigurableField(
        "Configuration for the subtask responsible for defining visits from exposures.",
        target=DefineVisitsTask,
    )
    skyMaps = ConfigDictField(
        "Mapping from Gen3 skymap name to the parameters used to construct a "
        "BaseSkyMap instance. This will be used to associate names with "
        "existing skymaps found in the Gen2 repo.",
        keytype=str,
        itemtype=ConvertRepoSkyMapConfig,
        default={},
    )
    rootSkyMapName = Field(
        "Name of a Gen3 skymap (an entry in ``self.skyMaps``) to assume for "
        "datasets in the root repository when no SkyMap is found there. ",
        dtype=str,
        optional=True,
        default=None,
    )
    runs = DictField(
        "A mapping from dataset type name to the RUN collection they should "
        "be inserted into. This must include all datasets that can be found "
        "in the root repository; other repositories will use per-repository "
        "runs.",
        keytype=str,
        itemtype=str,
        default={},
    )
    runsForced = DictField(
        "Like ``runs``, but is used even when the dataset is present in a "
        "non-root repository (i.e. rerun), overriding the non-root "
        "repository's main collection.",
        keytype=str,
        itemtype=str,
        default={
            "brightObjectMask": "masks",
        },
    )
    storageClasses = DictField(
        "Mapping from dataset type name or Gen2 policy entry (e.g. 'python' "
        "or 'persistable') to the Gen3 StorageClass name.",
        keytype=str,
        itemtype=str,
        default={
            "bias": "ExposureF",
            "dark": "ExposureF",
            "flat": "ExposureF",
            "defects": "Defects",
            "crosstalk": "CrosstalkCalib",
            "BaseSkyMap": "SkyMap",
            "BaseCatalog": "Catalog",
            "BackgroundList": "Background",
            "raw": "Exposure",
            "MultilevelParquetTable": "DataFrame",
            "ParquetTable": "DataFrame",
            "SkyWcs": "Wcs",
        },
    )
    formatterClasses = DictField(
        "Mapping from dataset type name to formatter class. "
        "By default these are derived from the formatters listed in the"
        " Gen3 datastore configuration.",
        keytype=str,
        itemtype=str,
        default={},
    )
    targetHandlerClasses = DictField(
        "Mapping from dataset type name to target handler class.", keytype=str, itemtype=str, default={}
    )
    doRegisterInstrument = Field(
        "If True (default), add dimension records for the Instrument and its "
        "filters and detectors to the registry instead of assuming they are "
        "already present.",
        dtype=bool,
        default=True,
    )
    refCats = ListField(
        "The names of reference catalogs (subdirectories under ref_cats) to be converted",
        dtype=str,
        default=[],
    )
    fileIgnorePatterns = ListField(
        "Filename globs that should be ignored instead of being treated as datasets.",
        dtype=str,
        default=[
            "README.txt",
            "*.*~*",
            "butler.yaml",
            "gen3.sqlite3",
            "registry.sqlite3",
            "calibRegistry.sqlite3",
            "_mapper",
            "_parent",
            "repositoryCfg.yaml",
        ],
    )
    rawDatasetType = Field(
        "Gen2 dataset type to use for raw data.",
        dtype=str,
        default="raw",
    )
    datasetIncludePatterns = ListField(
        "Glob-style patterns for dataset type names that should be converted.", dtype=str, default=["*"]
    )
    datasetIgnorePatterns = ListField(
        "Glob-style patterns for dataset type names that should not be "
        "converted despite matching a pattern in datasetIncludePatterns.",
        dtype=str,
        default=[],
    )
    datasetTemplateOverrides = DictField(
        "Overrides for Gen2 filename templates, keyed by dataset type. "
        "This can be used to support conversions of Gen2 repos whose mapper "
        "templates were modified in obs_* packages since the datasets were "
        "written.",
        keytype=str,
        itemtype=str,
        default={},
    )
    ccdKey = Field(
        "Key used for the Gen2 equivalent of 'detector' in data IDs.",
        dtype=str,
        default="ccd",
    )
    # BUGFIX: the doc string previously claimed "If True (default)" even
    # though the configured default is False; the text now matches the code.
    relatedOnly = Field(
        "If True, only convert datasets that are related to the "
        "ingested visits. Ignored unless a list of visits is passed to "
        "run().",
        dtype=bool,
        default=False,
    )
    doExpandDataIds = Field(
        "If True (default), expand data IDs to include extra metadata before "
        "ingesting them. "
        "This may be required in order to associate calibration datasets with "
        "validity ranges or populate file templates, so setting this to False "
        "is considered advanced usage (and it may not always work). When it "
        "does, it can provide a considerable speedup.",
        dtype=bool,
        default=True,
    )
    doMakeUmbrellaCollection = Field(
        "If True (default), define an '<instrument>/defaults' CHAINED "
        "collection that includes everything found in the root repo as well "
        "as the default calibration collection.",
        dtype=bool,
        default=True,
    )
    extraUmbrellaChildren = ListField(
        "Additional child collections to include in the umbrella collection. "
        "Ignored if doMakeUmbrellaCollection=False.",
        dtype=str,
        default=[],
    )

    @property
    def transfer(self):
        # Alias for the raw-ingest subtask's transfer mode, so one setting
        # covers the whole conversion.
        return self.raws.transfer

    @transfer.setter
    def transfer(self, value):
        self.raws.transfer = value

    def setDefaults(self):
        # Default to in-place ingest (no file transfer) for conversions.
        self.transfer = None

    def validate(self):
        """Validate this config, raising `ValueError` for inconsistent
        settings.
        """
        super().validate()
        # Filtering by related visits requires expanded data IDs to work.
        if self.relatedOnly and not self.doExpandDataIds:
            raise ValueError("relatedOnly requires doExpandDataIds.")
class ConvertRepoTask(Task):
    """A task that converts one or more related Gen2 data repositories to a
    single Gen3 data repository (with multiple collections).

    Parameters
    ----------
    config: `ConvertRepoConfig`
        Configuration for this task.
    butler3: `lsst.daf.butler.Butler`
        A writeable Gen3 Butler instance that represents the data repository
        that datasets will be ingested into.  If the 'raw' dataset is
        configured to be included in the conversion, ``butler3.run`` should be
        set to the name of the collection raws should be ingested into, and
        ``butler3.collections`` should include a calibration collection from
        which the ``camera`` dataset can be loaded, unless a calibration repo
        is converted and ``doWriteCuratedCalibrations`` is `True`.
    instrument : `lsst.obs.base.Instrument`
        The Gen3 instrument that should be used for this conversion.
    dry_run : `bool`, optional
        If `True` (`False` is default), make no changes to the Gen3 data
        repository while running as many steps as possible.  This option is
        best used with a read-only ``butler3`` argument to ensure unexpected
        edge cases respect this argument (and fail rather than write if they
        do not).
    **kwargs
        Other keyword arguments are forwarded to the `Task` constructor.

    Notes
    -----
    Most of the work of converting repositories is delegated to instances of
    the `RepoConverter` hierarchy.  The `ConvertRepoTask` instance itself
    holds only state that is relevant for all Gen2 repositories being
    ingested, while each `RepoConverter` instance holds only state relevant
    for the conversion of a single Gen2 repository.  Both the task and the
    `RepoConverter` instances are single use; `ConvertRepoTask.run` and most
    `RepoConverter` methods may only be called once on a particular instance.
    """

    ConfigClass = ConvertRepoConfig

    _DefaultName = "convertRepo"

    def __init__(
        self, config=None, *, butler3: Butler3, instrument: Instrument, dry_run: bool = False, **kwargs
    ):
        # BUGFIX: config defaults to None but was dereferenced immediately;
        # construct a default config in that case instead of crashing.
        if config is None:
            config = self.ConfigClass()
        config.validate()  # Not a CmdlineTask nor PipelineTask, so have to validate the config here.
        super().__init__(config, **kwargs)
        # Make self.butler3 one that doesn't have any collections associated
        # with it - those are needed by RawIngestTask and DefineVisitsTask, but
        # we don't want them messing with converted datasets, because those
        # have their own logic for figuring out which collections to write to.
        self.butler3 = Butler3(butler=butler3)
        self.registry = self.butler3.registry
        self.universe = self.registry.dimensions
        if self.isDatasetTypeIncluded("raw"):
            self.makeSubtask("raws", butler=butler3)
            self.makeSubtask("defineVisits", butler=butler3)
        else:
            self.raws = None
            self.defineVisits = None
        self.instrument = instrument
        self._configuredSkyMapsBySha1 = {}
        self._configuredSkyMapsByName = {}
        # (renamed loop variable so it no longer shadows the ``config``
        # parameter)
        for name, skyMapConfig in self.config.skyMaps.items():
            instance = skyMapConfig.skyMap.apply()
            self._populateSkyMapDicts(name, instance)
        self._usedSkyPix = set()
        self.translatorFactory = self.instrument.makeDataIdTranslatorFactory()
        self.translatorFactory.log = self.log.getChild("translators")
        self.dry_run = dry_run

    def _reduce_kwargs(self):
        # Add the extra constructor arguments to the state used to pickle
        # this task.
        # NOTE(review): dry_run is not included here, so an unpickled task
        # always has dry_run=False — confirm that is intended.
        return dict(**super()._reduce_kwargs(), butler3=self.butler3, instrument=self.instrument)

    def _populateSkyMapDicts(self, name, instance):
        # Index the skymap both by content hash and by Gen3 name so later
        # lookups can go either way.
        struct = ConfiguredSkyMap(name=name, sha1=instance.getSha1(), instance=instance)
        self._configuredSkyMapsBySha1[struct.sha1] = struct
        self._configuredSkyMapsByName[struct.name] = struct

    def isDatasetTypeIncluded(self, datasetTypeName: str) -> bool:
        """Return `True` if configuration indicates that the given dataset
        type should be converted.

        This method is intended to be called primarily by the
        `RepoConverter` instances used internally by the task.

        Parameters
        ----------
        datasetTypeName: str
            Name of the dataset type.

        Returns
        -------
        included : `bool`
            Whether the dataset should be included in the conversion.
        """
        return any(
            fnmatch.fnmatchcase(datasetTypeName, pattern) for pattern in self.config.datasetIncludePatterns
        ) and not any(
            fnmatch.fnmatchcase(datasetTypeName, pattern) for pattern in self.config.datasetIgnorePatterns
        )

    def useSkyMap(self, skyMap: BaseSkyMap, skyMapName: str) -> str:
        """Indicate that a repository uses the given SkyMap.

        This method is intended to be called primarily by the
        `RepoConverter` instances used internally by the task.

        Parameters
        ----------
        skyMap : `lsst.skymap.BaseSkyMap`
            SkyMap instance being used, typically retrieved from a Gen2
            data repository.
        skyMapName : `str`
            The name of the gen2 skymap, for error reporting.

        Returns
        -------
        name : `str`
            The name of the skymap in Gen3 data IDs.

        Raises
        ------
        LookupError
            Raised if the specified skymap cannot be found.
        """
        sha1 = skyMap.getSha1()
        if sha1 not in self._configuredSkyMapsBySha1:
            # Not seen before; register it under the Gen2 name.
            self._populateSkyMapDicts(skyMapName, skyMap)
        try:
            struct = self._configuredSkyMapsBySha1[sha1]
        except KeyError as err:
            msg = f"SkyMap '{skyMapName}' with sha1={sha1} not included in configuration."
            raise LookupError(msg) from err
        struct.used = True
        return struct.name

    def registerUsedSkyMaps(self, subset: Optional[ConversionSubset]):
        """Register all skymaps that have been marked as used.

        This method is intended to be called primarily by the
        `RepoConverter` instances used internally by the task.

        Parameters
        ----------
        subset : `ConversionSubset`, optional
            Object that will be used to filter converted datasets by data ID.
            If given, it will be updated with the tracts of this skymap that
            overlap the visits in the subset.
        """
        for struct in self._configuredSkyMapsBySha1.values():
            if struct.used:
                if not self.dry_run:
                    try:
                        # If the skymap isn't registered, this will raise.
                        self.butler3.registry.expandDataId(skymap=struct.name)
                    except LookupError:
                        self.log.info("Registering skymap %s.", struct.name)
                        struct.instance.register(struct.name, self.butler3)
                if subset is not None and self.config.relatedOnly:
                    subset.addSkyMap(self.registry, struct.name)

    def useSkyPix(self, dimension: SkyPixDimension):
        """Indicate that a repository uses the given SkyPix dimension.

        This method is intended to be called primarily by the
        `RepoConverter` instances used internally by the task.

        Parameters
        ----------
        dimension : `lsst.daf.butler.SkyPixDimension`
            Dimension representing a pixelization of the sky.
        """
        self._usedSkyPix.add(dimension)

    def registerUsedSkyPix(self, subset: Optional[ConversionSubset]):
        """Register all skypix dimensions that have been marked as used.

        This method is intended to be called primarily by the
        `RepoConverter` instances used internally by the task.

        Parameters
        ----------
        subset : `ConversionSubset`, optional
            Object that will be used to filter converted datasets by data ID.
            If given, it will be updated with the pixelization IDs that
            overlap the visits in the subset.
        """
        if subset is not None and self.config.relatedOnly:
            for dimension in self._usedSkyPix:
                subset.addSkyPix(self.registry, dimension)

    def run(
        self,
        root: str,
        *,
        calibs: Optional[List[CalibRepo]] = None,
        reruns: Optional[List[Rerun]] = None,
        visits: Optional[Iterable[int]] = None,
        pool: Optional[Pool] = None,
        processes: int = 1,
    ):
        """Convert a group of related data repositories.

        Parameters
        ----------
        root : `str`
            Complete path to the root Gen2 data repository.  This should be
            a data repository that includes a Gen2 registry and any raw files
            and/or reference catalogs.
        calibs : `list` of `CalibRepo`
            Specifications for Gen2 calibration repos to convert.  If `None`
            (default), curated calibrations only will be written to the
            default calibration collection for this instrument; set to ``()``
            explicitly to disable this.
        reruns : `list` of `Rerun`
            Specifications for rerun (processing output) repos to convert.
            If `None` (default), no reruns are converted.
        visits : iterable of `int`, optional
            The integer IDs of visits to convert.  If not provided, all
            visits in the Gen2 root repository will be converted.
        pool : `multiprocessing.Pool`, optional
            If not `None`, a process pool with which to parallelize some
            operations.
        processes : `int`, optional
            The number of processes to use for conversion.
        """
        if pool is None and processes > 1:
            pool = Pool(processes)
        if calibs is None:
            calibs = [CalibRepo(path=None)]
        elif calibs and not self.config.doExpandDataIds:
            raise ValueError("Cannot convert calib repos with config.doExpandDataIds=False.")
        # BUGFIX: reruns=None is the documented default, but it was iterated
        # below without a guard, raising TypeError.
        if reruns is None:
            reruns = []
        if visits is not None:
            subset = ConversionSubset(instrument=self.instrument.getName(), visits=frozenset(visits))
        else:
            if self.config.relatedOnly:
                self.log.warning(
                    "config.relatedOnly is True but all visits are being ingested; "
                    "no filtering will be done."
                )
            subset = None
        if not self.config.doExpandDataIds and self.butler3.datastore.needs_expanded_data_ids(
            self.config.transfer
        ):
            # BUGFIX: the message previously had no placeholder for the
            # transfer-mode argument passed to the logger.
            self.log.warning(
                "config.doExpandDataIds=False but datastore reports that expanded data IDs may be "
                "needed for transfer mode %s.",
                self.config.transfer,
            )

        # Check that at most one CalibRepo is marked as default, to fail
        # before we actually write anything.
        defaultCalibRepos = [c.path for c in calibs if c.default]
        if len(defaultCalibRepos) > 1:
            raise ValueError(f"Multiple calib repos marked as default: {defaultCalibRepos}.")

        # Make converters for all Gen2 repos.
        converters = []
        # Start with the root repo, which must always be given even if we are
        # not configured to convert anything from it.
        rootConverter = RootRepoConverter(task=self, root=root, subset=subset, instrument=self.instrument)
        converters.append(rootConverter)
        # Calibration repos are next.
        for spec in calibs:
            calibRoot = spec.path
            if calibRoot is not None:
                if not os.path.isabs(calibRoot):
                    calibRoot = os.path.join(rootConverter.root, calibRoot)
                converter = CalibRepoConverter(
                    task=self,
                    root=calibRoot,
                    labels=spec.labels,
                    instrument=self.instrument,
                    mapper=rootConverter.mapper,
                    subset=rootConverter.subset,
                )
                converters.append(converter)
            # CalibRepo entries that don't have a path are just there for
            # curated calibs and maybe to set up a collection pointer; that's
            # handled further down (after we've done everything we can that
            # doesn't involve actually writing to the output Gen3 repo).
        # And now reruns.
        rerunConverters = {}
        for spec in reruns:
            runRoot = spec.path
            if not os.path.isabs(runRoot):
                runRoot = os.path.join(rootConverter.root, runRoot)
            spec.guessCollectionNames(self.instrument, rootConverter.root)
            converter = StandardRepoConverter(
                task=self,
                root=runRoot,
                run=spec.runName,
                instrument=self.instrument,
                subset=rootConverter.subset,
            )
            converters.append(converter)
            rerunConverters[spec.runName] = converter

        # Walk Gen2 repos to find datasets to convert.
        for converter in converters:
            converter.prep()

        # Register the instrument if we're configured to do so.
        if self.config.doRegisterInstrument and not self.dry_run:
            self.instrument.register(self.registry)

        # Run raw ingest (does nothing if we weren't configured to convert the
        # 'raw' dataset type).
        rootConverter.runRawIngest(pool=pool)

        # Write curated calibrations to all calibration collections where they
        # were requested (which may be implicit, by passing calibs=None). Also
        # set up a CHAINED collection that points to the default CALIBRATION
        # collection if one is needed.
        if not self.dry_run:
            for spec in calibs:
                if spec.curated:
                    self.instrument.writeCuratedCalibrations(self.butler3, labels=spec.labels)
                if spec.default and spec.labels:
                    # This is guaranteed to be True at most once in the loop by
                    # logic at the top of this method.
                    defaultCalibName = self.instrument.makeCalibrationCollectionName()
                    self.butler3.registry.registerCollection(defaultCalibName, CollectionType.CHAINED)
                    recommendedCalibName = self.instrument.makeCalibrationCollectionName(*spec.labels)
                    self.butler3.registry.registerCollection(recommendedCalibName, CollectionType.CALIBRATION)
                    self.butler3.registry.setCollectionChain(defaultCalibName, [recommendedCalibName])

        # Define visits (also does nothing if we weren't configured to convert
        # the 'raw' dataset type).
        rootConverter.runDefineVisits(pool=pool)

        # Insert dimensions that are potentially shared by all Gen2
        # repositories (and are hence managed directly by the Task, rather
        # than a converter instance).
        # This also finishes setting up the (shared) converter.subsets object
        # that is used to filter data IDs for config.relatedOnly.
        self.registerUsedSkyMaps(rootConverter.subset)
        self.registerUsedSkyPix(rootConverter.subset)

        # Look for datasets, generally by scanning the filesystem.
        # This requires dimensions to have already been inserted so we can use
        # dimension information to identify related datasets.
        for converter in converters:
            converter.findDatasets()

        # Expand data IDs.
        if self.config.doExpandDataIds:
            for converter in converters:
                converter.expandDataIds()

        if self.dry_run:
            return

        # Actually ingest datasets.
        for converter in converters:
            converter.ingest()

        # Perform any post-ingest processing.
        for converter in converters:
            converter.finish()

        # Make the umbrella collection, if desired.
        if self.config.doMakeUmbrellaCollection:
            umbrella = self.instrument.makeUmbrellaCollectionName()
            self.registry.registerCollection(umbrella, CollectionType.CHAINED)
            children = list(self.registry.getCollectionChain(umbrella))
            children.extend(rootConverter.getCollectionChain())
            children.append(self.instrument.makeCalibrationCollectionName())
            if BaseSkyMap.SKYMAP_RUN_COLLECTION_NAME not in children:
                # Ensure the umbrella collection includes the global skymap
                # collection, even if it's currently empty.
                self.registry.registerRun(BaseSkyMap.SKYMAP_RUN_COLLECTION_NAME)
                children.append(BaseSkyMap.SKYMAP_RUN_COLLECTION_NAME)
            children.extend(self.config.extraUmbrellaChildren)
            self.log.info("Defining %s from chain %s.", umbrella, children)
            self.registry.setCollectionChain(umbrella, children)

        # Add chained collections for reruns.
        for spec in reruns:
            if spec.chainName is not None:
                self.butler3.registry.registerCollection(spec.chainName, type=CollectionType.CHAINED)
                chain = [spec.runName]
                chain.extend(rerunConverters[spec.runName].getCollectionChain())
                for parent in spec.parents:
                    chain.append(parent)
                    parentConverter = rerunConverters.get(parent)
                    if parentConverter is not None:
                        chain.extend(parentConverter.getCollectionChain())
                chain.extend(rootConverter.getCollectionChain())
                if len(calibs) == 1:
                    # Exactly one calibration repo being converted, so it's
                    # safe-ish to assume that's the one the rerun used.
                    chain.append(self.instrument.makeCalibrationCollectionName(*calibs[0].labels))
                self.log.info("Defining %s from chain %s.", spec.chainName, chain)
                self.butler3.registry.setCollectionChain(spec.chainName, chain, flatten=True)