Coverage for python/lsst/obs/base/gen2to3/convertRepo.py: 26%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of obs_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ["CalibRepo", "ConvertRepoConfig", "ConvertRepoTask", "ConvertRepoSkyMapConfig", "Rerun"]
25import fnmatch
26import os
27from dataclasses import dataclass
28from multiprocessing import Pool
29from typing import Iterable, List, Optional, Tuple
31from lsst.daf.butler import Butler as Butler3
32from lsst.daf.butler import CollectionType, SkyPixDimension
33from lsst.pex.config import Config, ConfigDictField, ConfigurableField, DictField, Field, ListField
34from lsst.pipe.base import Task
35from lsst.resources import ResourcePath
36from lsst.skymap import BaseSkyMap, skyMapRegistry
38from .._instrument import Instrument
39from ..defineVisits import DefineVisitsTask
40from ..ingest import RawIngestTask
41from .calibRepoConverter import CalibRepoConverter
42from .repoConverter import ConversionSubset
43from .rootRepoConverter import RootRepoConverter
44from .standardRepoConverter import StandardRepoConverter
@dataclass
class ConfiguredSkyMap:
    """Struct containing information about a skymap that may appear in a Gen2
    repository.
    """

    name: str
    """Name of the skymap used in Gen3 data IDs.
    """

    sha1: bytes
    """Hash computed by `BaseSkyMap.getSha1`.
    """

    instance: BaseSkyMap
    """The skymap instance itself.
    """

    used: bool = False
    """Whether this skymap has been found in at least one repository being
    converted.
    """
71def _dropPrefix(s: str, prefix: str) -> Tuple[str, bool]:
72 """If ``s`` starts with ``prefix``, return the rest of ``s`` and `True`.
73 Otherwise return ``s`` and `False`.
74 """
75 if s.startswith(prefix):
76 return s[len(prefix) :], True
77 return s, False
@dataclass
class Rerun:
    """Specification for a Gen2 processing-output repository to convert."""

    path: str
    """Location of the Gen2 repository, given either as an absolute path or
    one relative to the root repository (`str`).
    """

    runName: Optional[str]
    """`~lsst.daf.butler.CollectionType.RUN` collection that converted
    datasets are inserted into (`str` or `None`).

    When `None`, `guessCollectionNames` is expected to fill this in.
    """

    chainName: Optional[str]
    """`~lsst.daf.butler.CollectionType.CHAINED` collection that groups this
    repository's datasets together with those of its parent repositories
    (`str` or `None`).

    When `None`, `guessCollectionNames` is expected to fill this in.
    """

    parents: List[str]
    """Collection names for parent repositories, used when building the
    chained collection (`list` [ `str` ]).

    Ignored when `chainName` is `None`; runs from the root repo are added
    automatically.
    """

    def guessCollectionNames(self, instrument: Instrument, root: str) -> None:
        """Update `runName` and `chainName` with guesses that match Gen3
        naming conventions.

        If `chainName` is not `None` and `runName` is, `runName` is derived
        from it.  If `runName` is already set, nothing is changed, and in
        that case a `None` `chainName` means no chained collection will be
        created.

        Parameters
        ----------
        instrument : `Instrument`
            Instrument object for the repository being converted.
        root : `str`
            Path to the root repository.  If it prefixes ``self.path``, it is
            stripped while generating the run name.

        Raises
        ------
        ValueError
            Raised if the appropriate collection names cannot be inferred.
        """
        if self.runName is not None:
            # An explicit run name was supplied; leave everything untouched.
            return
        if self.chainName is None:
            self.chainName = self._guessChainName(instrument, root)
        # Direct outputs of the rerun live in a "/direct" run beneath the
        # chained collection's name.
        self.runName = f"{self.chainName}/direct"

    def _guessChainName(self, instrument: Instrument, root: str) -> str:
        """Derive a chained-collection name from ``self.path``, following the
        Gen3 conventions for shared and personal ("u/...") collections.
        """
        if os.path.isabs(self.path):
            # Express the rerun location relative to the root repo; if that
            # is impossible, no sensible name can be inferred.
            relative = ResourcePath(self.path).relative_to(ResourcePath(root))
            if relative is None:
                raise ValueError(
                    f"Cannot guess run name collection for rerun at '{self.path}': "
                    f"no clear relationship to root '{root}'."
                )
            name = relative
        else:
            name = self.path
        name, _ = _dropPrefix(name, "rerun/")
        name, isPersonal = _dropPrefix(name, "private/")
        if isPersonal:
            name = f"u/{name}"
        else:
            name, _ = _dropPrefix(name, "shared/")
        return instrument.makeCollectionName("runs", name)
@dataclass
class CalibRepo:
    """Specification for a Gen2 calibration repository to convert."""

    path: Optional[str]
    """Location of the Gen2 repository, given either as an absolute path or
    one relative to the root repository (`str` or `None`).

    When `None`, no Gen2 calibration datasets are converted, though curated
    calibrations may still be written.
    """

    curated: bool = True
    """Whether curated calibrations should be written into the associated
    ``CALIBRATION`` collection (`bool`).
    """

    labels: Tuple[str, ...] = ()
    """Extra strings spliced into collection names — both the ``RUN``
    collections datasets are ingested directly into and the ``CALIBRATION``
    collection that attaches validity ranges to them.

    Leaving this empty populates the instrument's default calibration
    collection directly with the converted datasets, which is incompatible
    with ``default=False``; that suits test data repositories where only one
    ``CALIBRATION`` collection will ever exist.  Otherwise a non-empty tuple
    should be used, so the default calibration collection can instead be a
    ``CHAINED`` pointer at the currently recommended ``CALIBRATION``
    collection.
    """

    default: bool = True
    """Whether the created ``CALIBRATION`` collection should become the
    instrument's default.

    At most one of the calibration repos passed to `ConvertRepoTask.run` may
    set this to `True`; it defaults to `True` because usually only a single
    calibration repo is converted.  With non-empty ``labels``,
    ``default=True`` creates a ``CHAINED`` collection pointing at the
    converted ``CALIBRATION`` collection; with empty ``labels``, ``default``
    *must* be `True` and no ``CHAINED`` pointer is needed.
    """

    def __post_init__(self) -> None:
        # Empty labels write straight into the default calibration
        # collection, which only makes sense when this repo *is* the default.
        if not (self.labels or self.default):
            raise ValueError("labels=() requires default=True")
class ConvertRepoSkyMapConfig(Config):
    """Sub-config used to hold the parameters of a SkyMap.

    Notes
    -----
    This config only needs to exist because we can't put a
    `~lsst.pex.config.RegistryField` directly inside a
    `~lsst.pex.config.ConfigDictField`.

    It needs to have its only field named "skyMap" for compatibility with the
    configuration of `lsst.pipe.tasks.MakeSkyMapTask`, which we want so we can
    use one config file in an obs package to configure both.

    This name leads to unfortunate repetition with the field named
    "skymap" that holds it - "skyMap[name].skyMap" - but that seems
    unavoidable.
    """

    # Registry field selecting the skymap class (e.g. "dodeca") and holding
    # that class's own configuration.
    skyMap = skyMapRegistry.makeField(
        doc="Type and parameters for the SkyMap itself.",
        default="dodeca",
    )
class ConvertRepoConfig(Config):
    """Configuration for `ConvertRepoTask`."""

    raws = ConfigurableField(
        "Configuration for subtask responsible for ingesting raws and adding exposure dimension entries.",
        target=RawIngestTask,
    )
    defineVisits = ConfigurableField(
        "Configuration for the subtask responsible for defining visits from exposures.",
        target=DefineVisitsTask,
    )
    skyMaps = ConfigDictField(
        "Mapping from Gen3 skymap name to the parameters used to construct a "
        "BaseSkyMap instance. This will be used to associate names with "
        "existing skymaps found in the Gen2 repo.",
        keytype=str,
        itemtype=ConvertRepoSkyMapConfig,
        default={},
    )
    rootSkyMapName = Field(
        "Name of a Gen3 skymap (an entry in ``self.skyMaps``) to assume for "
        "datasets in the root repository when no SkyMap is found there. ",
        dtype=str,
        optional=True,
        default=None,
    )
    runs = DictField(
        "A mapping from dataset type name to the RUN collection they should "
        "be inserted into. This must include all datasets that can be found "
        "in the root repository; other repositories will use per-repository "
        "runs.",
        keytype=str,
        itemtype=str,
        default={},
    )
    runsForced = DictField(
        "Like ``runs``, but is used even when the dataset is present in a "
        "non-root repository (i.e. rerun), overriding the non-root "
        "repository's main collection.",
        keytype=str,
        itemtype=str,
        default={
            "brightObjectMask": "masks",
        },
    )
    storageClasses = DictField(
        "Mapping from dataset type name or Gen2 policy entry (e.g. 'python' "
        "or 'persistable') to the Gen3 StorageClass name.",
        keytype=str,
        itemtype=str,
        default={
            "bias": "ExposureF",
            "dark": "ExposureF",
            "flat": "ExposureF",
            "defects": "Defects",
            "crosstalk": "CrosstalkCalib",
            "BaseSkyMap": "SkyMap",
            "BaseCatalog": "Catalog",
            "BackgroundList": "Background",
            "raw": "Exposure",
            "MultilevelParquetTable": "DataFrame",
            "ParquetTable": "DataFrame",
            "SkyWcs": "Wcs",
        },
    )
    formatterClasses = DictField(
        "Mapping from dataset type name to formatter class. "
        "By default these are derived from the formatters listed in the"
        " Gen3 datastore configuration.",
        keytype=str,
        itemtype=str,
        default={},
    )
    targetHandlerClasses = DictField(
        "Mapping from dataset type name to target handler class.", keytype=str, itemtype=str, default={}
    )
    doRegisterInstrument = Field(
        "If True (default), add dimension records for the Instrument and its "
        "filters and detectors to the registry instead of assuming they are "
        "already present.",
        dtype=bool,
        default=True,
    )
    refCats = ListField(
        "The names of reference catalogs (subdirectories under ref_cats) to be converted",
        dtype=str,
        default=[],
    )
    fileIgnorePatterns = ListField(
        "Filename globs that should be ignored instead of being treated as datasets.",
        dtype=str,
        default=[
            "README.txt",
            "*.*~*",
            "butler.yaml",
            "gen3.sqlite3",
            "registry.sqlite3",
            "calibRegistry.sqlite3",
            "_mapper",
            "_parent",
            "repositoryCfg.yaml",
        ],
    )
    rawDatasetType = Field(
        "Gen2 dataset type to use for raw data.",
        dtype=str,
        default="raw",
    )
    datasetIncludePatterns = ListField(
        "Glob-style patterns for dataset type names that should be converted.", dtype=str, default=["*"]
    )
    datasetIgnorePatterns = ListField(
        "Glob-style patterns for dataset type names that should not be "
        "converted despite matching a pattern in datasetIncludePatterns.",
        dtype=str,
        default=[],
    )
    datasetTemplateOverrides = DictField(
        "Overrides for Gen2 filename templates, keyed by dataset type. "
        "This can be used to support conversions of Gen2 repos whose mapper "
        "templates were modified in obs_* packages since the datasets were "
        "written.",
        keytype=str,
        itemtype=str,
        default={},
    )
    ccdKey = Field(
        "Key used for the Gen2 equivalent of 'detector' in data IDs.",
        dtype=str,
        default="ccd",
    )
    # NOTE: help text previously claimed True was the default, contradicting
    # default=False below; the text has been corrected.
    relatedOnly = Field(
        "If True, only convert datasets that are related to the "
        "ingested visits. Ignored unless a list of visits is passed to "
        "run().",
        dtype=bool,
        default=False,
    )
    doExpandDataIds = Field(
        "If True (default), expand data IDs to include extra metadata before "
        "ingesting them. "
        "This may be required in order to associate calibration datasets with "
        "validity ranges or populate file templates, so setting this to False "
        "is considered advanced usage (and it may not always work). When it "
        "does, it can provide a considerable speedup.",
        dtype=bool,
        default=True,
    )
    doMakeUmbrellaCollection = Field(
        "If True (default), define an '<instrument>/defaults' CHAINED "
        "collection that includes everything found in the root repo as well "
        "as the default calibration collection.",
        dtype=bool,
        default=True,
    )
    extraUmbrellaChildren = ListField(
        "Additional child collections to include in the umbrella collection. "
        "Ignored if doMakeUmbrellaCollection=False.",
        dtype=str,
        default=[],
    )

    @property
    def transfer(self):
        """Transfer mode used when ingesting raws (forwarded to the ``raws``
        subtask configuration).
        """
        return self.raws.transfer

    @transfer.setter
    def transfer(self, value):
        self.raws.transfer = value

    def setDefaults(self):
        # Default to no transfer mode for raw ingest.
        self.transfer = None

    def validate(self):
        """Validate the configuration.

        Raises
        ------
        ValueError
            Raised if ``relatedOnly`` is set without ``doExpandDataIds``,
            since relating datasets to visits requires expanded data IDs.
        """
        super().validate()
        if self.relatedOnly and not self.doExpandDataIds:
            raise ValueError("relatedOnly requires doExpandDataIds.")
class ConvertRepoTask(Task):
    """A task that converts one or more related Gen2 data repositories to a
    single Gen3 data repository (with multiple collections).

    Parameters
    ----------
    config : `ConvertRepoConfig`
        Configuration for this task.
    butler3 : `lsst.daf.butler.Butler`
        A writeable Gen3 Butler instance that represents the data repository
        that datasets will be ingested into. If the 'raw' dataset is
        configured to be included in the conversion, ``butler3.run`` should be
        set to the name of the collection raws should be ingested into, and
        ``butler3.collections`` should include a calibration collection from
        which the ``camera`` dataset can be loaded, unless a calibration repo
        is converted and ``doWriteCuratedCalibrations`` is `True`.
    instrument : `lsst.obs.base.Instrument`
        The Gen3 instrument that should be used for this conversion.
    dry_run : `bool`, optional
        If `True` (`False` is default), make no changes to the Gen3 data
        repository while running as many steps as possible. This option is
        best used with a read-only ``butler3`` argument to ensure unexpected
        edge cases respect this argument (and fail rather than write if they
        do not).
    **kwargs
        Other keyword arguments are forwarded to the `Task` constructor.

    Notes
    -----
    Most of the work of converting repositories is delegated to instances of
    the `RepoConverter` hierarchy. The `ConvertRepoTask` instance itself holds
    only state that is relevant for all Gen2 repositories being ingested, while
    each `RepoConverter` instance holds only state relevant for the conversion
    of a single Gen2 repository. Both the task and the `RepoConverter`
    instances are single use; `ConvertRepoTask.run` and most `RepoConverter`
    methods may only be called once on a particular instance.
    """

    ConfigClass = ConvertRepoConfig

    _DefaultName = "convertRepo"

    def __init__(
        self, config=None, *, butler3: Butler3, instrument: Instrument, dry_run: bool = False, **kwargs
    ):
        config.validate()  # Not a CmdlineTask nor PipelineTask, so have to validate the config here.
        super().__init__(config, **kwargs)
        # Make self.butler3 one that doesn't have any collections associated
        # with it - those are needed by RawIngestTask and DefineVisitsTask, but
        # we don't want them messing with converted datasets, because those
        # have their own logic for figuring out which collections to write to.
        self.butler3 = Butler3(butler=butler3)
        self.registry = self.butler3.registry
        self.universe = self.registry.dimensions
        if self.isDatasetTypeIncluded("raw"):
            self.makeSubtask("raws", butler=butler3)
            self.makeSubtask("defineVisits", butler=butler3)
        else:
            self.raws = None
            self.defineVisits = None
        self.instrument = instrument
        self._configuredSkyMapsBySha1 = {}
        self._configuredSkyMapsByName = {}
        for name, config in self.config.skyMaps.items():
            instance = config.skyMap.apply()
            self._populateSkyMapDicts(name, instance)
        self._usedSkyPix = set()
        self.translatorFactory = self.instrument.makeDataIdTranslatorFactory()
        self.translatorFactory.log = self.log.getChild("translators")
        self.dry_run = dry_run

    def _reduce_kwargs(self):
        # Add extra parameters to pickle.
        return dict(**super()._reduce_kwargs(), butler3=self.butler3, instrument=self.instrument)

    def _populateSkyMapDicts(self, name, instance):
        # Index the skymap both by content hash and by Gen3 name.
        struct = ConfiguredSkyMap(name=name, sha1=instance.getSha1(), instance=instance)
        self._configuredSkyMapsBySha1[struct.sha1] = struct
        self._configuredSkyMapsByName[struct.name] = struct

    def isDatasetTypeIncluded(self, datasetTypeName: str):
        """Return `True` if configuration indicates that the given dataset type
        should be converted.

        This method is intended to be called primarily by the
        `RepoConverter` instances used internally by the task.

        Parameters
        ----------
        datasetTypeName : `str`
            Name of the dataset type.

        Returns
        -------
        included : `bool`
            Whether the dataset should be included in the conversion.
        """
        return any(
            fnmatch.fnmatchcase(datasetTypeName, pattern) for pattern in self.config.datasetIncludePatterns
        ) and not any(
            fnmatch.fnmatchcase(datasetTypeName, pattern) for pattern in self.config.datasetIgnorePatterns
        )

    def useSkyMap(self, skyMap: BaseSkyMap, skyMapName: str) -> str:
        """Indicate that a repository uses the given SkyMap.

        This method is intended to be called primarily by the
        `RepoConverter` instances used internally by the task.

        Parameters
        ----------
        skyMap : `lsst.skymap.BaseSkyMap`
            SkyMap instance being used, typically retrieved from a Gen2
            data repository.
        skyMapName : `str`
            The name of the gen2 skymap, for error reporting.

        Returns
        -------
        name : `str`
            The name of the skymap in Gen3 data IDs.

        Raises
        ------
        LookupError
            Raised if the specified skymap cannot be found.
        """
        sha1 = skyMap.getSha1()
        if sha1 not in self._configuredSkyMapsBySha1:
            # Not pre-configured; register it under the Gen2 name.
            self._populateSkyMapDicts(skyMapName, skyMap)
        try:
            struct = self._configuredSkyMapsBySha1[sha1]
        except KeyError as err:
            msg = f"SkyMap '{skyMapName}' with sha1={sha1} not included in configuration."
            raise LookupError(msg) from err
        struct.used = True
        return struct.name

    def registerUsedSkyMaps(self, subset: Optional[ConversionSubset]):
        """Register all skymaps that have been marked as used.

        This method is intended to be called primarily by the
        `RepoConverter` instances used internally by the task.

        Parameters
        ----------
        subset : `ConversionSubset`, optional
            Object that will be used to filter converted datasets by data ID.
            If given, it will be updated with the tracts of this skymap that
            overlap the visits in the subset.
        """
        for struct in self._configuredSkyMapsBySha1.values():
            if struct.used:
                if not self.dry_run:
                    try:
                        # If the skymap isn't registered, this will raise.
                        self.butler3.registry.expandDataId(skymap=struct.name)
                    except LookupError:
                        self.log.info("Registering skymap %s.", struct.name)
                        struct.instance.register(struct.name, self.butler3)
                if subset is not None and self.config.relatedOnly:
                    subset.addSkyMap(self.registry, struct.name)

    def useSkyPix(self, dimension: SkyPixDimension):
        """Indicate that a repository uses the given SkyPix dimension.

        This method is intended to be called primarily by the
        `RepoConverter` instances used internally by the task.

        Parameters
        ----------
        dimension : `lsst.daf.butler.SkyPixDimension`
            Dimension representing a pixelization of the sky.
        """
        self._usedSkyPix.add(dimension)

    def registerUsedSkyPix(self, subset: Optional[ConversionSubset]):
        """Register all sky pixelization dimensions that have been marked as
        used.

        This method is intended to be called primarily by the
        `RepoConverter` instances used internally by the task.

        Parameters
        ----------
        subset : `ConversionSubset`, optional
            Object that will be used to filter converted datasets by data ID.
            If given, it will be updated with the pixelization IDs that
            overlap the visits in the subset.
        """
        if subset is not None and self.config.relatedOnly:
            for dimension in self._usedSkyPix:
                subset.addSkyPix(self.registry, dimension)

    def run(
        self,
        root: str,
        *,
        calibs: Optional[List[CalibRepo]] = None,
        reruns: Optional[List[Rerun]] = None,
        visits: Optional[Iterable[int]] = None,
        pool: Optional[Pool] = None,
        processes: int = 1,
    ):
        """Convert a group of related data repositories.

        Parameters
        ----------
        root : `str`
            Complete path to the root Gen2 data repository. This should be
            a data repository that includes a Gen2 registry and any raw files
            and/or reference catalogs.
        calibs : `list` of `CalibRepo`
            Specifications for Gen2 calibration repos to convert. If `None`
            (default), curated calibrations only will be written to the default
            calibration collection for this instrument; set to ``()``
            explicitly to disable this.
        reruns : `list` of `Rerun`
            Specifications for rerun (processing output) repos to convert. If
            `None` (default), no reruns are converted.
        visits : iterable of `int`, optional
            The integer IDs of visits to convert. If not provided, all visits
            in the Gen2 root repository will be converted.
        pool : `multiprocessing.Pool`, optional
            If not `None`, a process pool with which to parallelize some
            operations.
        processes : `int`, optional
            The number of processes to use for conversion.
        """
        if pool is None and processes > 1:
            pool = Pool(processes)
        if calibs is None:
            calibs = [CalibRepo(path=None)]
        elif calibs and not self.config.doExpandDataIds:
            raise ValueError("Cannot convert calib repos with config.doExpandDataIds=False.")
        if reruns is None:
            # The documented default is "convert no reruns"; guard against
            # iterating over None below.
            reruns = []
        if visits is not None:
            subset = ConversionSubset(instrument=self.instrument.getName(), visits=frozenset(visits))
        else:
            if self.config.relatedOnly:
                self.log.warning(
                    "config.relatedOnly is True but all visits are being ingested; "
                    "no filtering will be done."
                )
            subset = None
        if not self.config.doExpandDataIds and self.butler3.datastore.needs_expanded_data_ids(
            self.config.transfer
        ):
            # Include the transfer mode via a lazy %-style placeholder; the
            # original message had no placeholder for the extra argument.
            self.log.warning(
                "config.doExpandDataIds=False but datastore reports that expanded data IDs may be "
                "needed for transfer mode %s.",
                self.config.transfer,
            )

        # Check that at most one CalibRepo is marked as default, to fail before
        # we actually write anything.
        defaultCalibRepos = [c.path for c in calibs if c.default]
        if len(defaultCalibRepos) > 1:
            raise ValueError(f"Multiple calib repos marked as default: {defaultCalibRepos}.")

        # Make converters for all Gen2 repos.
        converters = []
        # Start with the root repo, which must always be given even if we are
        # not configured to convert anything from it.
        rootConverter = RootRepoConverter(task=self, root=root, subset=subset, instrument=self.instrument)
        converters.append(rootConverter)
        # Calibration repos are next.
        for spec in calibs:
            calibRoot = spec.path
            if calibRoot is not None:
                if not os.path.isabs(calibRoot):
                    calibRoot = os.path.join(rootConverter.root, calibRoot)
                converter = CalibRepoConverter(
                    task=self,
                    root=calibRoot,
                    labels=spec.labels,
                    instrument=self.instrument,
                    mapper=rootConverter.mapper,
                    subset=rootConverter.subset,
                )
                converters.append(converter)
            # CalibRepo entries that don't have a path are just there for
            # curated calibs and maybe to set up a collection pointer; that's
            # handled further down (after we've done everything we can that
            # doesn't involve actually writing to the output Gen3 repo).
        # And now reruns.
        rerunConverters = {}
        for spec in reruns:
            runRoot = spec.path
            if not os.path.isabs(runRoot):
                runRoot = os.path.join(rootConverter.root, runRoot)
            spec.guessCollectionNames(self.instrument, rootConverter.root)
            converter = StandardRepoConverter(
                task=self,
                root=runRoot,
                run=spec.runName,
                instrument=self.instrument,
                subset=rootConverter.subset,
            )
            converters.append(converter)
            rerunConverters[spec.runName] = converter

        # Walk Gen2 repos to find datasets to convert.
        for converter in converters:
            converter.prep()

        # Register the instrument if we're configured to do so.
        if self.config.doRegisterInstrument and not self.dry_run:
            self.instrument.register(self.registry)

        # Run raw ingest (does nothing if we weren't configured to convert the
        # 'raw' dataset type).
        rootConverter.runRawIngest(pool=pool)

        # Write curated calibrations to all calibration collections where they
        # were requested (which may be implicit, by passing calibs=None). Also
        # set up a CHAINED collection that points to the default CALIBRATION
        # collection if one is needed.
        if not self.dry_run:
            for spec in calibs:
                if spec.curated:
                    self.instrument.writeCuratedCalibrations(self.butler3, labels=spec.labels)
                if spec.default and spec.labels:
                    # This is guaranteed to be True at most once in the loop by
                    # logic at the top of this method.
                    defaultCalibName = self.instrument.makeCalibrationCollectionName()
                    self.butler3.registry.registerCollection(defaultCalibName, CollectionType.CHAINED)
                    recommendedCalibName = self.instrument.makeCalibrationCollectionName(*spec.labels)
                    self.butler3.registry.registerCollection(recommendedCalibName, CollectionType.CALIBRATION)
                    self.butler3.registry.setCollectionChain(defaultCalibName, [recommendedCalibName])

        # Define visits (also does nothing if we weren't configured to convert
        # the 'raw' dataset type).
        rootConverter.runDefineVisits()

        # Insert dimensions that are potentially shared by all Gen2
        # repositories (and are hence managed directly by the Task, rather
        # than a converter instance).
        # This also finishes setting up the (shared) converter.subsets object
        # that is used to filter data IDs for config.relatedOnly.
        self.registerUsedSkyMaps(rootConverter.subset)
        self.registerUsedSkyPix(rootConverter.subset)

        # Look for datasets, generally by scanning the filesystem.
        # This requires dimensions to have already been inserted so we can use
        # dimension information to identify related datasets.
        for converter in converters:
            converter.findDatasets()

        # Expand data IDs.
        if self.config.doExpandDataIds:
            for converter in converters:
                converter.expandDataIds()

        if self.dry_run:
            # Stop before anything is written to the Gen3 repo.
            return

        # Actually ingest datasets.
        for converter in converters:
            converter.ingest()

        # Perform any post-ingest processing.
        for converter in converters:
            converter.finish()

        # Make the umbrella collection, if desired.
        if self.config.doMakeUmbrellaCollection:
            umbrella = self.instrument.makeUmbrellaCollectionName()
            self.registry.registerCollection(umbrella, CollectionType.CHAINED)
            children = list(self.registry.getCollectionChain(umbrella))
            children.extend(rootConverter.getCollectionChain())
            children.append(self.instrument.makeCalibrationCollectionName())
            if BaseSkyMap.SKYMAP_RUN_COLLECTION_NAME not in children:
                # Ensure the umbrella collection includes the global skymap
                # collection, even if it's currently empty.
                self.registry.registerRun(BaseSkyMap.SKYMAP_RUN_COLLECTION_NAME)
                children.append(BaseSkyMap.SKYMAP_RUN_COLLECTION_NAME)
            children.extend(self.config.extraUmbrellaChildren)
            self.log.info("Defining %s from chain %s.", umbrella, children)
            self.registry.setCollectionChain(umbrella, children)

        # Add chained collections for reruns.
        for spec in reruns:
            if spec.chainName is not None:
                self.butler3.registry.registerCollection(spec.chainName, type=CollectionType.CHAINED)
                chain = [spec.runName]
                chain.extend(rerunConverters[spec.runName].getCollectionChain())
                for parent in spec.parents:
                    chain.append(parent)
                    parentConverter = rerunConverters.get(parent)
                    if parentConverter is not None:
                        chain.extend(parentConverter.getCollectionChain())
                chain.extend(rootConverter.getCollectionChain())
                if len(calibs) == 1:
                    # Exactly one calibration repo being converted, so it's
                    # safe-ish to assume that's the one the rerun used.
                    chain.append(self.instrument.makeCalibrationCollectionName(*calibs[0].labels))
                self.log.info("Defining %s from chain %s.", spec.chainName, chain)
                self.butler3.registry.setCollectionChain(spec.chainName, chain, flatten=True)