lsst.obs.base  19.0.0-21-gaaa92db+8
rootRepoConverter.py
Go to the documentation of this file.
1 # This file is part of obs_base.
2 #
3 # Developed for the LSST Data Management System.
4 # This product includes software developed by the LSST Project
5 # (https://www.lsst.org).
6 # See the COPYRIGHT file at the top-level directory of this distribution
7 # for details of code ownership.
8 #
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
13 #
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the GNU General Public License
20 # along with this program. If not, see <http://www.gnu.org/licenses/>.
21 from __future__ import annotations
22 
23 __all__ = ["RootRepoConverter"]
24 
25 import os
26 import re
27 import itertools
28 from typing import TYPE_CHECKING, Iterator, Optional, Tuple, List
29 
30 from lsst.skymap import BaseSkyMap
31 from lsst.daf.butler import DatasetType, DatasetRef, FileDataset
32 from .calibRepoConverter import CURATED_CALIBRATION_DATASET_TYPES
33 from .standardRepoConverter import StandardRepoConverter
34 
# Maps each coadd name to the name of its skymap dataset type
# ("<coaddName>Coadd_skyMap").
SKYMAP_DATASET_TYPES = dict(
    (coaddName, f"{coaddName}Coadd_skyMap")
    for coaddName in ("deep", "goodSeeing", "dcr")
)
38 
39 if TYPE_CHECKING:
40  from lsst.daf.butler import SkyPixDimension
41  from ..ingest import RawExposureData
42 
43 
45  """A specialization of `RepoConverter` for root data repositories.
46 
47  `RootRepoConverter` adds support for raw images (mostly delegated to the
48  parent task's `RawIngestTask` subtask) and reference catalogs.
49 
50  Parameters
51  ----------
52  kwds
53  Keyword arguments are forwarded to (and required by) `RepoConverter`.
54  """
55 
def __init__(self, **kwds):
    # All keyword arguments are passed through to the parent constructor.
    super().__init__(**kwds)
    # Accumulators populated later by prep(): prepared raw exposure data,
    # and (reference catalog name, skypix dimension) pairs.
    self._exposureData: List[RawExposureData] = []
    self._refCats: List[Tuple[str, SkyPixDimension]] = []
    config = self.task.config
    rootSkyMapName = config.rootSkyMapName
    # Build the configured root skymap once up front; None records that no
    # root skymap name was configured.
    self._rootSkyMap = (
        None if rootSkyMapName is None else config.skyMaps[rootSkyMapName].skyMap.apply()
    )
64 
def isDatasetTypeSpecial(self, datasetTypeName: str) -> bool:
    # Docstring inherited from RepoConverter.
    # Anything the parent class already treats as special stays special.
    inherited = super().isDatasetTypeSpecial(datasetTypeName)
    if inherited:
        return inherited
    # Raws and reference catalogs get dedicated handling in this converter.
    if datasetTypeName in ("raw", "ref_cat", "ref_cat_config"):
        return True
    # in Gen2, some of these are in the root repo, not a calib repo
    return datasetTypeName in CURATED_CALIBRATION_DATASET_TYPES
73 
def getSpecialDirectories(self) -> List[str]:
    # Docstring inherited from RepoConverter.
    inherited = super().getSpecialDirectories()
    # Extend the parent's list with the directories named here; a new list
    # is returned so the parent's result is not modified in place.
    return inherited + ["CALIB", "ref_cats", "rerun"]
77 
def findMatchingSkyMap(self, datasetTypeName: str) -> Tuple[Optional[BaseSkyMap], Optional[str]]:
    # Docstring inherited from StandardRepoConverter.findMatchingSkyMap.
    skyMap, name = super().findMatchingSkyMap(datasetTypeName)
    # A skymap found by the parent implementation always wins.
    if skyMap is not None:
        return skyMap, name
    rootSkyMapName = self.task.config.rootSkyMapName
    # With no configured root skymap there is nothing to fall back to.
    if rootSkyMapName is None:
        return skyMap, name
    # Fall back to the root skymap built in __init__.
    self.task.log.debug(
        ("Assuming configured root skymap with name '%s' for dataset %s."),
        rootSkyMapName, datasetTypeName
    )
    return self._rootSkyMap, rootSkyMapName
89 
def prep(self):
    # Docstring inherited from RepoConverter.
    # Gather information about raws.
    if self.task.raws is not None:
        self.task.log.info(f"Preparing raws from root {self.root}.")
        if self.subset is not None:
            # Only look up raws for the visits named by the subset.
            dataRefs = itertools.chain.from_iterable(
                self.butler2.subset("raw", visit=visit) for visit in self.subset.visits
            )
        else:
            dataRefs = self.butler2.subset("raw")
        self._exposureData.extend(self.task.raws.prep(dataRef.getUri() for dataRef in dataRefs))
    # Gather information about reference catalogs.
    if self.task.isDatasetTypeIncluded("ref_cat"):
        from lsst.meas.algorithms import DatasetConfig as RefCatDatasetConfig
        refCatsRoot = os.path.join(self.root, "ref_cats")
        if os.path.isdir(refCatsRoot):
            refCatNames = os.listdir(refCatsRoot)
        else:
            # Configured catalogs that are not on disk are already ignored
            # by the loop below, so a repository with no ref_cats directory
            # at all is treated the same way instead of letting os.listdir
            # raise FileNotFoundError.
            self.task.log.debug("No ref_cats directory found in root %s; skipping reference catalogs.",
                                self.root)
            refCatNames = []
        for refCat in refCatNames:
            path = os.path.join(refCatsRoot, refCat)
            configFile = os.path.join(path, "config.py")
            # Entries without a config.py are skipped.
            if not os.path.exists(configFile):
                continue
            # Only catalogs explicitly listed in the task configuration
            # are converted.
            if refCat not in self.task.config.refCats:
                continue
            self.task.log.info(f"Preparing ref_cat {refCat} from root {self.root}.")
            onDiskConfig = RefCatDatasetConfig()
            onDiskConfig.load(configFile)
            if onDiskConfig.indexer.name != "HTM":
                raise ValueError(f"Reference catalog '{refCat}' uses unsupported "
                                 f"pixelization '{onDiskConfig.indexer.name}'.")
            level = onDiskConfig.indexer["HTM"].depth
            try:
                dimension = self.task.universe[f"htm{level}"]
            except KeyError as err:
                raise ValueError(f"Reference catalog {refCat} uses HTM level {level}, but no htm{level} "
                                 f"skypix dimension is configured for this registry.") from err
            self.task.useSkyPix(dimension)
            # Remembered for iterDatasets, which yields the catalog files.
            self._refCats.append((refCat, dimension))
    if self.task.isDatasetTypeIncluded("brightObjectMask") and self.task.config.rootSkyMapName:
        self.task.useSkyMap(self._rootSkyMap)
    super().prep()
129 
131  # Docstring inherited from RepoConverter.
132  self.task.log.info(f"Inserting observation dimension records from {self.root}.")
133  records = {"visit": [], "exposure": [], "visit_detector_region": []}
134  for exposure in self._exposureData:
135  for dimension, recordsForDimension in exposure.records.items():
136  records[dimension].extend(recordsForDimension)
137  self.task.raws.insertDimensionData(records)
138 
def iterDatasets(self) -> Iterator[FileDataset]:
    # Docstring inherited from RepoConverter.
    # Catalog shard files are named exactly "<integer>.fits".  The whole
    # file name must match (fullmatch), so stray files such as
    # "123.fits.bak" or "123.fits~" are not yielded as datasets.
    shardRegex = re.compile(r"(\d+)\.fits")
    # Iterate over reference catalog files.
    for refCat, dimension in self._refCats:
        datasetType = DatasetType(refCat, dimensions=[dimension], universe=self.task.universe,
                                  storageClass="SimpleCatalog")
        refCatDir = os.path.join(self.root, "ref_cats", refCat)
        if self.subset is None:
            # No subset: take every shard file present on disk.
            for fileName in os.listdir(refCatDir):
                m = shardRegex.fullmatch(fileName)
                if m is None:
                    continue
                htmId = int(m.group(1))
                dataId = self.task.registry.expandDataId({dimension: htmId})
                yield FileDataset(path=os.path.join(refCatDir, fileName),
                                  refs=DatasetRef(datasetType, dataId))
        else:
            # Subset: generate file names from the half-open [begin, end)
            # pixel index ranges the subset provides for this dimension.
            for begin, end in self.subset.skypix[dimension]:
                for htmId in range(begin, end):
                    dataId = self.task.registry.expandDataId({dimension: htmId})
                    yield FileDataset(path=os.path.join(refCatDir, f"{htmId}.fits"),
                                      refs=DatasetRef(datasetType, dataId))
    # Then everything the parent class knows how to find.
    yield from super().iterDatasets()
161 
def ingest(self):
    # Docstring inherited from RepoConverter.
    rawIngest = self.task.raws
    if rawIngest is not None:
        self.task.log.info(f"Ingesting raws from root {self.root}.")
        self.task.registry.registerDatasetType(rawIngest.datasetType)
        # We need to delegate to RawIngestTask to actually ingest raws,
        # rather than just including those datasets in iterDatasets for
        # the base class to handle, because we don't want to assume we
        # can use the Datastore-configured Formatter for raw data.
        collections = self.getCollections("raw")
        refs = [
            ref
            for exposure in self._exposureData
            for ref in rawIngest.ingestExposureDatasets(exposure)
        ]
        # Every collection after the first is explicitly associated with
        # the ingested raw refs.
        for collection in collections[1:]:
            self.task.registry.associate(collection, refs)
    super().ingest()