lsst.obs.base  20.0.0-73-gf477d90+2054c0bfca
rootRepoConverter.py
Go to the documentation of this file.
1 # This file is part of obs_base.
2 #
3 # Developed for the LSST Data Management System.
4 # This product includes software developed by the LSST Project
5 # (https://www.lsst.org).
6 # See the COPYRIGHT file at the top-level directory of this distribution
7 # for details of code ownership.
8 #
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
13 #
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the GNU General Public License
20 # along with this program. If not, see <http://www.gnu.org/licenses/>.
21 from __future__ import annotations
22 
23 __all__ = ["RootRepoConverter"]
24 
25 import os
26 import re
27 import itertools
28 from typing import TYPE_CHECKING, Iterator, Optional, Tuple, List
29 
30 from lsst.skymap import BaseSkyMap
31 from lsst.daf.butler import DatasetType, DatasetRef, DimensionGraph, FileDataset
32 from .standardRepoConverter import StandardRepoConverter
33 
# Maps each recognised coadd flavour to the name of the Gen2 dataset type
# that holds its skymap.
SKYMAP_DATASET_TYPES = {prefix: f"{prefix}Coadd_skyMap" for prefix in ("deep", "goodSeeing", "dcr")}
37 
38 if TYPE_CHECKING:
39  from lsst.daf.butler import SkyPixDimension
40 
41 
def getDataPaths(dataRefs):
    """Strip HDU identifiers from paths and return a unique set of paths.

    Parameters
    ----------
    dataRefs : iterable of `lsst.daf.persistence.ButlerDataRef`
        The gen2 datarefs to strip "[HDU]" values from.  Each one must
        provide a ``getUri()`` method returning a `str`.

    Returns
    -------
    paths : `set` [`str`]
        The unique file paths without appended "[HDU]".
    """
    paths = set()
    for dataRef in dataRefs:
        path = dataRef.getUri()
        # Handle FITS files with multiple HDUs (e.g. decam raw), whose URIs
        # carry a trailing "[HDU]".  Only a bracketed suffix at the very end
        # is removed, so a "[" elsewhere in the path cannot truncate it.
        if path.endswith("]"):
            stem, bracket, _ = path.rpartition("[")
            if bracket:
                path = stem
        paths.add(path)
    return paths
61 
62 
class RootRepoConverter(StandardRepoConverter):
    """A specialization of `RepoConverter` for root data repositories.

    `RootRepoConverter` adds support for raw images (mostly delegated to the
    parent task's `RawIngestTask` subtask) and reference catalogs.

    Parameters
    ----------
    kwds
        Keyword arguments are forwarded to (and required by) `RepoConverter`.
    """

    def __init__(self, **kwds):
        super().__init__(run=None, **kwds)
        # (name, skypix dimension) for each reference catalog found by prep().
        self._refCats: List[Tuple[str, SkyPixDimension]] = []
        if self.task.config.rootSkyMapName is not None:
            self._rootSkyMap = self.task.config.skyMaps[self.task.config.rootSkyMapName].skyMap.apply()
        else:
            self._rootSkyMap = None  # All access to _rootSkyMap is guarded
        # Filled by runRawIngest() and consumed by runDefineVisits().
        self._rawRefs = []

    def isDatasetTypeSpecial(self, datasetTypeName: str) -> bool:
        # Docstring inherited from RepoConverter.
        return (
            super().isDatasetTypeSpecial(datasetTypeName)
            or datasetTypeName in ("raw", "ref_cat", "ref_cat_config")
            # in Gen2, some of these are in the root repo, not a calib repo
            or datasetTypeName in self.instrument.getCuratedCalibrationNames()
        )

    def getSpecialDirectories(self) -> List[str]:
        # Docstring inherited from RepoConverter.
        return super().getSpecialDirectories() + ["CALIB", "ref_cats", "rerun"]

    def findMatchingSkyMap(self, datasetTypeName: str) -> Tuple[Optional[BaseSkyMap], Optional[str]]:
        # Docstring inherited from StandardRepoConverter.findMatchingSkyMap.
        skyMap, name = super().findMatchingSkyMap(datasetTypeName)
        # Fall back to the configured root skymap when the base class found
        # nothing for this dataset type.
        if skyMap is None and self.task.config.rootSkyMapName is not None:
            self.task.log.debug(
                "Assuming configured root skymap with name '%s' for dataset %s.",
                self.task.config.rootSkyMapName, datasetTypeName
            )
            skyMap = self._rootSkyMap
            name = self.task.config.rootSkyMapName
        return skyMap, name

    def runRawIngest(self, pool=None):
        """Ingest the raws found in this Gen2 root, if raw ingest is enabled.

        Does nothing when the parent task's ``raws`` attribute is `None`.
        Otherwise the raw datarefs are obtained from the Gen2 butler
        (one query per visit in ``self.subset.visits`` when a subset is
        set, a single unrestricted query otherwise), reduced to unique file
        paths with `getDataPaths`, and handed to ``self.task.raws.run``.
        Whatever that call returns is appended to ``self._rawRefs``.

        Parameters
        ----------
        pool : optional
            Forwarded unchanged to ``self.task.raws.run``.
        """
        if self.task.raws is None:
            return
        self.task.log.info("Finding raws in root %s.", self.root)
        if self.subset is not None:
            dataRefs = itertools.chain.from_iterable(
                self.butler2.subset(self.task.config.rawDatasetType, visit=visit)
                for visit in self.subset.visits
            )
        else:
            dataRefs = self.butler2.subset(self.task.config.rawDatasetType)
        dataPaths = getDataPaths(dataRefs)
        self.task.log.info("Ingesting raws from root %s into run %s.", self.root, self.task.raws.butler.run)
        self._rawRefs.extend(self.task.raws.run(dataPaths, pool=pool))
        # _chain is replaced by a single-element list holding the raw-ingest
        # run.
        self._chain = [self.task.raws.butler.run]

    def runDefineVisits(self, pool=None):
        """Define visits from the exposures of the raws ingested so far.

        Does nothing when the parent task's ``defineVisits`` attribute is
        `None`.  Otherwise the data ID of every entry in ``self._rawRefs``
        is reduced to its ``exposure`` dimensions, duplicates are dropped,
        and the result is passed to ``self.task.defineVisits.run``.

        Parameters
        ----------
        pool : optional
            Forwarded unchanged to ``self.task.defineVisits.run``.
        """
        if self.task.defineVisits is None:
            return
        dimensions = DimensionGraph(self.task.universe, names=["exposure"])
        exposureDataIds = {ref.dataId.subset(dimensions) for ref in self._rawRefs}
        self.task.log.info("Defining visits from exposures.")
        self.task.defineVisits.run(exposureDataIds, pool=pool)

    def prep(self):
        # Docstring inherited from RepoConverter.
        # Gather information about reference catalogs.
        if self.task.isDatasetTypeIncluded("ref_cat") and len(self.task.config.refCats) != 0:
            # Imported here rather than at module scope, as in the original.
            from lsst.meas.algorithms import DatasetConfig as RefCatDatasetConfig
            for refCat in os.listdir(os.path.join(self.root, "ref_cats")):
                path = os.path.join(self.root, "ref_cats", refCat)
                configFile = os.path.join(path, "config.py")
                # Skip entries without a config file and catalogs that were
                # not requested in the task configuration.
                if not os.path.exists(configFile):
                    continue
                if refCat not in self.task.config.refCats:
                    continue
                self.task.log.info("Preparing ref_cat %s from root %s.", refCat, self.root)
                onDiskConfig = RefCatDatasetConfig()
                onDiskConfig.load(configFile)
                if onDiskConfig.indexer.name != "HTM":
                    raise ValueError(f"Reference catalog '{refCat}' uses unsupported "
                                     f"pixelization '{onDiskConfig.indexer.name}'.")
                level = onDiskConfig.indexer["HTM"].depth
                try:
                    dimension = self.task.universe[f"htm{level}"]
                except KeyError as err:
                    raise ValueError(f"Reference catalog {refCat} uses HTM level {level}, but no htm{level} "
                                     f"skypix dimension is configured for this registry.") from err
                self.task.useSkyPix(dimension)
                self._refCats.append((refCat, dimension))
        if self.task.isDatasetTypeIncluded("brightObjectMask") and self.task.config.rootSkyMapName:
            self.task.useSkyMap(self._rootSkyMap, self.task.config.rootSkyMapName)
        super().prep()

    def iterDatasets(self) -> Iterator[FileDataset]:
        # Docstring inherited from RepoConverter.
        # Shard files are named "<htmId>.fits".  The whole file name must
        # match, so leftovers such as "123.fits.tmp" are not picked up.
        shardRegex = re.compile(r"(\d+)\.fits")
        # Iterate over reference catalog files.
        for refCat, dimension in self._refCats:
            datasetType = DatasetType(refCat, dimensions=[dimension], universe=self.task.universe,
                                      storageClass="SimpleCatalog")
            if self.subset is None:
                # No subset: take every shard present on disk.
                for fileName in os.listdir(os.path.join(self.root, "ref_cats", refCat)):
                    m = shardRegex.fullmatch(fileName)
                    if m is not None:
                        htmId = int(m.group(1))
                        dataId = self.task.registry.expandDataId({dimension: htmId})
                        yield FileDataset(path=os.path.join(self.root, "ref_cats", refCat, fileName),
                                          refs=DatasetRef(datasetType, dataId))
            else:
                # Subset: build the file name for every pixel index in each
                # half-open [begin, end) range.
                # NOTE(review): no check that the file exists is made here -
                # confirm that downstream ingest tolerates missing shards.
                for begin, end in self.subset.skypix[dimension]:
                    for htmId in range(begin, end):
                        dataId = self.task.registry.expandDataId({dimension: htmId})
                        yield FileDataset(path=os.path.join(self.root, "ref_cats", refCat, f"{htmId}.fits"),
                                          refs=DatasetRef(datasetType, dataId))
        yield from super().iterDatasets()
lsst.obs.base.gen2to3.repoConverter.RepoConverter.instrument
instrument
Definition: repoConverter.py:212
lsst.obs.base.gen2to3.rootRepoConverter.RootRepoConverter._rootSkyMap
_rootSkyMap
Definition: rootRepoConverter.py:79
lsst.obs.base.gen2to3.rootRepoConverter.RootRepoConverter.runDefineVisits
def runDefineVisits(self, pool=None)
Definition: rootRepoConverter.py:125
lsst.obs.base.gen2to3.repoConverter.RepoConverter.root
root
Definition: repoConverter.py:211
lsst.obs.base.gen2to3.rootRepoConverter.RootRepoConverter.iterDatasets
Iterator[FileDataset] iterDatasets(self)
Definition: rootRepoConverter.py:163
lsst.obs.base.gen2to3.rootRepoConverter.RootRepoConverter._chain
_chain
Definition: rootRepoConverter.py:123
lsst.obs.base.gen2to3.rootRepoConverter.RootRepoConverter.findMatchingSkyMap
Tuple[Optional[BaseSkyMap], Optional[str]] findMatchingSkyMap(self, str datasetTypeName)
Definition: rootRepoConverter.py:97
lsst.obs.base.gen2to3.rootRepoConverter.RootRepoConverter.__init__
def __init__(self, **kwds)
Definition: rootRepoConverter.py:75
lsst.obs.base.gen2to3.rootRepoConverter.RootRepoConverter
Definition: rootRepoConverter.py:63
lsst.obs.base.gen2to3.repoConverter.RepoConverter.task
task
Definition: repoConverter.py:210
lsst.obs.base.gen2to3.rootRepoConverter.RootRepoConverter.getSpecialDirectories
List[str] getSpecialDirectories(self)
Definition: rootRepoConverter.py:93
lsst.obs.base.gen2to3.rootRepoConverter.RootRepoConverter.prep
def prep(self)
Definition: rootRepoConverter.py:133
lsst.obs.base.gen2to3.rootRepoConverter.RootRepoConverter._rawRefs
_rawRefs
Definition: rootRepoConverter.py:82
lsst.obs.base.gen2to3.rootRepoConverter.RootRepoConverter.runRawIngest
def runRawIngest(self, pool=None)
Definition: rootRepoConverter.py:109
lsst.obs.base.gen2to3.standardRepoConverter.StandardRepoConverter
Definition: standardRepoConverter.py:77
lsst.obs.base.gen2to3.rootRepoConverter.RootRepoConverter.isDatasetTypeSpecial
bool isDatasetTypeSpecial(self, str datasetTypeName)
Definition: rootRepoConverter.py:84
lsst.obs.base.gen2to3.repoConverter.RepoConverter.subset
subset
Definition: repoConverter.py:213
lsst.obs.base.gen2to3.rootRepoConverter.getDataPaths
def getDataPaths(dataRefs)
Definition: rootRepoConverter.py:42
lsst.obs.base.gen2to3.standardRepoConverter.StandardRepoConverter.butler2
butler2
Definition: standardRepoConverter.py:91