Coverage for python/lsst/daf/butler/registry/obscore/_records.py: 14%
116 statements
« prev ^ index » next coverage.py v6.5.0, created at 2022-11-17 02:01 -0800
« prev ^ index » next coverage.py v6.5.0, created at 2022-11-17 02:01 -0800
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ["ExposureRegionFactory", "Record", "RecordFactory"]
26import logging
27from abc import abstractmethod
28from collections.abc import Collection, Mapping
29from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, cast
30from uuid import UUID
32import astropy.time
33from lsst.daf.butler import DataCoordinate, DatasetRef, Dimension, DimensionRecord, DimensionUniverse
35from ._config import ExtraColumnConfig, ExtraColumnType, ObsCoreConfig
37if TYPE_CHECKING: 37 ↛ 38line 37 didn't jump to line 38, because the condition on line 37 was never true
38 from lsst.sphgeom import Region
40 from ._schema import ObsCoreSchema
41 from ._spatial import SpatialObsCorePlugin
# Module-level logger, named after this module.
_LOG = logging.getLogger(__name__)

# Map extra column type to a conversion method that takes string.
# The table is indexed by the ``type`` of an extra-column configuration and
# the selected callable is applied to the already-formatted template string.
_TYPE_CONVERSION: Mapping[str, Callable[[str], Any]] = {
    # Expect integer number/string as input: "0" -> False, any other
    # integer -> True.  A non-integer string makes ``int()`` raise
    # ValueError.
    ExtraColumnType.bool: lambda x: bool(int(x)),
    ExtraColumnType.int: int,
    ExtraColumnType.float: float,
    ExtraColumnType.string: str,
}
class ExposureRegionFactory:
    """Abstract interface for a class that returns a Region for an exposure.

    Notes
    -----
    This class does not derive from `abc.ABC`, so the `abstractmethod`
    marker is not enforced when the class is instantiated.  The base
    implementation of `exposure_region` raises `NotImplementedError`;
    subclasses are expected to override it.
    """

    @abstractmethod
    def exposure_region(self, dataId: DataCoordinate) -> Optional[Region]:
        """Return a region for a given DataId that corresponds to an exposure.

        Parameters
        ----------
        dataId : `DataCoordinate`
            Data ID for an exposure dataset.

        Returns
        -------
        region : `Region` or `None`
            `None` is returned if region cannot be determined.

        Raises
        ------
        NotImplementedError
            Always raised by this base implementation.
        """
        raise NotImplementedError()
# An ObsCore record: a mapping from column name to column value.
Record = Dict[str, Any]


class RecordFactory:
    """Class that implements conversion of dataset information to ObsCore.

    Parameters
    ----------
    config : `ObsCoreConfig`
        Complete configuration specifying conversion options.
    schema : `ObsCoreSchema`
        Description of obscore schema.
    universe : `DimensionUniverse`
        Registry dimensions universe.
    spatial_plugins : `~collections.abc.Collection` [ `SpatialObsCorePlugin` ]
        Spatial plugins.  For every converted dataset each plugin's
        ``make_records`` method is called with the dataset ID and the region,
        and any mapping it returns is merged into the record.
    exposure_region_factory : `ExposureRegionFactory`, optional
        Object used to determine a region for datasets whose data ID includes
        the ``exposure`` dimension.  When provided, the region it returns
        replaces the region of the data ID for such datasets.
    """

    def __init__(
        self,
        config: ObsCoreConfig,
        schema: ObsCoreSchema,
        universe: DimensionUniverse,
        spatial_plugins: Collection[SpatialObsCorePlugin],
        exposure_region_factory: Optional[ExposureRegionFactory] = None,
    ) -> None:
        self.config = config
        self.schema = schema
        self.universe = universe
        self.exposure_region_factory = exposure_region_factory
        self.spatial_plugins = spatial_plugins

        # All dimension elements used below.
        self.band = cast(Dimension, universe["band"])
        self.exposure = universe["exposure"]
        self.visit = universe["visit"]
        self.physical_filter = cast(Dimension, universe["physical_filter"])

    def __call__(self, ref: DatasetRef) -> Optional[Record]:
        """Make an ObsCore record from a dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            Dataset ref, its DataId must be in expanded form.

        Returns
        -------
        record : `dict` [ `str`, `Any` ] or `None`
            ObsCore record represented as a dictionary. `None` is returned if
            dataset does not need to be stored in the obscore table, e.g. when
            dataset type is not in obscore configuration.

        Notes
        -----
        This method filters records by dataset type and returns `None` if
        reference dataset type is not configured. It does not check reference
        run name against configured collections, all runs are acceptable by
        this method.
        """
        # Quick check for dataset type.
        dataset_type_name = ref.datasetType.name
        dataset_config = self.config.dataset_types.get(dataset_type_name)
        if dataset_config is None:
            return None

        dataId = ref.dataId

        record: Dict[str, str | int | float | UUID | None]

        # We need all columns filled, to simplify logic below just pre-fill
        # everything with None.
        record = {field.name: None for field in self.schema.table_spec.fields}

        # Values taken directly from configuration.
        record["dataproduct_type"] = dataset_config.dataproduct_type
        record["dataproduct_subtype"] = dataset_config.dataproduct_subtype
        record["o_ucd"] = dataset_config.o_ucd
        record["facility_name"] = self.config.facility_name
        record["calib_level"] = dataset_config.calib_level
        # Per-dataset-type collection name wins over the global one.
        if dataset_config.obs_collection is not None:
            record["obs_collection"] = dataset_config.obs_collection
        else:
            record["obs_collection"] = self.config.obs_collection
        record["access_format"] = dataset_config.access_format

        record["instrument_name"] = dataId.get("instrument")
        if self.schema.dataset_fk is not None:
            record[self.schema.dataset_fk.name] = ref.id

        # Time range is stored as MJD; an open-ended bound stays None.
        timespan = dataId.timespan
        if timespan is not None:
            if timespan.begin is not None:
                t_min = cast(astropy.time.Time, timespan.begin)
                record["t_min"] = t_min.mjd
            if timespan.end is not None:
                t_max = cast(astropy.time.Time, timespan.end)
                record["t_max"] = t_max.mjd

        # Exposure takes precedence over visit.  For exposures the region of
        # the data ID may be replaced by one from the exposure region factory.
        region = dataId.region
        if self.exposure in dataId:
            if (dimension_record := dataId.records[self.exposure]) is not None:
                self._exposure_records(dimension_record, record)
                if self.exposure_region_factory is not None:
                    region = self.exposure_region_factory.exposure_region(dataId)
        elif self.visit in dataId:
            if (dimension_record := dataId.records[self.visit]) is not None:
                self._visit_records(dimension_record, record)

        # ask each plugin for its values to add to a record.
        for plugin in self.spatial_plugins:
            assert ref.id is not None, "Dataset ID must be defined"
            plugin_record = plugin.make_records(ref.id, region)
            if plugin_record is not None:
                record.update(plugin_record)

        if self.band in dataId:
            # Look up the spectral range by physical filter first and fall
            # back to the band name when that lookup gives nothing.
            em_range = None
            if (label := dataId.get(self.physical_filter)) is not None:
                em_range = self.config.spectral_ranges.get(label)
            if not em_range:
                band_name = dataId[self.band]
                assert isinstance(band_name, str), "Band name must be string"
                em_range = self.config.spectral_ranges.get(band_name)
            if em_range:
                record["em_min"], record["em_max"] = em_range
            else:
                _LOG.warning("could not find spectral range for dataId=%s", dataId.full)
            record["em_filter_name"] = dataId["band"]

        # Dictionary to use for substitutions when formatting various
        # strings.  Later updates override earlier ones on key collisions.
        fmt_kws: Dict[str, Any] = dict(records=dataId.records)
        fmt_kws.update(dataId.full.byName())
        fmt_kws.update(id=ref.id)
        fmt_kws.update(run=ref.run)
        fmt_kws.update(dataset_type=dataset_type_name)
        fmt_kws.update(record)
        if dataset_config.obs_id_fmt:
            record["obs_id"] = dataset_config.obs_id_fmt.format(**fmt_kws)
            # Make the freshly built obs_id available to later templates.
            fmt_kws["obs_id"] = record["obs_id"]

        if dataset_config.datalink_url_fmt:
            record["access_url"] = dataset_config.datalink_url_fmt.format(**fmt_kws)

        # add extra columns, per-dataset-type values override global ones.
        extra_columns = {}
        if self.config.extra_columns:
            extra_columns.update(self.config.extra_columns)
        if dataset_config.extra_columns:
            extra_columns.update(dataset_config.extra_columns)
        for key, column_value in extra_columns.items():
            if isinstance(column_value, ExtraColumnConfig):
                # Try to expand the template with known keys; if that fails
                # with KeyError (e.g. a missing key name) the column is left
                # untouched, i.e. `None` for columns pre-filled from schema.
                try:
                    value = column_value.template.format(**fmt_kws)
                    record[key] = _TYPE_CONVERSION[column_value.type](value)
                except KeyError:
                    pass
            else:
                # Just a static value.
                record[key] = column_value

        return record

    def _exposure_records(self, dimension_record: DimensionRecord, record: Dict[str, Any]) -> None:
        """Extract all needed info from an exposure dimension record.

        Parameters
        ----------
        dimension_record : `DimensionRecord`
            Record for the exposure dimension.
        record : `dict` [ `str`, `Any` ]
            ObsCore record, updated in place.
        """
        record["t_exptime"] = dimension_record.exposure_time
        record["target_name"] = dimension_record.target_name

    def _visit_records(self, dimension_record: DimensionRecord, record: Dict[str, Any]) -> None:
        """Extract all needed info from a visit dimension record.

        Parameters
        ----------
        dimension_record : `DimensionRecord`
            Record for the visit dimension.
        record : `dict` [ `str`, `Any` ]
            ObsCore record, updated in place.
        """
        record["t_exptime"] = dimension_record.exposure_time
        record["target_name"] = dimension_record.target_name