Coverage for python/lsst/daf/butler/registry/obscore/_records.py: 15% of 118 statements
coverage.py v6.5.0, created at 2023-01-26 02:04 -0800

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ["ExposureRegionFactory", "Record", "RecordFactory"]

import logging
from abc import abstractmethod
from collections.abc import Collection, Mapping
from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, cast
from uuid import UUID

import astropy.time
from lsst.daf.butler import DataCoordinate, DatasetRef, Dimension, DimensionRecord, DimensionUniverse

from ._config import ExtraColumnConfig, ExtraColumnType, ObsCoreConfig

if TYPE_CHECKING:
    from lsst.sphgeom import Region

    from ..queries import SqlQueryContext
    from ._schema import ObsCoreSchema
    from ._spatial import SpatialObsCorePlugin


_LOG = logging.getLogger(__name__)

# Map extra column type to a conversion method that takes a string.
_TYPE_CONVERSION: Mapping[str, Callable[[str], Any]] = {
    ExtraColumnType.bool: lambda x: bool(int(x)),  # expects an integer number/string as input.
    ExtraColumnType.int: int,
    ExtraColumnType.float: float,
    ExtraColumnType.string: str,
}
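
# For illustration, how the converters above behave on sample string inputs
# (derived directly from the mapping; not additional API):
#
#     _TYPE_CONVERSION[ExtraColumnType.bool]("0")    -> False
#     _TYPE_CONVERSION[ExtraColumnType.bool]("1")    -> True
#     _TYPE_CONVERSION[ExtraColumnType.int]("42")    -> 42
#     _TYPE_CONVERSION[ExtraColumnType.float]("1.5") -> 1.5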


class ExposureRegionFactory:
    """Abstract interface for a class that returns a Region for an exposure."""

    @abstractmethod
    def exposure_region(self, dataId: DataCoordinate, context: SqlQueryContext) -> Optional[Region]:
        """Return a region for a given DataId that corresponds to an exposure.

        Parameters
        ----------
        dataId : `DataCoordinate`
            Data ID for an exposure dataset.
        context : `SqlQueryContext`
            Context used to execute queries for additional dimension metadata.

        Returns
        -------
        region : `Region` or `None`
            The region, or `None` if the region cannot be determined.
        """
        raise NotImplementedError()
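
# A minimal sketch of a concrete factory; the class name and the strategy of
# reusing a visit region are illustrative only, not part of this module:
#
#     class _VisitRegionFactory(ExposureRegionFactory):
#         def exposure_region(
#             self, dataId: DataCoordinate, context: SqlQueryContext
#         ) -> Optional[Region]:
#             # Fall back to the region of an associated visit, if any.
#             visit_record = dataId.records.get("visit")
#             return visit_record.region if visit_record is not None else None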


Record = Dict[str, Any]


class RecordFactory:
    """Class that implements conversion of dataset information to ObsCore.

    Parameters
    ----------
    config : `ObsCoreConfig`
        Complete configuration specifying conversion options.
    schema : `ObsCoreSchema`
        Description of the obscore schema.
    universe : `DimensionUniverse`
        Registry dimensions universe.
    spatial_plugins : `~collections.abc.Collection` [ `SpatialObsCorePlugin` ]
        Spatial plugins used to add spatial columns to the records.
    exposure_region_factory : `ExposureRegionFactory`, optional
        Factory that returns a region for an exposure data ID.
    """

    def __init__(
        self,
        config: ObsCoreConfig,
        schema: ObsCoreSchema,
        universe: DimensionUniverse,
        spatial_plugins: Collection[SpatialObsCorePlugin],
        exposure_region_factory: Optional[ExposureRegionFactory] = None,
    ):
        self.config = config
        self.schema = schema
        self.universe = universe
        self.exposure_region_factory = exposure_region_factory
        self.spatial_plugins = spatial_plugins

        # All dimension elements used below.
        self.band = cast(Dimension, universe["band"])
        self.exposure = universe["exposure"]
        self.visit = universe["visit"]
        self.physical_filter = cast(Dimension, universe["physical_filter"])

    def __call__(self, ref: DatasetRef, context: SqlQueryContext) -> Optional[Record]:
        """Make an ObsCore record from a dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            Dataset ref; its DataId must be in expanded form.
        context : `SqlQueryContext`
            Context used to execute queries for additional dimension metadata.

        Returns
        -------
        record : `dict` [ `str`, `Any` ] or `None`
            ObsCore record represented as a dictionary. `None` is returned if
            the dataset does not need to be stored in the obscore table, e.g.
            when its dataset type is not in the obscore configuration.

        Notes
        -----
        This method filters records by dataset type and returns `None` if the
        dataset type is not configured. It does not check the dataset's run
        name against configured collections; all runs are acceptable to this
        method.
        """
        # Quick check for dataset type.
        dataset_type_name = ref.datasetType.name
        dataset_config = self.config.dataset_types.get(dataset_type_name)
        if dataset_config is None:
            return None

        dataId = ref.dataId
        # _LOG.debug("New record, dataId=%s", dataId.full)
        # _LOG.debug("New record, records=%s", dataId.records)

        record: Dict[str, str | int | float | UUID | None]

        # We need all columns filled; to simplify the logic below, just
        # pre-fill everything with None.
        record = {field.name: None for field in self.schema.table_spec.fields}

        record["dataproduct_type"] = dataset_config.dataproduct_type
        record["dataproduct_subtype"] = dataset_config.dataproduct_subtype
        record["o_ucd"] = dataset_config.o_ucd
        record["facility_name"] = self.config.facility_name
        record["calib_level"] = dataset_config.calib_level
        if dataset_config.obs_collection is not None:
            record["obs_collection"] = dataset_config.obs_collection
        else:
            record["obs_collection"] = self.config.obs_collection
        record["access_format"] = dataset_config.access_format

        record["instrument_name"] = dataId.get("instrument")
        if self.schema.dataset_fk is not None:
            record[self.schema.dataset_fk.name] = ref.id

        timespan = dataId.timespan
        if timespan is not None:
            if timespan.begin is not None:
                t_min = cast(astropy.time.Time, timespan.begin)
                record["t_min"] = t_min.mjd
            if timespan.end is not None:
                t_max = cast(astropy.time.Time, timespan.end)
                record["t_max"] = t_max.mjd

        region = dataId.region
        if self.exposure in dataId:
            if (dimension_record := dataId.records[self.exposure]) is not None:
                self._exposure_records(dimension_record, record)
                if self.exposure_region_factory is not None:
                    region = self.exposure_region_factory.exposure_region(dataId, context)
        elif self.visit in dataId:
            if (dimension_record := dataId.records[self.visit]) is not None:
                self._visit_records(dimension_record, record)

        # Ask each plugin for its values to add to the record.
        for plugin in self.spatial_plugins:
            assert ref.id is not None, "Dataset ID must be defined"
            plugin_record = plugin.make_records(ref.id, region)
            if plugin_record is not None:
                record.update(plugin_record)

        if self.band in dataId:
            em_range = None
            if (label := dataId.get(self.physical_filter)) is not None:
                em_range = self.config.spectral_ranges.get(label)
            if not em_range:
                band_name = dataId[self.band]
                assert isinstance(band_name, str), "Band name must be string"
                em_range = self.config.spectral_ranges.get(band_name)
            if em_range:
                record["em_min"], record["em_max"] = em_range
            else:
                _LOG.warning("could not find spectral range for dataId=%s", dataId.full)
            record["em_filter_name"] = dataId["band"]

        # Dictionary to use for substitutions when formatting various strings.
        fmt_kws: Dict[str, Any] = dict(records=dataId.records)
        fmt_kws.update(dataId.full.byName())
        fmt_kws.update(id=ref.id)
        fmt_kws.update(run=ref.run)
        fmt_kws.update(dataset_type=dataset_type_name)
        fmt_kws.update(record)
        if dataset_config.obs_id_fmt:
            record["obs_id"] = dataset_config.obs_id_fmt.format(**fmt_kws)
            fmt_kws["obs_id"] = record["obs_id"]

        if dataset_config.datalink_url_fmt:
            record["access_url"] = dataset_config.datalink_url_fmt.format(**fmt_kws)

        # Add extra columns.
        extra_columns = {}
        if self.config.extra_columns:
            extra_columns.update(self.config.extra_columns)
        if dataset_config.extra_columns:
            extra_columns.update(dataset_config.extra_columns)
        for key, column_value in extra_columns.items():
            # Try to expand the template with known keys; if expansion
            # fails due to a missing key name then store None.
            if isinstance(column_value, ExtraColumnConfig):
                try:
                    value = column_value.template.format(**fmt_kws)
                    record[key] = _TYPE_CONVERSION[column_value.type](value)
                except KeyError:
                    pass
            else:
                # Just a static value.
                record[key] = column_value
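
        # For example (hypothetical configuration): an ExtraColumnConfig with
        # template "{exposure}" and type ``int`` would be expanded against
        # fmt_kws and stored as an integer via _TYPE_CONVERSION.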

        return record

    def _exposure_records(self, dimension_record: DimensionRecord, record: Dict[str, Any]) -> None:
        """Extract all needed info from an exposure dimension record."""
        record["t_exptime"] = dimension_record.exposure_time
        record["target_name"] = dimension_record.target_name

    def _visit_records(self, dimension_record: DimensionRecord, record: Dict[str, Any]) -> None:
        """Extract all needed info from a visit dimension record."""
        record["t_exptime"] = dimension_record.exposure_time
        record["target_name"] = dimension_record.target_name