Coverage for python/lsst/daf/butler/registry/obscore/_records.py: 16%
120 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-01 11:00 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-01 11:00 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30__all__ = ["ExposureRegionFactory", "Record", "RecordFactory"]
32import logging
33import warnings
34from abc import abstractmethod
35from collections.abc import Callable, Collection, Mapping
36from typing import TYPE_CHECKING, Any, cast
37from uuid import UUID
39import astropy.time
40from lsst.daf.butler import DataCoordinate, DatasetRef, Dimension, DimensionRecord, DimensionUniverse
41from lsst.utils.introspection import find_outside_stacklevel
43from ._config import ExtraColumnConfig, ExtraColumnType, ObsCoreConfig
44from ._spatial import RegionTypeError, RegionTypeWarning
46if TYPE_CHECKING:
47 from lsst.sphgeom import Region
49 from ._schema import ObsCoreSchema
50 from ._spatial import SpatialObsCorePlugin
52if TYPE_CHECKING:
53 from ..queries import SqlQueryContext
_LOG = logging.getLogger(__name__)


def _bool_from_string(text: str) -> bool:
    """Interpret the string form of an integer as a `bool`."""
    return bool(int(text))


# Converters applied to the string obtained from an extra-column template,
# keyed by the configured extra column type. Each converter accepts a string.
_TYPE_CONVERSION: Mapping[str, Callable[[str], Any]] = {
    ExtraColumnType.bool: _bool_from_string,  # expect integer number/string as input.
    ExtraColumnType.int: int,
    ExtraColumnType.float: float,
    ExtraColumnType.string: str,
}
class ExposureRegionFactory:
    """Interface for objects that can provide a Region for an exposure.

    Implementations are expected to override `exposure_region`; the version
    defined here always raises `NotImplementedError`.
    """

    @abstractmethod
    def exposure_region(self, dataId: DataCoordinate, context: SqlQueryContext) -> Region | None:
        """Look up the region that corresponds to an exposure data ID.

        Parameters
        ----------
        dataId : `DataCoordinate`
            Data ID for an exposure dataset.
        context : `SqlQueryContext`
            Context used to execute queries for additional dimension metadata.

        Returns
        -------
        region : `Region` or `None`
            Region for the exposure, `None` is returned if the region cannot
            be determined.
        """
        raise NotImplementedError
# Type alias for a single ObsCore record: a plain dictionary whose string keys
# map to the values stored for that record.
Record = dict[str, Any]
class RecordFactory:
    """Class that implements conversion of dataset information to ObsCore.

    Parameters
    ----------
    config : `ObsCoreConfig`
        Complete configuration specifying conversion options.
    schema : `ObsCoreSchema`
        Description of obscore schema.
    universe : `DimensionUniverse`
        Registry dimensions universe.
    spatial_plugins : `~collections.abc.Collection` [ `SpatialObsCorePlugin` ]
        Spatial plugins, each of them is asked to contribute values derived
        from the dataset region to every record.
    exposure_region_factory : `ExposureRegionFactory`, optional
        Object used to obtain a region for data IDs that include an
        exposure. If `None` then the region of the data ID itself is used.
    """

    def __init__(
        self,
        config: ObsCoreConfig,
        schema: ObsCoreSchema,
        universe: DimensionUniverse,
        spatial_plugins: Collection[SpatialObsCorePlugin],
        exposure_region_factory: ExposureRegionFactory | None = None,
    ) -> None:
        self.config = config
        self.schema = schema
        self.universe = universe
        self.exposure_region_factory = exposure_region_factory
        self.spatial_plugins = spatial_plugins

        # All dimension elements used below.
        self.band = cast(Dimension, universe["band"])
        self.exposure = universe["exposure"]
        self.visit = universe["visit"]
        self.physical_filter = cast(Dimension, universe["physical_filter"])

    def __call__(self, ref: DatasetRef, context: SqlQueryContext) -> Record | None:
        """Make an ObsCore record from a dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            Dataset ref, its DataId must be in expanded form.
        context : `SqlQueryContext`
            Context used to execute queries for additional dimension metadata.

        Returns
        -------
        record : `dict` [ `str`, `Any` ] or `None`
            ObsCore record represented as a dictionary. `None` is returned if
            dataset does not need to be stored in the obscore table, e.g. when
            dataset type is not in obscore configuration.

        Notes
        -----
        This method filters records by dataset type and returns `None` if
        reference dataset type is not configured. It does not check reference
        run name against configured collections, all runs are acceptable by
        this method.

        If the spatial plugins cannot handle the type of the dataset region
        a `RegionTypeWarning` is issued and the record is returned without
        the values that the plugins would have contributed.
        """
        # Quick check for dataset type.
        dataset_type_name = ref.datasetType.name
        dataset_config = self.config.dataset_types.get(dataset_type_name)
        if dataset_config is None:
            return None

        dataId = ref.dataId

        record: dict[str, str | int | float | UUID | None]

        # We need all columns filled, to simplify logic below just pre-fill
        # everything with None.
        record = {field.name: None for field in self.schema.table_spec.fields}

        record["dataproduct_type"] = dataset_config.dataproduct_type
        record["dataproduct_subtype"] = dataset_config.dataproduct_subtype
        record["o_ucd"] = dataset_config.o_ucd
        record["facility_name"] = self.config.facility_name
        record["calib_level"] = dataset_config.calib_level
        # Per-dataset-type collection name takes precedence over the global
        # one.
        if dataset_config.obs_collection is not None:
            record["obs_collection"] = dataset_config.obs_collection
        else:
            record["obs_collection"] = self.config.obs_collection
        record["access_format"] = dataset_config.access_format

        record["instrument_name"] = dataId.get("instrument")
        if self.schema.dataset_fk is not None:
            record[self.schema.dataset_fk.name] = ref.id

        # Either end of the timespan may be missing; only the ends that are
        # present are stored (as MJD).
        timespan = dataId.timespan
        if timespan is not None:
            if timespan.begin is not None:
                t_min = cast(astropy.time.Time, timespan.begin)
                record["t_min"] = t_min.mjd
            if timespan.end is not None:
                t_max = cast(astropy.time.Time, timespan.end)
                record["t_max"] = t_max.mjd

        # Exposure takes precedence over visit. For exposures the region
        # comes from the region factory when one was provided, otherwise
        # the data ID region is used.
        region = dataId.region
        if self.exposure.name in dataId:
            if (dimension_record := dataId.records[self.exposure.name]) is not None:
                self._exposure_records(dimension_record, record)
            if self.exposure_region_factory is not None:
                region = self.exposure_region_factory.exposure_region(dataId, context)
        elif self.visit.name in dataId and (dimension_record := dataId.records[self.visit.name]) is not None:
            self._visit_records(dimension_record, record)

        # ask each plugin for its values to add to a record.
        try:
            plugin_records = self.make_spatial_records(region)
        except RegionTypeError as exc:
            # Unsupported region type is not fatal, the record is still
            # produced, just without spatial columns.
            warnings.warn(
                f"Failed to convert region for obscore dataset {ref.id}: {exc}",
                category=RegionTypeWarning,
                stacklevel=find_outside_stacklevel("lsst.daf.butler"),
            )
        else:
            record.update(plugin_records)

        if self.band.name in dataId:
            # Spectral range configured for the physical filter is preferred,
            # the range configured for the band is a fallback.
            em_range = None
            if (label := dataId.get(self.physical_filter.name)) is not None:
                em_range = self.config.spectral_ranges.get(cast(str, label))
            if not em_range:
                band_name = dataId[self.band.name]
                assert isinstance(band_name, str), "Band name must be string"
                em_range = self.config.spectral_ranges.get(band_name)
            if em_range:
                record["em_min"], record["em_max"] = em_range
            else:
                _LOG.warning("could not find spectral range for dataId=%s", dataId.mapping)
            record["em_filter_name"] = dataId[self.band.name]

        # Dictionary to use for substitutions when formatting various
        # strings. Later updates override earlier ones, so values already
        # stored in the record win over data ID values with the same key.
        fmt_kws: dict[str, Any] = dict(records=dataId.records)
        fmt_kws.update(dataId.mapping)
        fmt_kws.update(id=ref.id)
        fmt_kws.update(run=ref.run)
        fmt_kws.update(dataset_type=dataset_type_name)
        fmt_kws.update(record)
        if dataset_config.obs_id_fmt:
            record["obs_id"] = dataset_config.obs_id_fmt.format(**fmt_kws)
            # Make the freshly formatted obs_id available to the templates
            # expanded below.
            fmt_kws["obs_id"] = record["obs_id"]

        if dataset_config.datalink_url_fmt:
            record["access_url"] = dataset_config.datalink_url_fmt.format(**fmt_kws)

        # add extra columns, per-dataset-type definitions override the
        # global ones.
        extra_columns = {}
        if self.config.extra_columns:
            extra_columns.update(self.config.extra_columns)
        if dataset_config.extra_columns:
            extra_columns.update(dataset_config.extra_columns)
        for key, column_value in extra_columns.items():
            if isinstance(column_value, ExtraColumnConfig):
                # Try to expand the template with known keys, if expansion
                # fails due to a missing key name then the column is left
                # with whatever value it already has in the record.
                try:
                    value = column_value.template.format(**fmt_kws)
                    record[key] = _TYPE_CONVERSION[column_value.type](value)
                except KeyError:
                    pass
            else:
                # Just a static value.
                record[key] = column_value

        return record

    def make_spatial_records(self, region: Region | None) -> Record:
        """Make spatial records for a given region.

        Parameters
        ----------
        region : `~lsst.sphgeom.Region` or `None`
            Spatial region to convert to record.

        Returns
        -------
        record : `dict`
            Record items, combined from all spatial plugins. Empty if no
            plugin returned anything.

        Raises
        ------
        RegionTypeError
            Raised if type of the region is not supported.
        """
        record: Record = {}
        # ask each plugin for its values to add to a record, plugins that
        # come later override keys set by earlier ones.
        for plugin in self.spatial_plugins:
            plugin_record = plugin.make_records(region)
            if plugin_record is not None:
                record.update(plugin_record)
        return record

    def _exposure_records(self, dimension_record: DimensionRecord, record: dict[str, Any]) -> None:
        """Extract all needed info from an exposure dimension record.

        Parameters
        ----------
        dimension_record : `DimensionRecord`
            Dimension record for an exposure.
        record : `dict` [ `str`, `Any` ]
            ObsCore record to update, modified in place.
        """
        record["t_exptime"] = dimension_record.exposure_time
        record["target_name"] = dimension_record.target_name

    def _visit_records(self, dimension_record: DimensionRecord, record: dict[str, Any]) -> None:
        """Extract all needed info from a visit dimension record.

        Parameters
        ----------
        dimension_record : `DimensionRecord`
            Dimension record for a visit.
        record : `dict` [ `str`, `Any` ]
            ObsCore record to update, modified in place.
        """
        record["t_exptime"] = dimension_record.exposure_time
        record["target_name"] = dimension_record.target_name