Coverage for python/lsst/daf/butler/registry/obscore/_records.py: 16%
120 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-10 10:14 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30__all__ = ["ExposureRegionFactory", "Record", "RecordFactory"]
32import logging
33import warnings
34from abc import abstractmethod
35from collections.abc import Callable, Collection, Mapping
36from typing import TYPE_CHECKING, Any, cast
37from uuid import UUID
39import astropy.time
40from lsst.daf.butler import DataCoordinate, DatasetRef, Dimension, DimensionRecord, DimensionUniverse
41from lsst.utils.introspection import find_outside_stacklevel
43from ._config import ExtraColumnConfig, ExtraColumnType, ObsCoreConfig
44from ._spatial import RegionTypeError, RegionTypeWarning
46if TYPE_CHECKING:
47 from lsst.sphgeom import Region
49 from ._schema import ObsCoreSchema
50 from ._spatial import SpatialObsCorePlugin
52if TYPE_CHECKING:
53 from ..queries import SqlQueryContext
55_LOG = logging.getLogger(__name__)
# Map extra column type to a conversion method that takes string.
_TYPE_CONVERSION: Mapping[str, Callable[[str], Any]] = {
    ExtraColumnType.bool: lambda x: bool(int(x)),  # expect integer number/string as input.
    ExtraColumnType.int: int,
    ExtraColumnType.float: float,
    ExtraColumnType.string: str,
}
class ExposureRegionFactory:
    """Abstract interface for a class that returns a Region for an exposure."""

    @abstractmethod
    def exposure_region(self, dataId: DataCoordinate, context: SqlQueryContext) -> Region | None:
        """Return the region corresponding to an exposure data ID.

        Parameters
        ----------
        dataId : `DataCoordinate`
            Data ID for an exposure dataset.
        context : `SqlQueryContext`
            Context used to execute queries for additional dimension metadata.

        Returns
        -------
        region : `Region` or `None`
            Region of the exposure, or `None` if the region cannot be
            determined.
        """
        raise NotImplementedError()
88Record = dict[str, Any]
class RecordFactory:
    """Class that implements conversion of dataset information to ObsCore.

    Parameters
    ----------
    config : `ObsCoreConfig`
        Complete configuration specifying conversion options.
    schema : `ObsCoreSchema`
        Description of obscore schema.
    universe : `DimensionUniverse`
        Registry dimensions universe.
    spatial_plugins : `~collections.abc.Collection` of `SpatialObsCorePlugin`
        Spatial plugins.
    exposure_region_factory : `ExposureRegionFactory`, optional
        Manager for Registry dimensions.
    """

    def __init__(
        self,
        config: ObsCoreConfig,
        schema: ObsCoreSchema,
        universe: DimensionUniverse,
        spatial_plugins: Collection[SpatialObsCorePlugin],
        exposure_region_factory: ExposureRegionFactory | None = None,
    ):
        self.config = config
        self.schema = schema
        self.universe = universe
        self.exposure_region_factory = exposure_region_factory
        self.spatial_plugins = spatial_plugins

        # Dimension elements used repeatedly below; resolve them once.
        self.band = cast(Dimension, universe["band"])
        self.exposure = universe["exposure"]
        self.visit = universe["visit"]
        self.physical_filter = cast(Dimension, universe["physical_filter"])

    def __call__(self, ref: DatasetRef, context: SqlQueryContext) -> Record | None:
        """Make an ObsCore record from a dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            Dataset ref, its DataId must be in expanded form.
        context : `SqlQueryContext`
            Context used to execute queries for additional dimension metadata.

        Returns
        -------
        record : `dict` [ `str`, `Any` ] or `None`
            ObsCore record represented as a dictionary. `None` is returned if
            dataset does not need to be stored in the obscore table, e.g. when
            dataset type is not in obscore configuration.

        Notes
        -----
        This method filters records by dataset type and returns `None` if
        reference dataset type is not configured. It does not check reference
        run name against configured collections, all runs are acceptable by
        this method.
        """
        # Quick check for dataset type; anything not configured is skipped.
        dataset_type_name = ref.datasetType.name
        dataset_config = self.config.dataset_types.get(dataset_type_name)
        if dataset_config is None:
            return None

        dataId = ref.dataId

        record: dict[str, str | int | float | UUID | None]

        # We need all columns filled, to simplify logic below just pre-fill
        # everything with None.
        record = {field.name: None for field in self.schema.table_spec.fields}

        record["dataproduct_type"] = dataset_config.dataproduct_type
        record["dataproduct_subtype"] = dataset_config.dataproduct_subtype
        record["o_ucd"] = dataset_config.o_ucd
        record["facility_name"] = self.config.facility_name
        record["calib_level"] = dataset_config.calib_level
        # Per-dataset-type obs_collection overrides the global setting.
        if dataset_config.obs_collection is not None:
            record["obs_collection"] = dataset_config.obs_collection
        else:
            record["obs_collection"] = self.config.obs_collection
        record["access_format"] = dataset_config.access_format

        record["instrument_name"] = dataId.get("instrument")
        if self.schema.dataset_fk is not None:
            record[self.schema.dataset_fk.name] = ref.id

        # Convert timespan endpoints to MJD; either endpoint may be missing.
        timespan = dataId.timespan
        if timespan is not None:
            if timespan.begin is not None:
                t_min = cast(astropy.time.Time, timespan.begin)
                record["t_min"] = t_min.mjd
            if timespan.end is not None:
                t_max = cast(astropy.time.Time, timespan.end)
                record["t_max"] = t_max.mjd

        region = dataId.region
        if self.exposure.name in dataId:
            if (dimension_record := dataId.records[self.exposure.name]) is not None:
                self._exposure_records(dimension_record, record)
                # Exposures do not have their own region in the data ID;
                # delegate to the factory when one is available.
                if self.exposure_region_factory is not None:
                    region = self.exposure_region_factory.exposure_region(dataId, context)
        elif self.visit.name in dataId and (dimension_record := dataId.records[self.visit.name]) is not None:
            self._visit_records(dimension_record, record)

        # Ask each plugin for its values to add to a record. An unsupported
        # region type is reported as a warning, not an error.
        try:
            plugin_records = self.make_spatial_records(region)
        except RegionTypeError as exc:
            warnings.warn(
                f"Failed to convert region for obscore dataset {ref.id}: {exc}",
                category=RegionTypeWarning,
                stacklevel=find_outside_stacklevel("lsst.daf.butler"),
            )
        else:
            record.update(plugin_records)

        if self.band.name in dataId:
            em_range = None
            # Prefer the physical filter for spectral-range lookup, falling
            # back to the abstract band name.
            if (label := dataId.get(self.physical_filter.name)) is not None:
                em_range = self.config.spectral_ranges.get(cast(str, label))
            if not em_range:
                band_name = dataId[self.band.name]
                assert isinstance(band_name, str), "Band name must be string"
                em_range = self.config.spectral_ranges.get(band_name)
            if em_range:
                record["em_min"], record["em_max"] = em_range
            else:
                _LOG.warning("could not find spectral range for dataId=%s", dataId.full)
            record["em_filter_name"] = dataId["band"]

        # Dictionary to use for substitutions when formatting various
        # strings.
        fmt_kws: dict[str, Any] = dict(records=dataId.records)
        fmt_kws.update(dataId.mapping)
        fmt_kws.update(id=ref.id)
        fmt_kws.update(run=ref.run)
        fmt_kws.update(dataset_type=dataset_type_name)
        fmt_kws.update(record)
        if dataset_config.obs_id_fmt:
            record["obs_id"] = dataset_config.obs_id_fmt.format(**fmt_kws)
            # Make obs_id available to later templates (e.g. datalink URL).
            fmt_kws["obs_id"] = record["obs_id"]

        if dataset_config.datalink_url_fmt:
            record["access_url"] = dataset_config.datalink_url_fmt.format(**fmt_kws)

        # Add extra columns; per-dataset-type entries override global config.
        extra_columns = {}
        if self.config.extra_columns:
            extra_columns.update(self.config.extra_columns)
        if dataset_config.extra_columns:
            extra_columns.update(dataset_config.extra_columns)
        for key, column_value in extra_columns.items():
            if isinstance(column_value, ExtraColumnConfig):
                # Try to expand the template with known keys, if expansion
                # fails due to a missing key name then the pre-filled None
                # remains in the record.
                try:
                    value = column_value.template.format(**fmt_kws)
                    record[key] = _TYPE_CONVERSION[column_value.type](value)
                except KeyError:
                    pass
            else:
                # Just a static value.
                record[key] = column_value

        return record

    def make_spatial_records(self, region: Region | None) -> Record:
        """Make spatial records for a given region.

        Parameters
        ----------
        region : `~lsst.sphgeom.Region` or `None`
            Spatial region to convert to record.

        Returns
        -------
        record : `dict`
            Record items.

        Raises
        ------
        RegionTypeError
            Raised if type of the region is not supported.
        """
        record: Record = {}
        # Ask each plugin for its values to add to a record.
        for plugin in self.spatial_plugins:
            plugin_record = plugin.make_records(region)
            if plugin_record is not None:
                record.update(plugin_record)
        return record

    def _exposure_records(self, dimension_record: DimensionRecord, record: dict[str, Any]) -> None:
        """Extract all needed info from an exposure dimension record."""
        record["t_exptime"] = dimension_record.exposure_time
        record["target_name"] = dimension_record.target_name

    def _visit_records(self, dimension_record: DimensionRecord, record: dict[str, Any]) -> None:
        """Extract all needed info from a visit dimension record."""
        record["t_exptime"] = dimension_record.exposure_time
        record["target_name"] = dimension_record.target_name