Coverage for python/lsst/daf/butler/registry/obscore/_records.py: 16%
120 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-08-05 01:26 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-08-05 01:26 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ["ExposureRegionFactory", "Record", "RecordFactory"]
26import logging
27import warnings
28from abc import abstractmethod
29from collections.abc import Callable, Collection, Mapping
30from typing import TYPE_CHECKING, Any, cast
31from uuid import UUID
33import astropy.time
34from lsst.daf.butler import DataCoordinate, DatasetRef, Dimension, DimensionRecord, DimensionUniverse
35from lsst.utils.introspection import find_outside_stacklevel
37from ._config import ExtraColumnConfig, ExtraColumnType, ObsCoreConfig
38from ._spatial import RegionTypeError, RegionTypeWarning
40if TYPE_CHECKING:
41 from lsst.sphgeom import Region
43 from ._schema import ObsCoreSchema
44 from ._spatial import SpatialObsCorePlugin
46if TYPE_CHECKING:
47 from ..queries import SqlQueryContext
_LOG = logging.getLogger(__name__)

# Map extra column type to a conversion method that takes string.
# Used when expanding ExtraColumnConfig templates, whose formatted result
# is always a string and must be coerced to the configured column type.
_TYPE_CONVERSION: Mapping[str, Callable[[str], Any]] = {
    ExtraColumnType.bool: lambda x: bool(int(x)),  # expect integer number/string as input.
    ExtraColumnType.int: int,
    ExtraColumnType.float: float,
    ExtraColumnType.string: str,
}
class ExposureRegionFactory:
    """Abstract interface for a class that returns a Region for an exposure."""

    @abstractmethod
    def exposure_region(self, dataId: DataCoordinate, context: SqlQueryContext) -> Region | None:
        """Return the spatial region associated with an exposure data ID.

        Parameters
        ----------
        dataId : `DataCoordinate`
            Data ID for an exposure dataset.
        context : `SqlQueryContext`
            Context used to execute queries for additional dimension metadata.

        Returns
        -------
        region : `Region` or `None`
            The region for the exposure, or `None` when no region can be
            determined.
        """
        # Subclasses must override; the base class has no lookup strategy.
        raise NotImplementedError
82Record = dict[str, Any]
class RecordFactory:
    """Class that implements conversion of dataset information to ObsCore.

    Parameters
    ----------
    config : `ObsCoreConfig`
        Complete configuration specifying conversion options.
    schema : `ObsCoreSchema`
        Description of obscore schema.
    universe : `DimensionUniverse`
        Registry dimensions universe.
    spatial_plugins : `~collections.abc.Collection` [ `SpatialObsCorePlugin` ]
        Plugins that produce the spatial columns of each record.
    exposure_region_factory : `ExposureRegionFactory`, optional
        Factory used to look up a region for exposure data IDs, which do not
        carry a region themselves.
    """

    def __init__(
        self,
        config: ObsCoreConfig,
        schema: ObsCoreSchema,
        universe: DimensionUniverse,
        spatial_plugins: Collection[SpatialObsCorePlugin],
        exposure_region_factory: ExposureRegionFactory | None = None,
    ):
        self.config = config
        self.schema = schema
        self.universe = universe
        self.exposure_region_factory = exposure_region_factory
        self.spatial_plugins = spatial_plugins

        # All dimension elements used below.
        self.band = cast(Dimension, universe["band"])
        self.exposure = universe["exposure"]
        self.visit = universe["visit"]
        self.physical_filter = cast(Dimension, universe["physical_filter"])

    def __call__(self, ref: DatasetRef, context: SqlQueryContext) -> Record | None:
        """Make an ObsCore record from a dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            Dataset ref, its DataId must be in expanded form.
        context : `SqlQueryContext`
            Context used to execute queries for additional dimension metadata.

        Returns
        -------
        record : `dict` [ `str`, `Any` ] or `None`
            ObsCore record represented as a dictionary. `None` is returned if
            dataset does not need to be stored in the obscore table, e.g. when
            dataset type is not in obscore configuration.

        Notes
        -----
        This method filters records by dataset type and returns `None` if
        reference dataset type is not configured. It does not check reference
        run name against configured collections, all runs are acceptable by
        this method.
        """
        # Quick check for dataset type.
        dataset_type_name = ref.datasetType.name
        dataset_config = self.config.dataset_types.get(dataset_type_name)
        if dataset_config is None:
            # Dataset type is not configured for obscore; nothing to store.
            return None

        dataId = ref.dataId
        # _LOG.debug("New record, dataId=%s", dataId.full)
        # _LOG.debug("New record, records=%s", dataId.records)

        record: dict[str, str | int | float | UUID | None]

        # We need all columns filled, to simplify logic below just pre-fill
        # everything with None.
        record = {field.name: None for field in self.schema.table_spec.fields}

        record["dataproduct_type"] = dataset_config.dataproduct_type
        record["dataproduct_subtype"] = dataset_config.dataproduct_subtype
        record["o_ucd"] = dataset_config.o_ucd
        record["facility_name"] = self.config.facility_name
        record["calib_level"] = dataset_config.calib_level
        # Per-dataset-type obs_collection takes precedence over the global one.
        if dataset_config.obs_collection is not None:
            record["obs_collection"] = dataset_config.obs_collection
        else:
            record["obs_collection"] = self.config.obs_collection
        record["access_format"] = dataset_config.access_format

        record["instrument_name"] = dataId.get("instrument")
        if self.schema.dataset_fk is not None:
            record[self.schema.dataset_fk.name] = ref.id

        # Convert the data ID timespan endpoints (when present) to MJD floats.
        timespan = dataId.timespan
        if timespan is not None:
            if timespan.begin is not None:
                t_min = cast(astropy.time.Time, timespan.begin)
                record["t_min"] = t_min.mjd
            if timespan.end is not None:
                t_max = cast(astropy.time.Time, timespan.end)
                record["t_max"] = t_max.mjd

        region = dataId.region
        if self.exposure in dataId:
            if (dimension_record := dataId.records[self.exposure]) is not None:
                self._exposure_records(dimension_record, record)
            # Exposure data IDs carry no region of their own; delegate the
            # lookup to the factory when one was configured.
            if self.exposure_region_factory is not None:
                region = self.exposure_region_factory.exposure_region(dataId, context)
        elif self.visit in dataId and (dimension_record := dataId.records[self.visit]) is not None:
            self._visit_records(dimension_record, record)

        # ask each plugin for its values to add to a record.
        try:
            plugin_records = self.make_spatial_records(region)
        except RegionTypeError as exc:
            # An unsupported region type is not fatal: warn and leave the
            # spatial columns as pre-filled None.
            warnings.warn(
                f"Failed to convert region for obscore dataset {ref.id}: {exc}",
                category=RegionTypeWarning,
                stacklevel=find_outside_stacklevel("lsst.daf.butler"),
            )
        else:
            record.update(plugin_records)

        if self.band in dataId:
            em_range = None
            # Prefer a spectral range keyed on physical_filter; fall back to
            # the (coarser) band name when no physical_filter range exists.
            if (label := dataId.get(self.physical_filter)) is not None:
                em_range = self.config.spectral_ranges.get(label)
            if not em_range:
                band_name = dataId[self.band]
                assert isinstance(band_name, str), "Band name must be string"
                em_range = self.config.spectral_ranges.get(band_name)
            if em_range:
                record["em_min"], record["em_max"] = em_range
            else:
                _LOG.warning("could not find spectral range for dataId=%s", dataId.full)
            record["em_filter_name"] = dataId["band"]

        # Dictionary to use for substitutions when formatting various
        # strings.
        fmt_kws: dict[str, Any] = dict(records=dataId.records)
        fmt_kws.update(dataId.full.byName())
        fmt_kws.update(id=ref.id)
        fmt_kws.update(run=ref.run)
        fmt_kws.update(dataset_type=dataset_type_name)
        fmt_kws.update(record)
        if dataset_config.obs_id_fmt:
            record["obs_id"] = dataset_config.obs_id_fmt.format(**fmt_kws)
            # Make obs_id available to later templates (e.g. datalink URL).
            fmt_kws["obs_id"] = record["obs_id"]

        if dataset_config.datalink_url_fmt:
            record["access_url"] = dataset_config.datalink_url_fmt.format(**fmt_kws)

        # add extra columns
        extra_columns = {}
        if self.config.extra_columns:
            extra_columns.update(self.config.extra_columns)
        # Per-dataset-type extra columns override global ones of the same key.
        if dataset_config.extra_columns:
            extra_columns.update(dataset_config.extra_columns)
        for key, column_value in extra_columns.items():
            # Try to expand the template with known keys, if expansion
            # fails due to a missing key name then store None.
            if isinstance(column_value, ExtraColumnConfig):
                try:
                    value = column_value.template.format(**fmt_kws)
                    record[key] = _TYPE_CONVERSION[column_value.type](value)
                except KeyError:
                    pass
            else:
                # Just a static value.
                record[key] = column_value

        return record

    def make_spatial_records(self, region: Region | None) -> Record:
        """Make spatial records for a given region.

        Parameters
        ----------
        region : `~lsst.sphgeom.Region` or `None`
            Spatial region to convert to record.

        Returns
        -------
        record : `dict`
            Record items.

        Raises
        ------
        RegionTypeError
            Raised if type of the region is not supported.
        """
        record = Record()
        # ask each plugin for its values to add to a record.
        for plugin in self.spatial_plugins:
            plugin_record = plugin.make_records(region)
            if plugin_record is not None:
                record.update(plugin_record)
        return record

    def _exposure_records(self, dimension_record: DimensionRecord, record: dict[str, Any]) -> None:
        """Extract all needed info from an exposure dimension record."""
        record["t_exptime"] = dimension_record.exposure_time
        record["target_name"] = dimension_record.target_name

    def _visit_records(self, dimension_record: DimensionRecord, record: dict[str, Any]) -> None:
        """Extract all needed info from a visit dimension record."""
        record["t_exptime"] = dimension_record.exposure_time
        record["target_name"] = dimension_record.target_name