Coverage for python/lsst/daf/butler/registry/obscore/_records.py: 15%
120 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-04-13 02:34 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ["ExposureRegionFactory", "Record", "RecordFactory"]
26import logging
27import warnings
28from abc import abstractmethod
29from collections.abc import Collection, Mapping
30from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, cast
31from uuid import UUID
33import astropy.time
34from lsst.daf.butler import DataCoordinate, DatasetRef, Dimension, DimensionRecord, DimensionUniverse
36from ._config import ExtraColumnConfig, ExtraColumnType, ObsCoreConfig
37from ._spatial import RegionTypeError, RegionTypeWarning
39if TYPE_CHECKING:
40 from lsst.sphgeom import Region
42 from ._schema import ObsCoreSchema
43 from ._spatial import SpatialObsCorePlugin
45if TYPE_CHECKING:
46 from ..queries import SqlQueryContext
_LOG = logging.getLogger(__name__)

# Map extra column type to a conversion method that takes a string.
# Used by RecordFactory when expanding ExtraColumnConfig templates.
_TYPE_CONVERSION: Mapping[str, Callable[[str], Any]] = {
    # Booleans are configured as integer-like numbers/strings ("0"/"1").
    ExtraColumnType.bool: lambda x: bool(int(x)),
    ExtraColumnType.int: int,
    ExtraColumnType.float: float,
    ExtraColumnType.string: str,
}
class ExposureRegionFactory:
    """Interface for factories that look up the spatial region of an
    exposure.

    Concrete implementations must override `exposure_region`; the base
    implementation always raises `NotImplementedError`.
    """

    @abstractmethod
    def exposure_region(self, dataId: DataCoordinate, context: SqlQueryContext) -> Optional[Region]:
        """Find the region corresponding to an exposure data ID.

        Parameters
        ----------
        dataId : `DataCoordinate`
            Data ID for an exposure dataset.
        context : `SqlQueryContext`
            Context used to execute queries for additional dimension metadata.

        Returns
        -------
        region : `Region` or `None`
            The exposure's region, or `None` when no region can be
            determined.
        """
        raise NotImplementedError()
# A single ObsCore record: mapping from column name to column value.
Record = dict[str, Any]
class RecordFactory:
    """Class that implements conversion of dataset information to ObsCore.

    Parameters
    ----------
    config : `ObsCoreConfig`
        Complete configuration specifying conversion options.
    schema : `ObsCoreSchema`
        Description of obscore schema.
    universe : `DimensionUniverse`
        Registry dimensions universe.
    spatial_plugins : `~collections.abc.Collection` [ `SpatialObsCorePlugin` ]
        Spatial plugins whose per-region values are merged into each record.
    exposure_region_factory : `ExposureRegionFactory`, optional
        Factory used to look up a region for exposure data IDs.
    """

    def __init__(
        self,
        config: ObsCoreConfig,
        schema: ObsCoreSchema,
        universe: DimensionUniverse,
        spatial_plugins: Collection[SpatialObsCorePlugin],
        exposure_region_factory: Optional[ExposureRegionFactory] = None,
    ):
        self.config = config
        self.schema = schema
        self.universe = universe
        self.exposure_region_factory = exposure_region_factory
        self.spatial_plugins = spatial_plugins

        # All dimension elements used below; cached here so __call__ does
        # not look them up in the universe on every dataset.
        self.band = cast(Dimension, universe["band"])
        self.exposure = universe["exposure"]
        self.visit = universe["visit"]
        self.physical_filter = cast(Dimension, universe["physical_filter"])

    def __call__(self, ref: DatasetRef, context: SqlQueryContext) -> Optional[Record]:
        """Make an ObsCore record from a dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            Dataset ref, its DataId must be in expanded form.
        context : `SqlQueryContext`
            Context used to execute queries for additional dimension metadata.

        Returns
        -------
        record : `dict` [ `str`, `Any` ] or `None`
            ObsCore record represented as a dictionary. `None` is returned if
            dataset does not need to be stored in the obscore table, e.g. when
            dataset type is not in obscore configuration.

        Notes
        -----
        This method filters records by dataset type and returns `None` if
        reference dataset type is not configured. It does not check reference
        run name against configured collections, all runs are acceptable by
        this method.
        """
        # Quick check for dataset type; unconfigured types are skipped.
        dataset_type_name = ref.datasetType.name
        dataset_config = self.config.dataset_types.get(dataset_type_name)
        if dataset_config is None:
            return None

        dataId = ref.dataId

        record: dict[str, str | int | float | UUID | None]

        # We need all columns filled, to simplify logic below just pre-fill
        # everything with None.
        record = {field.name: None for field in self.schema.table_spec.fields}

        # Static per-dataset-type values from configuration.
        record["dataproduct_type"] = dataset_config.dataproduct_type
        record["dataproduct_subtype"] = dataset_config.dataproduct_subtype
        record["o_ucd"] = dataset_config.o_ucd
        record["facility_name"] = self.config.facility_name
        record["calib_level"] = dataset_config.calib_level
        # Per-dataset-type obs_collection overrides the global one.
        if dataset_config.obs_collection is not None:
            record["obs_collection"] = dataset_config.obs_collection
        else:
            record["obs_collection"] = self.config.obs_collection
        record["access_format"] = dataset_config.access_format

        record["instrument_name"] = dataId.get("instrument")
        # Foreign key back to the datasets table, when the schema defines it.
        if self.schema.dataset_fk is not None:
            record[self.schema.dataset_fk.name] = ref.id

        # Temporal coverage in MJD, taken from the data ID's timespan.
        timespan = dataId.timespan
        if timespan is not None:
            if timespan.begin is not None:
                t_min = cast(astropy.time.Time, timespan.begin)
                record["t_min"] = t_min.mjd
            if timespan.end is not None:
                t_max = cast(astropy.time.Time, timespan.end)
                record["t_max"] = t_max.mjd

        region = dataId.region
        if self.exposure in dataId:
            if (dimension_record := dataId.records[self.exposure]) is not None:
                self._exposure_records(dimension_record, record)
            # For exposures the region is looked up via the factory
            # (presumably exposure data IDs do not carry a usable region
            # directly -- confirm with the factory implementation).
            if self.exposure_region_factory is not None:
                region = self.exposure_region_factory.exposure_region(dataId, context)
        elif self.visit in dataId:
            if (dimension_record := dataId.records[self.visit]) is not None:
                self._visit_records(dimension_record, record)

        # Ask each plugin for its values to add to a record; an unsupported
        # region type produces a warning instead of failing the record.
        try:
            plugin_records = self.make_spatial_records(region)
        except RegionTypeError as exc:
            warnings.warn(
                f"Failed to convert region for obscore dataset {ref.id}: {exc}",
                category=RegionTypeWarning,
            )
        else:
            record.update(plugin_records)

        if self.band in dataId:
            # Prefer the spectral range configured for the physical filter,
            # fall back to the one configured for the abstract band.
            em_range = None
            if (label := dataId.get(self.physical_filter)) is not None:
                em_range = self.config.spectral_ranges.get(label)
            if not em_range:
                band_name = dataId[self.band]
                assert isinstance(band_name, str), "Band name must be string"
                em_range = self.config.spectral_ranges.get(band_name)
            if em_range:
                record["em_min"], record["em_max"] = em_range
            else:
                _LOG.warning("could not find spectral range for dataId=%s", dataId.full)
            record["em_filter_name"] = dataId["band"]

        # Dictionary to use for substitutions when formatting various
        # strings.
        fmt_kws: Dict[str, Any] = dict(records=dataId.records)
        fmt_kws.update(dataId.full.byName())
        fmt_kws.update(id=ref.id)
        fmt_kws.update(run=ref.run)
        fmt_kws.update(dataset_type=dataset_type_name)
        fmt_kws.update(record)
        if dataset_config.obs_id_fmt:
            record["obs_id"] = dataset_config.obs_id_fmt.format(**fmt_kws)
            # obs_id becomes available to later templates (datalink_url_fmt).
            fmt_kws["obs_id"] = record["obs_id"]

        if dataset_config.datalink_url_fmt:
            record["access_url"] = dataset_config.datalink_url_fmt.format(**fmt_kws)

        # Add extra columns: global config first, then per-dataset-type
        # config, so the latter wins on key collisions.
        extra_columns = {}
        if self.config.extra_columns:
            extra_columns.update(self.config.extra_columns)
        if dataset_config.extra_columns:
            extra_columns.update(dataset_config.extra_columns)
        for key, column_value in extra_columns.items():
            # Try to expand the template with known keys; if expansion
            # fails due to a missing key name, keep the pre-filled None.
            if isinstance(column_value, ExtraColumnConfig):
                try:
                    value = column_value.template.format(**fmt_kws)
                    record[key] = _TYPE_CONVERSION[column_value.type](value)
                except KeyError:
                    pass
            else:
                # Just a static value.
                record[key] = column_value

        return record

    def make_spatial_records(self, region: Region | None) -> Record:
        """Make spatial records for a given region.

        Parameters
        ----------
        region : `~lsst.sphgeom.Region` or `None`
            Spatial region to convert to record.

        Returns
        -------
        record : `dict`
            Record items merged from all spatial plugins.

        Raises
        ------
        RegionTypeError
            Raised if type of the region is not supported.
        """
        record = Record()
        # Ask each plugin for its values to add to a record.
        for plugin in self.spatial_plugins:
            plugin_record = plugin.make_records(region)
            if plugin_record is not None:
                record.update(plugin_record)
        return record

    def _exposure_records(self, dimension_record: DimensionRecord, record: Dict[str, Any]) -> None:
        """Extract all needed info from an exposure dimension record.

        Fills ``t_exptime`` and ``target_name`` in ``record`` in place.
        """
        record["t_exptime"] = dimension_record.exposure_time
        record["target_name"] = dimension_record.target_name

    def _visit_records(self, dimension_record: DimensionRecord, record: Dict[str, Any]) -> None:
        """Extract all needed info from a visit dimension record.

        Fills ``t_exptime`` and ``target_name`` in ``record`` in place.
        """
        record["t_exptime"] = dimension_record.exposure_time
        record["target_name"] = dimension_record.target_name