Coverage for python/lsst/daf/butler/registry/obscore/_schema.py: 27%
59 statements
coverage.py v7.3.2, created at 2023-12-01 11:00 +0000
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ["ObsCoreSchema"]

import re
from collections.abc import Sequence
from typing import TYPE_CHECKING

import sqlalchemy
from lsst.daf.butler import ddl
from lsst.utils.iteration import ensure_iterable

from ._config import DatasetTypeConfig, ExtraColumnConfig, ObsCoreConfig
from ._spatial import SpatialObsCorePlugin

if TYPE_CHECKING:
    from ..interfaces import DatasetRecordStorageManager

# Regular expression to match templates in extra_columns that specify simple
# dimensions, e.g. "{exposure}".
_DIMENSION_TEMPLATE_RE = re.compile(r"^[{](\w+)[}]$")
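
# Illustrative matches for the template regex (the template strings below are
# hypothetical ``extra_columns`` values, not taken from any configuration):
#
#     >>> _DIMENSION_TEMPLATE_RE.match("{exposure}").group(1)
#     'exposure'
#     >>> _DIMENSION_TEMPLATE_RE.match("{exposure}-{detector}") is None
#     True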

# List of standard columns in output file. This should include at least all
# mandatory columns defined in ObsCore note (revision 1.1, Appendix B). Extra
# columns can be added via `extra_columns` parameters in configuration.
_STATIC_COLUMNS = (
    ddl.FieldSpec(
        name="dataproduct_type", dtype=sqlalchemy.String, length=255, doc="Logical data product type"
    ),
    ddl.FieldSpec(
        name="dataproduct_subtype", dtype=sqlalchemy.String, length=255, doc="Data product specific type"
    ),
    ddl.FieldSpec(
        name="facility_name",
        dtype=sqlalchemy.String,
        length=255,
        doc="The name of the facility used for the observation",
    ),
    ddl.FieldSpec(name="calib_level", dtype=sqlalchemy.SmallInteger, doc="Calibration level {0, 1, 2, 3, 4}"),
    ddl.FieldSpec(name="target_name", dtype=sqlalchemy.String, length=255, doc="Object of interest"),
    ddl.FieldSpec(name="obs_id", dtype=sqlalchemy.String, length=255, doc="Observation ID"),
    ddl.FieldSpec(
        name="obs_collection", dtype=sqlalchemy.String, length=255, doc="Name of the data collection"
    ),
    ddl.FieldSpec(
        name="obs_publisher_did",
        dtype=sqlalchemy.String,
        length=255,
        doc="Dataset identifier given by the publisher",
    ),
    ddl.FieldSpec(
        name="access_url", dtype=sqlalchemy.String, length=65535, doc="URL used to access (download) dataset"
    ),
    ddl.FieldSpec(name="access_format", dtype=sqlalchemy.String, length=255, doc="File content format"),
    # Spatial columns s_ra, s_dec, s_fov, s_region are managed by a default
    # spatial plugin.
    ddl.FieldSpec(
        name="s_resolution", dtype=sqlalchemy.Float, doc="Spatial resolution of data as FWHM (arcsec)"
    ),
    ddl.FieldSpec(
        name="s_xel1", dtype=sqlalchemy.Integer, doc="Number of elements along the first spatial axis"
    ),
    ddl.FieldSpec(
        name="s_xel2", dtype=sqlalchemy.Integer, doc="Number of elements along the second spatial axis"
    ),
    ddl.FieldSpec(name="t_xel", dtype=sqlalchemy.Integer, doc="Number of elements along the time axis"),
    ddl.FieldSpec(name="t_min", dtype=sqlalchemy.Float, doc="Start time in MJD"),
    ddl.FieldSpec(name="t_max", dtype=sqlalchemy.Float, doc="Stop time in MJD"),
    ddl.FieldSpec(name="t_exptime", dtype=sqlalchemy.Float, doc="Total exposure time (sec)"),
    ddl.FieldSpec(name="t_resolution", dtype=sqlalchemy.Float, doc="Temporal resolution (sec)"),
    ddl.FieldSpec(name="em_xel", dtype=sqlalchemy.Integer, doc="Number of elements along the spectral axis"),
    ddl.FieldSpec(name="em_min", dtype=sqlalchemy.Float, doc="Start in spectral coordinates (m)"),
    ddl.FieldSpec(name="em_max", dtype=sqlalchemy.Float, doc="Stop in spectral coordinates (m)"),
    ddl.FieldSpec(name="em_res_power", dtype=sqlalchemy.Float, doc="Spectral resolving power"),
    ddl.FieldSpec(
        name="em_filter_name", dtype=sqlalchemy.String, length=255, doc="Filter name (non-standard column)"
    ),
    ddl.FieldSpec(name="o_ucd", dtype=sqlalchemy.String, length=255, doc="UCD of observable"),
    ddl.FieldSpec(name="pol_xel", dtype=sqlalchemy.Integer, doc="Number of polarization samples"),
    ddl.FieldSpec(
        name="instrument_name",
        dtype=sqlalchemy.String,
        length=255,
        doc="Name of the instrument used for this observation",
    ),
)
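
# As a hypothetical illustration of the ``extra_columns`` configuration
# mentioned above, a dataset type could add a column backed by a dimension
# template (the column name, template, and YAML layout are assumptions made
# for this sketch):
#
#     extra_columns:
#       lsst_visit:
#         template: "{visit}"
#         type: int
#         doc: "Visit identifier"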

_TYPE_MAP = {
    int: sqlalchemy.BigInteger,
    float: sqlalchemy.Float,
    bool: sqlalchemy.Boolean,
    str: sqlalchemy.String,
}
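
# For plain (non-``ExtraColumnConfig``) values in ``extra_columns`` the column
# type is inferred from the Python type of the value via ``_TYPE_MAP``, with
# strings given a fixed length of 255; e.g. a hypothetical entry
# ``{"exposure_id": 12345}`` would yield a ``BigInteger`` column and
# ``{"observatory": "Rubin"}`` a ``String(255)`` column.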


class ObsCoreSchema:
    """Generate table specification for an ObsCore table based on its
    configuration.

    Parameters
    ----------
    config : `ObsCoreConfig`
        ObsCore configuration instance.
    spatial_plugins : `~collections.abc.Sequence` [`SpatialObsCorePlugin`]
        Spatial plugins that can extend the table specification with
        additional columns.
    datasets : `type`, optional
        Type of dataset records manager. If specified, the ObsCore table will
        define a foreign key to the ``datasets`` table with an
        "ON DELETE CASCADE" constraint.

    Notes
    -----
    This class is designed to support both a "live" obscore table, which is
    located in the same database as the Registry, and a standalone table in a
    completely separate database. The live obscore table depends on foreign
    key constraints with the "ON DELETE CASCADE" option to manage the lifetime
    of obscore records when their original datasets are removed.
    """

    def __init__(
        self,
        config: ObsCoreConfig,
        spatial_plugins: Sequence[SpatialObsCorePlugin],
        datasets: type[DatasetRecordStorageManager] | None = None,
    ):
        self._dimension_columns: dict[str, str] = {"instrument": "instrument_name"}

        fields = list(_STATIC_COLUMNS)

        column_names = {col.name for col in fields}

        all_configs: list[ObsCoreConfig | DatasetTypeConfig] = [config]
        if config.dataset_types:
            all_configs += list(config.dataset_types.values())
        for cfg in all_configs:
            if cfg.extra_columns:
                for col_name, col_value in cfg.extra_columns.items():
                    if col_name in column_names:
                        continue
                    doc: str | None = None
                    if isinstance(col_value, ExtraColumnConfig):
                        col_type = ddl.VALID_CONFIG_COLUMN_TYPES.get(col_value.type.name)
                        col_length = col_value.length
                        doc = col_value.doc
                        # For columns that store dimensions remember their
                        # column names.
                        if match := _DIMENSION_TEMPLATE_RE.match(col_value.template):
                            dimension = match.group(1)
                            self._dimension_columns[dimension] = col_name
                    else:
                        # Only value is provided, guess type from Python, and
                        # use a fixed length of 255 for strings.
                        col_type = _TYPE_MAP.get(type(col_value))
                        col_length = 255 if isinstance(col_value, str) else None
                    if col_type is None:
                        raise TypeError(
                            f"Unexpected type in extra_columns: column={col_name}, value={col_value}"
                        )
                    fields.append(ddl.FieldSpec(name=col_name, dtype=col_type, length=col_length, doc=doc))
                    column_names.add(col_name)

        indices: list[ddl.IndexSpec] = []
        if config.indices:
            for columns in config.indices.values():
                indices.append(ddl.IndexSpec(*ensure_iterable(columns)))

        self._table_spec = ddl.TableSpec(fields=fields, indexes=indices)

        # Give plugins a chance to extend the table specification.
        for plugin in spatial_plugins:
            plugin.extend_table_spec(self._table_spec)

        self._dataset_fk: ddl.FieldSpec | None = None
        if datasets is not None:
            # Add a foreign key to the datasets table; it also serves as the
            # primary key for this table.
            self._dataset_fk = datasets.addDatasetForeignKey(
                self._table_spec, name="registry_dataset", onDelete="CASCADE", doc="Registry dataset ID"
            )
            self._dataset_fk.primaryKey = True

    @property
    def table_spec(self) -> ddl.TableSpec:
        """Specification for the obscore table (`ddl.TableSpec`)."""
        return self._table_spec

    @property
    def dataset_fk(self) -> ddl.FieldSpec | None:
        """Specification for the field which is a foreign key to the
        ``datasets`` table and also the primary key for the obscore table
        (`ddl.FieldSpec` or `None`).
        """
        return self._dataset_fk

    def dimension_column(self, dimension: str) -> str | None:
        """Return the column name for a given dimension.

        Parameters
        ----------
        dimension : `str`
            Dimension name, e.g. "exposure".

        Returns
        -------
        column_name : `str` or `None`
            Name of the column in the obscore table, or `None` if there is no
            configured column for this dimension.
        """
        return self._dimension_columns.get(dimension)
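
# A minimal usage sketch (the configuration object, plugin sequence, and
# dimension name below are assumptions for illustration only):
#
#     schema = ObsCoreSchema(config=obscore_config, spatial_plugins=plugins)
#     spec = schema.table_spec                      # `ddl.TableSpec` for the obscore table
#     column = schema.dimension_column("exposure")  # column name, or None if not configured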