Coverage for python/lsst/daf/butler/queries/tree/_column_set.py: 25%
100 statements
coverage.py v7.4.4, created at 2024-04-05 02:53 -0700
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("ColumnSet",)

from collections.abc import Iterable, Iterator, Mapping, Set

from ... import column_spec
from ...dimensions import DimensionGroup
from ...nonempty_mapping import NonemptyMapping


class ColumnSet:
    """A set-like hierarchical container for the columns in a query.

    Parameters
    ----------
    dimensions : `DimensionGroup`
        The dimensions that bound the set of columns, and by default specify
        the set of dimension key columns present.

    Notes
    -----
    This class does not inherit from `collections.abc.Set` because that brings
    in a lot of requirements we don't need (particularly interoperability with
    other set-like objects).

    This class is iterable over tuples of ``(logical_table, field)``, where
    ``logical_table`` is a dimension element name or dataset type name, and
    ``field`` is a column associated with one of those, or `None` for dimension
    key columns.  Iteration order is guaranteed to be deterministic and to
    start with all included dimension keys in `DimensionGroup.dimension_
    """

    def __init__(self, dimensions: DimensionGroup) -> None:
        self._dimensions = dimensions
        self._removed_dimension_keys: set[str] = set()
        self._dimension_fields: dict[str, set[str]] = {name: set() for name in dimensions.elements}
        self._dataset_fields = NonemptyMapping[str, set[str]](set)

    @property
    def dimensions(self) -> DimensionGroup:
        """The dimensions that bound all columns in the set."""
        return self._dimensions

    @property
    def dimension_fields(self) -> Mapping[str, set[str]]:
        """Dimension record fields included in the set, grouped by dimension
        element name.

        The keys of this mapping are always ``self.dimensions.elements``, and
        nested sets may be empty.
        """
        return self._dimension_fields

    @property
    def dataset_fields(self) -> NonemptyMapping[str, set[str]]:
        """Dataset fields included in the set, grouped by dataset type name.

        The keys of this mapping are just those that actually have nonempty
        nested sets.
        """
        return self._dataset_fields

    def __bool__(self) -> bool:
        return bool(self._dimensions) or any(self._dataset_fields.values())

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, ColumnSet):
            return False
        return (
            self._dimensions == other._dimensions
            and self._removed_dimension_keys == other._removed_dimension_keys
            and self._dimension_fields == other._dimension_fields
            and self._dataset_fields == other._dataset_fields
        )

    def __str__(self) -> str:
        return f"{{{', '.join(self.get_qualified_name(k, v) for k, v in self)}}}"

    def issubset(self, other: ColumnSet) -> bool:
        """Test whether all columns in this set are also in another.

        Parameters
        ----------
        other : `ColumnSet`
            Set of columns to compare to.

        Returns
        -------
        issubset : `bool`
            Whether all columns in ``self`` are also in ``other``.
        """
        return (
            (self._get_dimension_keys() <= other._get_dimension_keys())
            and all(
                fields.issubset(other._dimension_fields.get(element_name, frozenset()))
                for element_name, fields in self._dimension_fields.items()
            )
            and all(
                fields.issubset(other._dataset_fields.get(dataset_type, frozenset()))
                for dataset_type, fields in self._dataset_fields.items()
            )
        )

    def issuperset(self, other: ColumnSet) -> bool:
        """Test whether all columns in another set are also in this one.

        Parameters
        ----------
        other : `ColumnSet`
            Set of columns to compare to.

        Returns
        -------
        issuperset : `bool`
            Whether all columns in ``other`` are also in ``self``.
        """
        return other.issubset(self)

    def isdisjoint(self, other: ColumnSet) -> bool:
        """Test whether there are no columns in both this set and another.

        Parameters
        ----------
        other : `ColumnSet`
            Set of columns to compare to.

        Returns
        -------
        isdisjoint : `bool`
            Whether there are no columns in both ``self`` and ``other``.
        """
        return (
            self._get_dimension_keys().isdisjoint(other._get_dimension_keys())
            and all(
                fields.isdisjoint(other._dimension_fields.get(element, frozenset()))
                for element, fields in self._dimension_fields.items()
            )
            and all(
                fields.isdisjoint(other._dataset_fields.get(dataset_type, frozenset()))
                for dataset_type, fields in self._dataset_fields.items()
            )
        )
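
    # Example sketch of the comparisons, assuming ``a`` and ``b`` are
    # `ColumnSet` instances:
    #
    #     a.issubset(b)    # every column in ``a`` is also in ``b``
    #     a.issuperset(b)  # the same test with the roles reversed
    #     a.isdisjoint(b)  # ``a`` and ``b`` share no columns at all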

    def copy(self) -> ColumnSet:
        """Return a copy of this set.

        Returns
        -------
        copy : `ColumnSet`
            New column set that can be modified without changing the original.
        """
        result = ColumnSet(self._dimensions)
        for element_name, element_fields in self._dimension_fields.items():
            result._dimension_fields[element_name].update(element_fields)
        for dataset_type, dataset_fields in self._dataset_fields.items():
            result._dataset_fields[dataset_type].update(dataset_fields)
        return result
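
    # Example sketch (``columns`` as in the class-level example; the field name
    # is only illustrative): modifying a copy leaves the original unchanged.
    #
    #     expanded = columns.copy()
    #     expanded.dimension_fields["detector"].add("purpose")
    #     assert "purpose" not in columns.dimension_fields["detector"]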

    def update_dimensions(self, dimensions: DimensionGroup) -> None:
        """Add new dimensions to the set.

        Parameters
        ----------
        dimensions : `DimensionGroup`
            Dimensions to be included.
        """
        if not dimensions.issubset(self._dimensions):
            self._dimensions = dimensions.union(self._dimensions)
            self._dimension_fields = {
                name: self._dimension_fields.get(name, set()) for name in self._dimensions.elements
            }
        self._removed_dimension_keys.intersection_update(dimensions.names)

    def update(self, other: ColumnSet) -> None:
        """Add columns from another set to this one.

        Parameters
        ----------
        other : `ColumnSet`
            Column set whose columns should be included in this one.
        """
        self.update_dimensions(other.dimensions)
        self._removed_dimension_keys.intersection_update(other._removed_dimension_keys)
        for element_name, element_fields in other._dimension_fields.items():
            self._dimension_fields[element_name].update(element_fields)
        for dataset_type, dataset_fields in other._dataset_fields.items():
            self._dataset_fields[dataset_type].update(dataset_fields)
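
    # Example sketch, assuming ``a`` and ``b`` are `ColumnSet` instances:
    #
    #     a.update_dimensions(b.dimensions)  # grow the bounding dimensions only
    #     a.update(b)                        # also pull in all of ``b``'s fields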

    def drop_dimension_keys(self, names: Iterable[str]) -> ColumnSet:
        """Remove the given dimension key columns from the set.

        Parameters
        ----------
        names : `~collections.abc.Iterable` [ `str` ]
            Names of the dimensions to remove.

        Returns
        -------
        self : `ColumnSet`
            This column set, modified in place.
        """
        self._removed_dimension_keys.update(names)
        return self

    def drop_implied_dimension_keys(self) -> ColumnSet:
        """Remove dimension key columns that are implied by others.

        Returns
        -------
        self : `ColumnSet`
            This column set, modified in place.
        """
        return self.drop_dimension_keys(self._dimensions.implied)

    def restore_dimension_keys(self) -> None:
        """Restore all removed dimension key columns."""
        self._removed_dimension_keys.clear()
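
    # Example sketch (``columns`` as above; "band" is an illustrative dimension
    # name): key columns can be hidden and later restored without changing the
    # bounding dimensions.
    #
    #     columns.drop_dimension_keys(["band"])
    #     columns.drop_implied_dimension_keys()
    #     columns.restore_dimension_keys()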

    def __iter__(self) -> Iterator[tuple[str, str | None]]:
        for dimension_name in self._dimensions.data_coordinate_keys:
            if dimension_name not in self._removed_dimension_keys:
                yield dimension_name, None
        # We iterate over DimensionElements and their DimensionRecord columns
        # in order to make sure that's predictable.  We might want to extract
        # these query results positionally in some contexts.
        for element_name in self._dimensions.elements:
            element = self._dimensions.universe[element_name]
            fields_for_element = self._dimension_fields[element_name]
            for spec in element.schema.remainder:
                if spec.name in fields_for_element:
                    yield element_name, spec.name
        # We sort dataset types and their fields lexicographically just to keep
        # our queries from having any dependence on set-iteration order.
        for dataset_type in sorted(self._dataset_fields):
            for field in sorted(self._dataset_fields[dataset_type]):
                yield dataset_type, field
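
    # Example sketch of the iteration order (``columns`` as above; the exact
    # key sequence depends on the dimension universe):
    #
    #     for logical_table, field in columns:
    #         ...  # dimension keys (``field is None``) come first, then
    #              # dimension record fields, then dataset fields sorted by name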

    def is_timespan(self, logical_table: str, field: str | None) -> bool:
        """Test whether the given column is a timespan.

        Parameters
        ----------
        logical_table : `str`
            Name of the dimension element or dataset type the column belongs
            to.
        field : `str` or `None`
            Column within the logical table, or `None` for dimension key
            columns.

        Returns
        -------
        is_timespan : `bool`
            Whether this column is a timespan.
        """
        return field == "timespan"

    @staticmethod
    def get_qualified_name(logical_table: str, field: str | None) -> str:
        """Return the string that should be used to fully identify a column.

        Parameters
        ----------
        logical_table : `str`
            Name of the dimension element or dataset type the column belongs
            to.
        field : `str` or `None`
            Column within the logical table, or `None` for dimension key
            columns.

        Returns
        -------
        name : `str`
            Fully-qualified name.
        """
        return logical_table if field is None else f"{logical_table}:{field}"
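
    # Example sketch (the names are illustrative):
    #
    #     ColumnSet.get_qualified_name("visit", None)             # "visit"
    #     ColumnSet.get_qualified_name("visit", "exposure_time")  # "visit:exposure_time"
    #     ColumnSet.get_qualified_name("raw", "dataset_id")       # "raw:dataset_id"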

    def get_column_spec(self, logical_table: str, field: str | None) -> column_spec.ColumnSpec:
        """Return a complete description of a column.

        Parameters
        ----------
        logical_table : `str`
            Name of the dimension element or dataset type the column belongs
            to.
        field : `str` or `None`
            Column within the logical table, or `None` for dimension key
            columns.

        Returns
        -------
        spec : `.column_spec.ColumnSpec`
            Description of the column.
        """
        qualified_name = self.get_qualified_name(logical_table, field)
        if field is None:
            return self._dimensions.universe.dimensions[logical_table].primary_key.model_copy(
                update=dict(name=qualified_name)
            )
        if logical_table in self._dimension_fields:
            return (
                self._dimensions.universe[logical_table]
                .schema.all[field]
                .model_copy(update=dict(name=qualified_name))
            )
        match field:
            case "dataset_id":
                return column_spec.UUIDColumnSpec.model_construct(name=qualified_name, nullable=False)
            case "ingest_date":
                return column_spec.DateTimeColumnSpec.model_construct(name=qualified_name)
            case "run":
                return column_spec.StringColumnSpec.model_construct(
                    name=qualified_name, nullable=False, length=column_spec.COLLECTION_NAME_MAX_LENGTH
                )
            case "collection":
                return column_spec.StringColumnSpec.model_construct(
                    name=qualified_name, nullable=False, length=column_spec.COLLECTION_NAME_MAX_LENGTH
                )
            case "timespan":
                return column_spec.TimespanColumnSpec.model_construct(name=qualified_name, nullable=False)
        raise AssertionError(f"Unrecognized column identifiers: {logical_table}, {field}.")
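
    # Example sketch (``columns`` as above): the returned objects are column
    # specifications renamed to their qualified names.
    #
    #     columns.get_column_spec("visit", None)        # the "visit" primary-key spec
    #     columns.get_column_spec("raw", "dataset_id")  # a UUID spec named "raw:dataset_id"
    #     columns.get_column_spec("raw", "timespan")    # a non-nullable timespan spec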

    def _get_dimension_keys(self) -> Set[str]:
        if not self._removed_dimension_keys:
            return self._dimensions.names
        else:
            return self._dimensions.names - self._removed_dimension_keys