Coverage for python/lsst/daf/butler/queries/tree/_column_set.py: 25%
100 statements
coverage.py v7.4.4, created at 2024-04-15 02:03 -0700
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ("ColumnSet",)

from collections.abc import Iterable, Iterator, Mapping, Set

from ... import column_spec
from ...dimensions import DimensionGroup
from ...nonempty_mapping import NonemptyMapping

class ColumnSet:
    """A set-like hierarchical container for the columns in a query.

    Parameters
    ----------
    dimensions : `DimensionGroup`
        The dimensions that bound the set of columns, and by default specify
        the set of dimension key columns present.

    Notes
    -----
    This class does not inherit from `collections.abc.Set` because that brings
    in a lot of requirements we don't need (particularly interoperability with
    other set-like objects).

    This class is iterable over tuples of ``(logical_table, field)``, where
    ``logical_table`` is a dimension element name or dataset type name, and
    ``field`` is a column associated with one of those, or `None` for
    dimension key columns.  Iteration order is guaranteed to be deterministic
    and to start with all included dimension keys in
    `DimensionGroup.data_coordinate_keys`.
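
    Examples
    --------
    A minimal usage sketch; ``universe`` is assumed to be a
    `DimensionUniverse`, and ``visit``, ``seeing``, and ``raw`` are stand-ins
    for a dimension name, a dimension record field, and a dataset type name
    in that universe::

        columns = ColumnSet(universe.conform(["visit"]))
        columns.dimension_fields["visit"].add("seeing")
        columns.dataset_fields["raw"].add("dataset_id")
        for logical_table, field in columns:
            # Dimension keys come first (with field None), then dimension
            # record fields, then dataset fields sorted by name.
            ...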
    """

    def __init__(self, dimensions: DimensionGroup) -> None:
        self._dimensions = dimensions
        self._removed_dimension_keys: set[str] = set()
        self._dimension_fields: dict[str, set[str]] = {name: set() for name in dimensions.elements}
        self._dataset_fields = NonemptyMapping[str, set[str]](set)

    @property
    def dimensions(self) -> DimensionGroup:
        """The dimensions that bound all columns in the set."""
        return self._dimensions

    @property
    def dimension_fields(self) -> Mapping[str, set[str]]:
        """Dimension record fields included in the set, grouped by dimension
        element name.

        The keys of this mapping are always ``self.dimensions.elements``, and
        nested sets may be empty.
        """
        return self._dimension_fields

    @property
    def dataset_fields(self) -> NonemptyMapping[str, set[str]]:
        """Dataset fields included in the set, grouped by dataset type name.

        The keys of this mapping are just those that actually have nonempty
        nested sets.
        """
        return self._dataset_fields

    def __bool__(self) -> bool:
        return bool(self._dimensions) or any(self._dataset_fields.values())

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, ColumnSet):
            return False
        return (
            self._dimensions == other._dimensions
            and self._removed_dimension_keys == other._removed_dimension_keys
            and self._dimension_fields == other._dimension_fields
            and self._dataset_fields == other._dataset_fields
        )

    def __str__(self) -> str:
        return f"{{{', '.join(self.get_qualified_name(k, v) for k, v in self)}}}"

    def issubset(self, other: ColumnSet) -> bool:
        """Test whether all columns in this set are also in another.

        Parameters
        ----------
        other : `ColumnSet`
            Set of columns to compare to.

        Returns
        -------
        issubset : `bool`
            Whether all columns in ``self`` are also in ``other``.
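
        Examples
        --------
        A minimal sketch; ``small`` and ``large`` are assumed to be
        `DimensionGroup` objects with ``small.issubset(large)``::

            a = ColumnSet(small)
            b = ColumnSet(large)
            assert a.issubset(b)
            assert b.issuperset(a)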
        """
        return (
            (self._get_dimension_keys() <= other._get_dimension_keys())
            and all(
                fields.issubset(other._dimension_fields.get(element_name, frozenset()))
                for element_name, fields in self._dimension_fields.items()
            )
            and all(
                fields.issubset(other._dataset_fields.get(dataset_type, frozenset()))
                for dataset_type, fields in self._dataset_fields.items()
            )
        )

    def issuperset(self, other: ColumnSet) -> bool:
        """Test whether all columns in another set are also in this one.

        Parameters
        ----------
        other : `ColumnSet`
            Set of columns to compare to.

        Returns
        -------
        issuperset : `bool`
            Whether all columns in ``other`` are also in ``self``.
        """
        return other.issubset(self)

    def isdisjoint(self, other: ColumnSet) -> bool:
        """Test whether there are no columns in both this set and another.

        Parameters
        ----------
        other : `ColumnSet`
            Set of columns to compare to.

        Returns
        -------
        isdisjoint : `bool`
            Whether there are no columns in both ``self`` and ``other``.
        """
        return (
            self._get_dimension_keys().isdisjoint(other._get_dimension_keys())
            and all(
                fields.isdisjoint(other._dimension_fields.get(element, frozenset()))
                for element, fields in self._dimension_fields.items()
            )
            and all(
                fields.isdisjoint(other._dataset_fields.get(dataset_type, frozenset()))
                for dataset_type, fields in self._dataset_fields.items()
            )
        )

    def copy(self) -> ColumnSet:
        """Return a copy of this set.

        Returns
        -------
        copy : `ColumnSet`
            New column set that can be modified without changing the original.
        """
        result = ColumnSet(self._dimensions)
        for element_name, element_fields in self._dimension_fields.items():
            result._dimension_fields[element_name].update(element_fields)
        for dataset_type, dataset_fields in self._dataset_fields.items():
            result._dataset_fields[dataset_type].update(dataset_fields)
        return result

    def update_dimensions(self, dimensions: DimensionGroup) -> None:
        """Add new dimensions to the set.

        Parameters
        ----------
        dimensions : `DimensionGroup`
            Dimensions to be included.
        """
        if not dimensions.issubset(self._dimensions):
            self._dimensions = dimensions.union(self._dimensions)
            self._dimension_fields = {
                name: self._dimension_fields.get(name, set()) for name in self._dimensions.elements
            }
        self._removed_dimension_keys.intersection_update(dimensions.names)

    def update(self, other: ColumnSet) -> None:
        """Add columns from another set to this one.

        Parameters
        ----------
        other : `ColumnSet`
            Column set whose columns should be included in this one.
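
        Examples
        --------
        A minimal sketch; ``group_a`` and ``group_b`` are assumed to be
        `DimensionGroup` objects from the same universe, and ``"raw"`` is a
        stand-in for a dataset type name::

            a = ColumnSet(group_a)
            b = ColumnSet(group_b)
            b.dataset_fields["raw"].add("timespan")
            a.update(b)
            # ``a`` now covers the union of the two dimension groups and
            # includes the "raw" timespan field.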
        """
        self.update_dimensions(other.dimensions)
        self._removed_dimension_keys.intersection_update(other._removed_dimension_keys)
        for element_name, element_fields in other._dimension_fields.items():
            self._dimension_fields[element_name].update(element_fields)
        for dataset_type, dataset_fields in other._dataset_fields.items():
            self._dataset_fields[dataset_type].update(dataset_fields)

    def drop_dimension_keys(self, names: Iterable[str]) -> ColumnSet:
        """Remove the given dimension key columns from the set.

        Parameters
        ----------
        names : `~collections.abc.Iterable` [ `str` ]
            Names of the dimensions to remove.

        Returns
        -------
        self : `ColumnSet`
            This column set, modified in place.
        """
        self._removed_dimension_keys.update(names)
        return self

    def drop_implied_dimension_keys(self) -> ColumnSet:
        """Remove dimension key columns that are implied by others.

        Returns
        -------
        self : `ColumnSet`
            This column set, modified in place.
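
        Examples
        --------
        A minimal sketch; ``group`` is assumed to be a `DimensionGroup` with
        at least one implied dimension::

            columns = ColumnSet(group).drop_implied_dimension_keys()
            # Only the required dimension keys are yielded now; implied keys
            # can be brought back with restore_dimension_keys().
            keys = [name for name, field in columns if field is None]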
        """
        return self.drop_dimension_keys(self._dimensions.implied)

    def restore_dimension_keys(self) -> None:
        """Restore all removed dimension key columns."""
        self._removed_dimension_keys.clear()

    def __iter__(self) -> Iterator[tuple[str, str | None]]:
        for dimension_name in self._dimensions.data_coordinate_keys:
            if dimension_name not in self._removed_dimension_keys:
                yield dimension_name, None
        # We iterate over DimensionElements and their DimensionRecord columns
        # in order to make sure that's predictable.  We might want to extract
        # these query results positionally in some contexts.
        for element_name in self._dimensions.elements:
            element = self._dimensions.universe[element_name]
            fields_for_element = self._dimension_fields[element_name]
            for spec in element.schema.remainder:
                if spec.name in fields_for_element:
                    yield element_name, spec.name
        # We sort dataset types and their fields lexicographically just to
        # keep our queries from having any dependence on set-iteration order.
        for dataset_type in sorted(self._dataset_fields):
            for field in sorted(self._dataset_fields[dataset_type]):
                yield dataset_type, field

    def is_timespan(self, logical_table: str, field: str | None) -> bool:
        """Test whether the given column is a timespan.

        Parameters
        ----------
        logical_table : `str`
            Name of the dimension element or dataset type the column belongs
            to.
        field : `str` or `None`
            Column within the logical table, or `None` for dimension key
            columns.

        Returns
        -------
        is_timespan : `bool`
            Whether this column is a timespan.
        """
        return field == "timespan"

    @staticmethod
    def get_qualified_name(logical_table: str, field: str | None) -> str:
        """Return the string that should be used to fully identify a column.

        Parameters
        ----------
        logical_table : `str`
            Name of the dimension element or dataset type the column belongs
            to.
        field : `str` or `None`
            Column within the logical table, or `None` for dimension key
            columns.

        Returns
        -------
        name : `str`
            Fully-qualified name.
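
        Examples
        --------
        The qualified name is the logical table name alone for dimension keys,
        and ``"table:field"`` otherwise (``visit`` and ``seeing`` are just
        illustrative names here):

        >>> ColumnSet.get_qualified_name("visit", None)
        'visit'
        >>> ColumnSet.get_qualified_name("visit", "seeing")
        'visit:seeing'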
        """
        return logical_table if field is None else f"{logical_table}:{field}"

    def get_column_spec(self, logical_table: str, field: str | None) -> column_spec.ColumnSpec:
        """Return a complete description of a column.

        Parameters
        ----------
        logical_table : `str`
            Name of the dimension element or dataset type the column belongs
            to.
        field : `str` or `None`
            Column within the logical table, or `None` for dimension key
            columns.

        Returns
        -------
        spec : `.column_spec.ColumnSpec`
            Description of the column.
        """
        qualified_name = self.get_qualified_name(logical_table, field)
        if field is None:
            return self._dimensions.universe.dimensions[logical_table].primary_key.model_copy(
                update=dict(name=qualified_name)
            )
        if logical_table in self._dimension_fields:
            return (
                self._dimensions.universe[logical_table]
                .schema.all[field]
                .model_copy(update=dict(name=qualified_name))
            )
        match field:
            case "dataset_id":
                return column_spec.UUIDColumnSpec.model_construct(name=qualified_name, nullable=False)
            case "ingest_date":
                return column_spec.DateTimeColumnSpec.model_construct(name=qualified_name)
            case "run":
                return column_spec.StringColumnSpec.model_construct(
                    name=qualified_name, nullable=False, length=column_spec.COLLECTION_NAME_MAX_LENGTH
                )
            case "collection":
                return column_spec.StringColumnSpec.model_construct(
                    name=qualified_name, nullable=False, length=column_spec.COLLECTION_NAME_MAX_LENGTH
                )
            case "timespan":
                return column_spec.TimespanColumnSpec.model_construct(name=qualified_name, nullable=False)
        raise AssertionError(f"Unrecognized column identifiers: {logical_table}, {field}.")

    def _get_dimension_keys(self) -> Set[str]:
        if not self._removed_dimension_keys:
            return self._dimensions.names
        else:
            return self._dimensions.names - self._removed_dimension_keys