Coverage for python/lsst/daf/butler/dimensions/_elements.py: 70%
123 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-27 09:44 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-27 09:44 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30__all__ = (
31 "Dimension",
32 "DimensionCombination",
33 "DimensionElement",
34)
36from abc import abstractmethod
37from typing import TYPE_CHECKING, Any, ClassVar
39from lsst.utils.classes import cached_getter
41from .. import ddl
42from .._named import NamedValueAbstractSet, NamedValueSet
43from .._topology import TopologicalRelationshipEndpoint
44from ..json import from_json_generic, to_json_generic
46if TYPE_CHECKING: # Imports needed only for type annotations; may be circular.
47 from ..registry import Registry
48 from ._governor import GovernorDimension
49 from ._graph import DimensionGraph
50 from ._records import DimensionRecord
51 from ._universe import DimensionUniverse
class DimensionElement(TopologicalRelationshipEndpoint):
    """A label and/or metadata in the dimensions system.

    A named data-organization concept that defines a label and/or metadata
    in the dimensions system.

    A `DimensionElement` instance typically corresponds to a _logical_ table in
    the `Registry`: either an actual database table or a way of generating rows
    on-the-fly that can similarly participate in queries.  The rows in that
    table are represented by instances of a `DimensionRecord` subclass.  Most
    `DimensionElement` instances are instances of its `Dimension` subclass,
    which is used for elements that can be used as data ID keys.

    Notes
    -----
    `DimensionElement` instances should always be constructed by and retrieved
    from a `DimensionUniverse`.  They are immutable after they are fully
    constructed, and should never be copied.

    Pickling a `DimensionElement` just records its name and universe;
    unpickling one actually just looks up the element via the singleton
    dictionary of all universes.  This allows pickle to be used to transfer
    elements between processes, but only when each process initializes its own
    instance of the same `DimensionUniverse`.
    """

    def __str__(self) -> str:
        return self.name

    def __repr__(self) -> str:
        return f"{type(self).__name__}({self.name})"

    def __eq__(self, other: Any) -> bool:
        try:
            return self.name == other.name
        except AttributeError:
            # TODO: try removing this fallback; it's not really consistent with
            # base class intent, and it could be confusing
            return self.name == other

    def __hash__(self) -> int:
        return hash(self.name)

    # TODO: try removing comparison operators; DimensionUniverse.sorted should
    # be adequate.

    def _comparison_indices(self, other: Any) -> tuple[Any, Any] | None:
        """Look up the universe indices used to order two elements.

        Parameters
        ----------
        other : `typing.Any`
            Object being compared against ``self``.

        Returns
        -------
        indices : `tuple` or `None`
            The values returned by ``self.universe.getElementIndex`` for
            ``self.name`` and ``other.name``, in that order, or `None` if
            ``other`` has no ``name`` attribute or either lookup raises
            `KeyError`.
        """
        try:
            other_name = other.name
        except AttributeError:
            # Not element-like (e.g. a plain `str`); let Python try the
            # reflected operation instead of leaking AttributeError.
            return None
        try:
            return (
                self.universe.getElementIndex(self.name),
                self.universe.getElementIndex(other_name),
            )
        except KeyError:
            return None

    def __lt__(self, other: DimensionElement) -> bool:
        indices = self._comparison_indices(other)
        if indices is None:
            return NotImplemented
        return indices[0] < indices[1]

    def __le__(self, other: DimensionElement) -> bool:
        indices = self._comparison_indices(other)
        if indices is None:
            return NotImplemented
        return indices[0] <= indices[1]

    def __gt__(self, other: DimensionElement) -> bool:
        indices = self._comparison_indices(other)
        if indices is None:
            return NotImplemented
        return indices[0] > indices[1]

    def __ge__(self, other: DimensionElement) -> bool:
        indices = self._comparison_indices(other)
        if indices is None:
            return NotImplemented
        return indices[0] >= indices[1]

    @classmethod
    def _unpickle(cls, universe: DimensionUniverse, name: str) -> DimensionElement:
        """Callable used for unpickling.

        For internal use only.
        """
        return universe[name]

    def __reduce__(self) -> tuple:
        return (self._unpickle, (self.universe, self.name))

    def __deepcopy__(self, memo: dict) -> DimensionElement:
        # DimensionElement is recursively immutable; see note in @immutable
        # decorator.
        return self

    def to_simple(self, minimal: bool = False) -> str:
        """Convert this class to a simple python type.

        This is suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. Has no effect for this class.

        Returns
        -------
        simple : `str`
            The object converted to a single string.
        """
        return self.name

    @classmethod
    def from_simple(
        cls, simple: str, universe: DimensionUniverse | None = None, registry: Registry | None = None
    ) -> DimensionElement:
        """Construct a new object from the simplified form.

        Usually the data is returned from the `to_simple` method.

        Parameters
        ----------
        simple : `str`
            The value returned by `to_simple()`.
        universe : `DimensionUniverse`, optional
            The special graph of all known dimensions.  Takes precedence over
            ``registry`` when both are given.
        registry : `lsst.daf.butler.Registry`, optional
            Registry from which a universe can be extracted. Can be `None`
            if universe is provided explicitly.

        Returns
        -------
        element : `DimensionElement`
            The element registered under the name ``simple`` in the universe.

        Raises
        ------
        ValueError
            Raised if neither ``universe`` nor ``registry`` is provided, or
            if the registry does not yield a usable universe.
        """
        if universe is None:
            if registry is None:
                raise ValueError(
                    "One of universe or registry is required to convert a str to a DimensionElement"
                )
            universe = registry.dimensions
            if universe is None:
                # this is for mypy
                raise ValueError("Unable to determine a usable universe")

        return universe[simple]

    to_json = to_json_generic
    from_json: ClassVar = classmethod(from_json_generic)

    def hasTable(self) -> bool:
        """Indicate if this element is associated with a table.

        Return `True` if this element is associated with a table
        (even if that table "belongs" to another element).
        """
        return True

    universe: DimensionUniverse
    """The universe of all compatible dimensions with which this element is
    associated (`DimensionUniverse`).
    """

    @property
    @cached_getter
    def governor(self) -> GovernorDimension | None:
        """Return the governor dimension.

        This is the `GovernorDimension` that is a required dependency of this
        element, or `None` if there is no such dimension (`GovernorDimension`
        or `None`).

        Raises
        ------
        RuntimeError
            Raised if this element's graph has more than one governor.
        """
        # Fetch once rather than re-evaluating the property chain per branch.
        governors = self.graph.governors
        if len(governors) == 1:
            (result,) = governors
            return result
        elif len(governors) > 1:
            raise RuntimeError(f"Dimension element {self.name} has multiple governors: {governors}.")
        else:
            return None

    @property
    @abstractmethod
    def required(self) -> NamedValueAbstractSet[Dimension]:
        """Return the required dimensions.

        Dimensions that are necessary to uniquely identify a record of this
        dimension element.

        For elements with a database representation, these dimensions are
        exactly those used to form the (possibly compound) primary key, and all
        dimensions here that are not ``self`` are also used to form foreign
        keys.

        For `Dimension` instances, this should be exactly the same as
        ``graph.required``, but that may not be true for `DimensionElement`
        instances in general.  When they differ, there are multiple
        combinations of dimensions that uniquely identify this element, but
        this one is more direct.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def implied(self) -> NamedValueAbstractSet[Dimension]:
        """Return the implied dimensions.

        Other dimensions that are uniquely identified directly by a record
        of this dimension element.

        For elements with a database representation, these are exactly the
        dimensions used to form foreign key constraints whose fields are not
        (wholly) also part of the primary key.

        Unlike ``self.graph.implied``, this set is not expanded recursively.
        """
        raise NotImplementedError()

    @property
    @cached_getter
    def dimensions(self) -> NamedValueAbstractSet[Dimension]:
        """Return all dimensions.

        The union of `required` and `implied`, with all elements in
        `required` before any elements in `implied`.

        This differs from ``self.graph.dimensions`` both in order and in
        content:

        - as in ``self.implied``, implied dimensions are not expanded
          recursively here;
        - implied dimensions appear after required dimensions here, instead of
          being topologically ordered.

        As a result, this set is ordered consistently with
        ``self.RecordClass.fields``.
        """
        return NamedValueSet([*self.required, *self.implied]).freeze()

    @property
    @cached_getter
    def graph(self) -> DimensionGraph:
        """Return minimal graph that includes this element (`DimensionGraph`).

        ``self.graph.required`` includes all dimensions whose primary key
        values are sufficient (often necessary) to uniquely identify ``self``
        (including ``self`` if ``isinstance(self, Dimension)``).
        ``self.graph.implied`` includes all dimensions also identified
        (possibly recursively) by this set.
        """
        return self.universe.extract(self.dimensions.names)

    @property
    @cached_getter
    def RecordClass(self) -> type[DimensionRecord]:
        """Return the record subclass for this element.

        The `DimensionRecord` subclass used to hold records for this element
        (`type`).

        Because `DimensionRecord` subclasses are generated dynamically, this
        type cannot be imported directly and hence can only be obtained from
        this attribute.
        """
        # Imported here rather than at module scope; the module-level import
        # of ._records is guarded by TYPE_CHECKING because it may be circular.
        from ._records import _subclassDimensionRecord

        return _subclassDimensionRecord(self)

    @property
    @abstractmethod
    def metadata(self) -> NamedValueAbstractSet[ddl.FieldSpec]:
        """Additional metadata fields included in this element's table.

        (`NamedValueSet` of `FieldSpec`).
        """
        raise NotImplementedError()

    @property
    def viewOf(self) -> str | None:
        """Name of another table this element's records are drawn from.

        (`str` or `None`).
        """
        return None

    @property
    def alwaysJoin(self) -> bool:
        """Indicate if the element should always be included.

        If `True`, always include this element in any query or data ID in
        which its ``required`` dimensions appear, because it defines a
        relationship between those dimensions that must always be satisfied.
        """
        return False

    @property
    @abstractmethod
    def populated_by(self) -> Dimension | None:
        """The dimension that this element's records are always inserted,
        exported, and imported alongside.

        Notes
        -----
        When this is `None` (as it will be, at least at first, for any data
        repositories created before this attribute was added), records for
        this element will often need to be exported manually when datasets
        associated with some other related dimension are exported, in order for
        the post-import data repository to function as expected.
        """
        raise NotImplementedError()
class Dimension(DimensionElement):
    """A dimension.

    A `DimensionElement` whose name can serve as a key in a data ID.
    """

    @property
    @abstractmethod
    def uniqueKeys(self) -> NamedValueAbstractSet[ddl.FieldSpec]:
        """Return the unique fields.

        Every field that, on its own, identifies a record of this element
        once the primary keys of all required dependencies are known
        (`NamedValueAbstractSet` of `FieldSpec`).
        """
        raise NotImplementedError()

    @property
    @cached_getter
    def primaryKey(self) -> ddl.FieldSpec:
        """Return primary key field for this dimension (`FieldSpec`).

        Database primary keys for dimension tables are generally compound;
        this is the one field of that key that is not also a foreign key to
        a required dependency dimension table.  It is the first entry of
        `uniqueKeys`.
        """
        first, *_rest = self.uniqueKeys
        return first

    @property
    @cached_getter
    def alternateKeys(self) -> NamedValueAbstractSet[ddl.FieldSpec]:
        """Return alternate keys.

        The unique key fields of this dimension other than the primary key,
        i.e. every entry of `uniqueKeys` after the first
        (`NamedValueAbstractSet` of `FieldSpec`).

        If this dimension has required dependencies, the keys of those
        dimensions are also included in the unique constraints defined for
        these alternate keys.
        """
        _first, *others = self.uniqueKeys
        remaining = NamedValueSet(others)
        return remaining.freeze()

    @property
    def populated_by(self) -> Dimension:
        # Docstring inherited.
        return self
class DimensionCombination(DimensionElement):
    """Element with extra information.

    A `DimensionElement` that supplies additional metadata and/or
    relationship endpoint information for a combination of dimensions.
    """