Coverage for python/lsst/daf/butler/registry/queries/find_first_dataset.py: 71%
37 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-25 10:50 +0000
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-25 10:50 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
27from __future__ import annotations
29__all__ = ("FindFirstDataset",)
31import dataclasses
32from collections.abc import Sequence, Set
33from typing import final
35from lsst.daf.relation import ColumnTag, Relation, RowFilter, UnaryCommutator, UnaryOperationRelation
36from lsst.utils.classes import cached_getter
38from ..._column_tags import DatasetColumnTag, DimensionKeyColumnTag
@final
@dataclasses.dataclass(frozen=True)
class FindFirstDataset(RowFilter):
    """Row-filter operation that keeps, within each group of dimension
    values, the dataset whose collection rank comes first.
    """

    dimensions: Sequence[DimensionKeyColumnTag]
    """Dimensions that define the groups within which the first dataset is
    found (`~collections.abc.Sequence` [ `DimensionKeyColumnTag` ]).
    """

    rank: DatasetColumnTag
    """Dataset rank column; the lowest value in each group is the one that is
    selected (`DatasetColumnTag`).
    """

    @property
    @cached_getter
    def columns_required(self) -> Set[ColumnTag]:
        # Docstring inherited.
        # The operation needs the rank column plus every grouping dimension.
        required: set[ColumnTag] = {self.rank, *self.dimensions}
        return required

    @property
    def is_empty_invariant(self) -> bool:
        # Docstring inherited.
        return True

    @property
    def is_order_dependent(self) -> bool:
        # Docstring inherited.
        return False

    def __str__(self) -> str:
        return "find_first"

    def applied_min_rows(self, target: Relation) -> int:
        # Docstring inherited.
        # Any nonzero lower bound on the target collapses to a bound of one;
        # a zero (or otherwise falsy) bound stays zero.
        if target.min_rows:
            return 1
        return 0

    def commute(self, current: UnaryOperationRelation) -> UnaryCommutator:
        # Docstring inherited.
        operation = current.operation
        upstream_columns = current.target.columns

        # Work out whether anything prevents this operation from being moved
        # past ``current``; the first applicable reason wins.
        reason: str | None = None
        if not self.columns_required <= upstream_columns:
            missing = set(self.columns_required - upstream_columns)
            reason = f"{current.target} is missing columns {missing}"
        elif operation.is_count_dependent:
            reason = f"{operation} is count-dependent"

        if reason is None:
            return UnaryCommutator(self, operation)
        return UnaryCommutator(
            first=None,
            second=operation,
            done=False,
            messages=(reason,),
        )