Coverage for python/lsst/daf/butler/registry/queries/find_first_dataset.py: 71%

37 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2024-04-25 10:24 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27from __future__ import annotations 

28 

29__all__ = ("FindFirstDataset",) 

30 

31import dataclasses 

32from collections.abc import Sequence, Set 

33from typing import final 

34 

35from lsst.daf.relation import ColumnTag, Relation, RowFilter, UnaryCommutator, UnaryOperationRelation 

36from lsst.utils.classes import cached_getter 

37 

38from ..._column_tags import DatasetColumnTag, DimensionKeyColumnTag 

39 

40 

@final
@dataclasses.dataclass(frozen=True)
class FindFirstDataset(RowFilter):
    """Relation operation that keeps, within each group of dimension values,
    the dataset row with the lowest collection rank in the upstream relation.
    """

    dimensions: Sequence[DimensionKeyColumnTag]
    """Dimension key columns that define the groups within which the first
    dataset is found (`~collections.abc.Sequence` [ `DimensionKeyColumnTag` ]).
    """

    rank: DatasetColumnTag
    """Column holding the dataset rank; the lowest value in each group is the
    one selected (`DatasetColumnTag`).
    """

    @property
    @cached_getter
    def columns_required(self) -> Set[ColumnTag]:
        # Docstring inherited.
        # The rank column plus every grouping dimension column.
        return {self.rank, *self.dimensions}

    @property
    def is_empty_invariant(self) -> bool:
        # Docstring inherited.
        return True

    @property
    def is_order_dependent(self) -> bool:
        # Docstring inherited.
        return False

    def __str__(self) -> str:
        return "find_first"

    def applied_min_rows(self, target: Relation) -> int:
        # Docstring inherited.
        # Report one row when the target reports a nonzero minimum, and zero
        # rows otherwise.
        if target.min_rows:
            return 1
        return 0

    def commute(self, current: UnaryOperationRelation) -> UnaryCommutator:
        # Docstring inherited.
        upstream = current.target
        other = current.operation
        required = self.columns_required

        # Work out why (if at all) this operation cannot be moved past
        # ``other``; the column check takes precedence over the
        # count-dependence check.
        reason: str | None = None
        if not required <= upstream.columns:
            reason = f"{upstream} is missing columns {set(required - upstream.columns)}"
        elif other.is_count_dependent:
            reason = f"{other} is count-dependent"

        if reason is None:
            return UnaryCommutator(self, other)
        return UnaryCommutator(
            first=None,
            second=other,
            done=False,
            messages=(reason,),
        )