Coverage for python/lsst/daf/butler/registry/_defaults.py: 30%

61 statements  

« prev     ^ index     » next       coverage.py v7.5.1, created at 2024-05-07 02:46 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ("RegistryDefaults",) 

31 

32import contextlib 

33from collections.abc import Sequence, Set 

34from typing import TYPE_CHECKING, Any 

35 

36from lsst.utils.classes import immutable 

37 

38from .._exceptions import MissingCollectionError 

39from ..dimensions import DataCoordinate 

40from ._collection_summary import CollectionSummary 

41from .wildcards import CollectionWildcard 

42 

43if TYPE_CHECKING: 

44 from ..registry import Registry 

45 from .sql_registry import SqlRegistry 

46 

47 

@immutable
class RegistryDefaults:
    """Default collections and data ID values used by a `Registry` or
    `Butler` instance.

    Parameters
    ----------
    collections : `str` or `~collections.abc.Iterable` [ `str` ], optional
        Expression naming the collections to search, in order, when reading
        datasets; either a single `str` collection name or an iterable of
        names.  See :ref:`daf_butler_collection_expressions` for more
        information.  When ``**kwargs`` gives no default for a governor
        dimension and the datasets in ``collections`` hold exactly one value
        for that dimension, that value becomes the default for it.
        These collections are never registered automatically: they must be
        registered manually before any `Registry` or `Butler` method uses
        them, although that may happen after the `Registry` or `Butler` has
        been initialized with this struct.
    run : `str`, optional
        Name of the `~CollectionType.RUN` collection that new datasets
        should be inserted into.  When ``collections`` is `None` and ``run``
        is not, ``collections`` becomes ``[run]``.  If not `None`, this
        collection will automatically be registered when the default struct
        is attached to a `Registry` instance.
    infer : `bool`, optional
        If `True` (default), derive default data ID values from the datasets
        in ``collections``: a governor dimension for which the collections
        together hold exactly one value gets that value as its default.
        Collections that do not exist are ignored.  Nothing is inferred for
        a governor dimension whose default is given explicitly via
        ``**kwargs``.
    **kwargs : `str`
        Default data ID key-value pairs.  Only "governor" dimensions such as
        ``instrument`` and ``skymap`` may be identified, but this is not
        checked until the struct is actually attached to a `Registry`.
    """

    def __init__(self, collections: Any = None, run: str | None = None, infer: bool = True, **kwargs: str):
        if collections is None:
            # With no explicit search path, search only the output run (if
            # there is one).
            collections = () if run is None else (run,)
        self.collections = CollectionWildcard.from_expression(collections).require_ordered()
        self.run = run
        self._infer = infer
        self._kwargs = kwargs

    @staticmethod
    def from_data_id(data_id: DataCoordinate) -> RegistryDefaults:
        """Build a `RegistryDefaults` holding the given default data ID and
        no default collections.

        Parameters
        ----------
        data_id : `DataCoordinate`
            The default data ID value.

        Returns
        -------
        defaults : `RegistryDefaults`
            Defaults struct that is already marked as finished, so a later
            call to `finish` is a no-op.
        """
        result = RegistryDefaults(collections=None, run=None, infer=False)
        result.dataId = data_id
        result._finished = True
        return result

    def __repr__(self) -> str:
        # Collect only the arguments that carry a non-empty value.
        parts: list[str] = []
        if self.collections:
            parts.append(f"collections={self.collections!r}")
        if self.run:
            parts.append(f"run={self.run!r}")
        parts.extend(f"{key}={value!r}" for key, value in self._kwargs.items())
        joined = ", ".join(parts)
        return f"{type(self).__name__}({joined})"

    def finish(self, registry: Registry | SqlRegistry) -> None:
        """Check the defaults against a registry and compute the standardized
        default data ID.

        Only a `Registry` instance should call this, at the point where the
        defaults struct is first associated with it.

        Parameters
        ----------
        registry : `Registry` or `SqlRegistry`
            Registry instance these defaults are being attached to.

        Raises
        ------
        TypeError
            Raised if a non-governor dimension was included in ``**kwargs``
            at construction.
        """
        # Nothing to do if this struct was already finished.  The class is
        # immutable, so there is no pre-set ``_finished = False`` to test;
        # the attribute's mere existence is the flag.
        if hasattr(self, "_finished"):
            return

        governor_names = registry.dimensions.governor_dimensions.names
        unrecognized = self._kwargs.keys() - governor_names
        if unrecognized:
            raise TypeError(
                "Only governor dimensions may be identified by a default data "
                f"ID, not {unrecognized}. "
                "(These may just be unrecognized keyword arguments passed at "
                "Butler construction.)"
            )

        # Inference is pointless when every governor dimension already has
        # an explicit default.
        if self._infer and self._kwargs.keys() != governor_names:
            summaries = []
            for name in self.collections:
                try:
                    summaries.append(registry.getCollectionSummary(name))
                except MissingCollectionError:
                    # Collections that do not exist (yet) are skipped.
                    continue

            if summaries:
                merged = CollectionSummary.union(*summaries)
                # The difference is evaluated once, before the loop starts
                # adding keys to ``self._kwargs``.
                for dimension in governor_names - self._kwargs.keys():
                    candidates: Set[str] = merged.governors.get(dimension, frozenset())
                    if len(candidates) == 1:
                        # A single value is unambiguous: adopt it.
                        (self._kwargs[dimension],) = candidates

        self.dataId = registry.expandDataId(self._kwargs, withDefaults=False)
        self._finished = True

    collections: Sequence[str]
    """Collections searched by default, in search order
    (`~collections.abc.Sequence` [ `str` ]).
    """

    run: str | None
    """Name of the run that outputs are written to by default (`str` or
    `None`).
    """

    dataId: DataCoordinate
    """The default data ID (`DataCoordinate`).

    A dimension with no default is simply absent.  Only governor dimensions
    are ever included in defaults.

    This attribute may not be accessed before the defaults struct is
    attached to a `Registry` instance.  It always satisfies ``hasFull``.
    """

187 """