Coverage for python / lsst / daf / butler / registry / _defaults.py: 25%

82 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-30 08:41 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ("RegistryDefaults",) 

31 

32import contextlib 

33from collections.abc import Sequence, Set 

34from types import EllipsisType 

35from typing import TYPE_CHECKING, Any 

36 

37from lsst.utils.classes import immutable 

38 

39from .._butler_instance_options import ButlerInstanceOptions 

40from .._exceptions import MissingCollectionError 

41from ..dimensions import DataCoordinate 

42from ._collection_summary import CollectionSummary 

43from .wildcards import CollectionWildcard 

44 

45if TYPE_CHECKING: 

46 from ..registry import CollectionArgType, Registry 

47 from .sql_registry import SqlRegistry 

48 

49 

@immutable
class RegistryDefaults:
    """Default collections and default data ID values for a `Registry` or
    `Butler` instance.

    Parameters
    ----------
    collections : `str` or `~collections.abc.Iterable` [ `str` ], optional
        Expression naming the collections that are searched, in order, when
        datasets are read.  It may be a single `str` collection name or an
        iterable of names; see :ref:`daf_butler_collection_expressions` for
        details.  When no default for a governor dimension is supplied
        through ``**kwargs`` and the datasets in ``collections`` contain
        exactly one value for that dimension, that value becomes the default
        for the dimension.  Nothing is registered automatically for these
        collections: they have to be registered by hand before any `Registry`
        or `Butler` method uses them, although that may happen after the
        `Registry` or `Butler` has been initialized with this struct.
    run : `str`, optional
        Name of the `~CollectionType.RUN` collection that receives newly
        inserted datasets.  When ``collections`` is `None` and ``run`` is not
        `None`, ``collections`` becomes ``[run]``.  A ``run`` that is not
        `None` is registered automatically when the struct is attached to a
        `Registry` instance.
    infer : `bool`, optional
        If `True` (the default), derive default data ID values from the
        datasets in ``collections``: a governor dimension for which all
        collections agree on one value (or have no value) gets that value as
        its default.  Collections that do not exist are ignored.  No value is
        inferred for a governor dimension whose default was given explicitly
        through ``**kwargs``.
    **kwargs : `str`
        Key-value pairs of the default data ID.  Only "governor" dimensions
        such as ``instrument`` and ``skymap`` are permitted, but this is not
        verified until the struct is attached to a `Registry`.
    """

    def __init__(self, collections: Any = None, run: str | None = None, infer: bool = True, **kwargs: str):
        # Keep the arguments as originally given so that clone() can replay
        # the defaulting logic below.
        self._original_collection_was_none = collections is None
        self._original_kwargs = dict(kwargs)
        if collections is None:
            # No explicit search path: fall back to the output run, if any.
            collections = () if run is None else (run,)
        wildcard = CollectionWildcard.from_expression(collections)
        self.collections = wildcard.require_ordered()
        self.run = run
        self._infer = infer
        self._kwargs = kwargs

    @staticmethod
    def from_data_id(data_id: DataCoordinate) -> RegistryDefaults:
        """Build a RegistryDefaults that carries the given ``dataId`` and has
        no default collections.

        Parameters
        ----------
        data_id : `DataCoordinate`
            The default data ID value.
        """
        result = RegistryDefaults(collections=None, run=None, infer=False)
        result.dataId = data_id
        # Mark the struct as complete so that finish() leaves the data ID
        # assigned above untouched.
        result._finished = True
        return result

    @staticmethod
    def from_butler_instance_options(options: ButlerInstanceOptions) -> RegistryDefaults:
        """Build a `RegistryDefaults` from the settings held by a
        `ButlerInstanceOptions` object.

        Parameters
        ----------
        options : `ButlerInstanceOptions`
            Butler options object.
        """
        return RegistryDefaults(
            collections=options.collections,
            run=options.run,
            infer=options.inferDefaults,
            **options.kwargs,
        )

    def clone(
        self,
        collections: CollectionArgType | None | EllipsisType = ...,
        run: str | None | EllipsisType = ...,
        inferDefaults: bool | EllipsisType = ...,
        dataId: dict[str, str] | EllipsisType = ...,
    ) -> RegistryDefaults:
        """Copy this RegistryDefaults object, optionally replacing some of
        its values.

        Parameters
        ----------
        collections : `~lsst.daf.butler.registry.CollectionArgType` or `None`,\
                optional
            Same as constructor.  If omitted, uses value from original
            object.
        run : `str` or `None`, optional
            Same as constructor.  `None` means no default run.  If omitted,
            the value of the original object is copied.
        inferDefaults : `bool`, optional
            Same as constructor.  If omitted, the value of the original
            object is copied.
        dataId : `dict` [ `str` , `str` ]
            Same as the ``kwargs`` arguments of the constructor.  If omitted,
            the values of the original object are copied.

        Returns
        -------
        defaults : `RegistryDefaults`
            A new instance if anything was changed, otherwise the original
            instance.

        Notes
        -----
        ``finish()`` must be called on the returned object to complete
        initialization.
        """
        if all(arg is ... for arg in (collections, run, inferDefaults, dataId)):
            # Nothing to change.  Instances are immutable, so handing back
            # ``self`` is safe and spares the database queries that finish()
            # would otherwise run.
            return self

        if collections is ...:
            # Passing `None` again when the original argument was `None`
            # makes the constructor re-derive the collections from the run.
            collections = None if self._original_collection_was_none else self.collections
        if run is ...:
            run = self.run
        if inferDefaults is ...:
            inferDefaults = self._infer
        if dataId is ...:
            dataId = self._original_kwargs

        return RegistryDefaults(collections, run, inferDefaults, **dataId)

    def __repr__(self) -> str:
        # Empty collections, a missing run and absent data ID keys are left
        # out of the representation entirely.
        parts = []
        if self.collections:
            parts.append(f"collections={self.collections!r}")
        if self.run:
            parts.append(f"run={self.run!r}")
        parts.extend(f"{key}={value!r}" for key, value in self._kwargs.items())
        joined = ", ".join(parts)
        return f"{type(self).__name__}({joined})"

    def finish(self, registry: Registry | SqlRegistry) -> None:
        """Validate the defaults struct and standardize its data ID.

        Only a `Registry` instance should call this, at the moment the
        defaults struct is first associated with it.

        Parameters
        ----------
        registry : `Registry`
            Registry instance these defaults are being attached to.

        Raises
        ------
        TypeError
            Raised if a non-governor dimension was included in ``**kwargs``
            at construction.
        """
        # Nothing to do when initialization was completed earlier.  The
        # class is immutable, so the flag is tested for existence rather
        # than for its value.
        if hasattr(self, "_finished"):
            return

        governor_names = registry.dimensions.governor_dimensions.names
        explicit = self._kwargs.keys()
        if not explicit <= governor_names:
            unknown = explicit - governor_names
            raise TypeError(
                "Only governor dimensions may be identified by a default data "
                f"ID, not {unknown}. "
                "(These may just be unrecognized keyword arguments passed at "
                "Butler construction.)"
            )
        if self._infer and explicit != governor_names:
            self._infer_governor_values(registry, governor_names)
        self.dataId = registry.expandDataId(self._kwargs, withDefaults=False)

        self._finished = True

    def _infer_governor_values(self, registry: Registry | SqlRegistry, governor_names: Set[str]) -> None:
        """Add defaults for governor dimensions that have a single value in
        the default collections.

        Parameters
        ----------
        registry : `Registry`
            Registry that is queried for the collection summaries.
        governor_names : `~collections.abc.Set` [ `str` ]
            Names of all governor dimensions known to ``registry``.
        """
        summaries = []
        for collection in self.collections:
            # Collections that do not exist are skipped.
            with contextlib.suppress(MissingCollectionError):
                summaries.append(registry.getCollectionSummary(collection))
        if not summaries:
            return

        combined = CollectionSummary.union(*summaries)
        # Only dimensions without an explicit default are candidates.
        for dimension in governor_names - self._kwargs.keys():
            candidates: Set[str] = combined.governors.get(dimension, frozenset())
            if len(candidates) == 1:
                (only,) = candidates
                self._kwargs[dimension] = only

    collections: Sequence[str]
    """The collections that are searched by default, in order
    (`~collections.abc.Sequence` [ `str` ]).
    """

    run: str | None
    """Name of the run that this butler writes outputs to by default (`str`
    or `None`).
    """

    dataId: DataCoordinate
    """The default data ID (`DataCoordinate`).

    Dimensions that have no default are simply absent.  Only governor
    dimensions are ever included in defaults.

    This attribute may not be accessed before the defaults struct is
    attached to a `Registry` instance.  It always satisfies ``hasFull``.
    """

262 """