# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

22"""Module defining connection types to be used within a 

23`PipelineTaskConnections` class. 

24""" 

25 

26__all__ = ["InitInput", "InitOutput", "Input", "PrerequisiteInput", 

27 "Output", "BaseConnection"] 

28 

29import dataclasses 

30import typing 

31from typing import Callable, Iterable, Optional 

32 

33from lsst.daf.butler import ( 

34 CollectionSearch, 

35 DataCoordinate, 

36 DatasetRef, 

37 DatasetType, 

38 DimensionUniverse, 

39 Registry, 

40 StorageClass, 

41) 

42 

43 

@dataclasses.dataclass(frozen=True)
class BaseConnection:
    """Base class used for declaring PipelineTask connections.

    Parameters
    ----------
    name : `str`
        The name used to identify the dataset type.
    storageClass : `str`
        The storage class used when (un)persisting the dataset type.
    doc : `str`
        Documentation for this connection.
    multiple : `bool`
        Indicates whether this connection should expect to contain multiple
        objects of the given dataset type.
    """
    name: str
    storageClass: str
    doc: str = ""
    multiple: bool = False

    def __get__(self, inst, klass):
        """Descriptor access method.

        This method turns a connection into a descriptor.  When a connection
        is added to a connection class, it is a class-level variable.  This
        method makes accessing the connection on an instance of the
        connection class return a result specialized for that instance; in
        particular, names specified in a config instance will be visible
        instead of the default names for the connection.
        """
        # If inst is None, this is being accessed by the class and not an
        # instance; return this connection itself.
        if inst is None:
            return self
        # If no object cache exists, create one to track the instances this
        # connection has been accessed by.
        if not hasattr(inst, '_connectionCache'):
            object.__setattr__(inst, '_connectionCache', {})
        # Look up an existing cached instance.
        idSelf = id(self)
        if idSelf in inst._connectionCache:
            return inst._connectionCache[idSelf]
        # Accumulate the parameters that define this connection.
        params = {}
        for field in dataclasses.fields(self):
            params[field.name] = getattr(self, field.name)
        # Get the name override defined by the instance of the connection
        # class.
        params['name'] = inst._nameOverrides[self.varName]
        # Return a new instance of this connection specialized with the
        # information provided by the connection class instance.
        return inst._connectionCache.setdefault(idSelf, self.__class__(**params))
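
    # A hedged illustration of the descriptor behaviour above; the connections
    # class, connection name, and config object below are hypothetical:
    #
    #     ExampleConnections.calexp.name              # class access: default name
    #     ExampleConnections(config=cfg).calexp.name  # instance access: name override from cfg
    #
    # i.e. class-level access returns the connection as declared, while
    # instance-level access returns a copy specialized with that instance's
    # name overrides.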

    def makeDatasetType(self, universe: DimensionUniverse,
                        parentStorageClass: Optional[StorageClass] = None):
        """Construct a true `DatasetType` instance with normalized dimensions.

        Parameters
        ----------
        universe : `lsst.daf.butler.DimensionUniverse`
            Set of all known dimensions to be used to normalize the dimension
            names specified in config.
        parentStorageClass : `lsst.daf.butler.StorageClass`, optional
            Parent storage class for component datasets; `None` otherwise.

        Returns
        -------
        datasetType : `DatasetType`
            The `DatasetType` defined by this connection.
        """
        return DatasetType(self.name,
                           universe.empty,
                           self.storageClass,
                           parentStorageClass=parentStorageClass)


@dataclasses.dataclass(frozen=True)
class DimensionedConnection(BaseConnection):
    """Class used for declaring PipelineTask connections that include
    dimensions.

    Parameters
    ----------
    name : `str`
        The name used to identify the dataset type.
    storageClass : `str`
        The storage class used when (un)persisting the dataset type.
    multiple : `bool`
        Indicates whether this connection should expect to contain multiple
        objects of the given dataset type.
    dimensions : iterable of `str`
        The `lsst.daf.butler.Butler` `lsst.daf.butler.Registry` dimensions
        used to identify the dataset type identified by the specified name.
    isCalibration : `bool`, optional
        `True` if this dataset type may be included in CALIBRATION-type
        collections to associate it with a validity range, `False` (default)
        otherwise.
    """
    dimensions: typing.Iterable[str] = ()
    isCalibration: bool = False

    def __post_init__(self):
        if isinstance(self.dimensions, str):
            raise TypeError("Dimensions must be iterable of dimensions, got str, "
                            "possibly omitted trailing comma")
        if not isinstance(self.dimensions, typing.Iterable):
            raise TypeError("Dimensions must be iterable of dimensions")
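
    # The ``str`` check above guards against a common declaration mistake, a
    # missing trailing comma (hypothetical declarations):
    #
    #     dimensions=("visit")     # a plain str -- rejected with TypeError
    #     dimensions=("visit",)    # a one-element tuple -- accepted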

    def makeDatasetType(self, universe: DimensionUniverse,
                        parentStorageClass: Optional[StorageClass] = None):
        """Construct a true `DatasetType` instance with normalized dimensions.

        Parameters
        ----------
        universe : `lsst.daf.butler.DimensionUniverse`
            Set of all known dimensions to be used to normalize the dimension
            names specified in config.
        parentStorageClass : `lsst.daf.butler.StorageClass`, optional
            Parent storage class for component datasets; `None` otherwise.

        Returns
        -------
        datasetType : `DatasetType`
            The `DatasetType` defined by this connection.
        """
        return DatasetType(self.name,
                           universe.extract(self.dimensions),
                           self.storageClass, isCalibration=self.isCalibration,
                           parentStorageClass=parentStorageClass)
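
    # A minimal usage sketch, assuming a configured ``butler`` whose registry
    # provides the dimension universe (the connection name is hypothetical):
    #
    #     universe = butler.registry.dimensions
    #     datasetType = connections.calexp.makeDatasetType(universe)
    #
    # The returned `DatasetType` has this connection's dimensions normalized
    # against ``universe``.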

@dataclasses.dataclass(frozen=True)
class BaseInput(DimensionedConnection):
    """Class used for declaring PipelineTask input connections.

    Parameters
    ----------
    name : `str`
        The default name used to identify the dataset type.
    storageClass : `str`
        The storage class used when (un)persisting the dataset type.
    multiple : `bool`
        Indicates whether this connection should expect to contain multiple
        objects of the given dataset type.
    dimensions : iterable of `str`
        The `lsst.daf.butler.Butler` `lsst.daf.butler.Registry` dimensions
        used to identify the dataset type identified by the specified name.
    deferLoad : `bool`
        Indicates that this dataset type will be loaded as a
        `lsst.daf.butler.DeferredDatasetHandle`.  PipelineTasks can use this
        object to load the object at a later time.
    """
    deferLoad: bool = False


@dataclasses.dataclass(frozen=True)
class Input(BaseInput):
    pass


@dataclasses.dataclass(frozen=True)
class PrerequisiteInput(BaseInput):
    """Class used for declaring PipelineTask prerequisite connections.

    Parameters
    ----------
    name : `str`
        The default name used to identify the dataset type.
    storageClass : `str`
        The storage class used when (un)persisting the dataset type.
    multiple : `bool`
        Indicates whether this connection should expect to contain multiple
        objects of the given dataset type.
    dimensions : iterable of `str`
        The `lsst.daf.butler.Butler` `lsst.daf.butler.Registry` dimensions
        used to identify the dataset type identified by the specified name.
    deferLoad : `bool`
        Indicates that this dataset type will be loaded as a
        `lsst.daf.butler.DeferredDatasetHandle`.  PipelineTasks can use this
        object to load the object at a later time.
    lookupFunction : `typing.Callable`, optional
        An optional callable that will look up prerequisite inputs using the
        `DatasetType`, registry, quantum data ID, and input collections
        passed to it.  If no function is specified, the default
        temporal/spatial lookup will be used.

    Notes
    -----
    Prerequisite inputs are used for datasets that must exist in the data
    repository before a pipeline including this task is run; they cannot be
    produced by another task in the same pipeline.

    In exchange for this limitation, they have a number of advantages relative
    to regular `Input` connections:

    - The query used to find them during `QuantumGraph` generation can be
      fully customized by providing a ``lookupFunction``.
    - Failed searches for prerequisites during `QuantumGraph` generation will
      usually generate more helpful diagnostics than those for regular `Input`
      connections.
    - The default query for prerequisite inputs relates the quantum dimensions
      directly to the dimensions of its dataset type, without being
      constrained by any of the other dimensions in the pipeline.  This allows
      them to be used for temporal calibration lookups (which regular `Input`
      connections cannot do at present) and to work around `QuantumGraph`
      generation limitations involving cases where naive spatial overlap
      relationships between dimensions are not desired (e.g. a task that wants
      all detectors in each visit for which the visit overlaps a tract, not
      just those where that detector+visit combination overlaps the tract).
    """
    lookupFunction: Optional[Callable[[DatasetType, Registry, DataCoordinate, CollectionSearch],
                                      Iterable[DatasetRef]]] = None
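
# A hedged sketch of a custom ``lookupFunction`` for a `PrerequisiteInput`.
# The function name, connection names, dataset type name, and storage class
# below are hypothetical, and the registry query shown is one plausible
# implementation, not the only one:
#
#     def lookupRefCat(datasetType, registry, quantumDataId, collections):
#         # Find matching datasets in the input collections directly, instead
#         # of relying on the default temporal/spatial lookup.
#         return registry.queryDatasets(datasetType,
#                                       collections=collections,
#                                       dataId=quantumDataId)
#
#     refCat = PrerequisiteInput(name="ref_cat",
#                                storageClass="SimpleCatalog",
#                                dimensions=("htm7",),
#                                multiple=True,
#                                lookupFunction=lookupRefCat)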

@dataclasses.dataclass(frozen=True)
class Output(DimensionedConnection):
    pass


@dataclasses.dataclass(frozen=True)
class InitInput(BaseConnection):
    pass


@dataclasses.dataclass(frozen=True)
class InitOutput(BaseConnection):
    pass
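

# A minimal sketch of how these connection types are typically declared inside
# a `PipelineTaskConnections` subclass.  The class name, connection names,
# dataset type names, storage classes, and dimensions below are hypothetical
# examples:
#
#     from lsst.pipe.base import PipelineTaskConnections
#
#     class ExampleConnections(PipelineTaskConnections,
#                              dimensions=("instrument", "visit", "detector")):
#         calexp = Input(doc="Calibrated exposure to process.",
#                        name="calexp",
#                        storageClass="ExposureF",
#                        dimensions=("instrument", "visit", "detector"))
#         catalog = Output(doc="Output source catalog.",
#                          name="exampleSrc",
#                          storageClass="SourceCatalog",
#                          dimensions=("instrument", "visit", "detector"))
#
# The ``name`` of each connection may then be overridden via the task's
# connections config; the overridden name is what instance access on the
# connections class returns (see ``BaseConnection.__get__``).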