Coverage for python/lsst/pipe/base/_dataset_handle.py: 19%

91 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-05 10:02 +0000

1# This file is part of pipe_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27from __future__ import annotations 

28 

29__all__ = ["InMemoryDatasetHandle"] 

30 

31import dataclasses 

32from typing import Any, cast 

33 

34from frozendict import frozendict 

35from lsst.daf.butler import ( 

36 DataCoordinate, 

37 DataId, 

38 DimensionUniverse, 

39 StorageClass, 

40 StorageClassDelegate, 

41 StorageClassFactory, 

42) 

43 

44 

45# Use an empty dataID as a default. 

def _default_dataId() -> DataCoordinate:
    """Build the empty data coordinate that serves as a default dataId.

    Returns
    -------
    dataId : `~lsst.daf.butler.DataCoordinate`
        An empty data coordinate made from a default-constructed
        `~lsst.daf.butler.DimensionUniverse`.
    """
    universe = DimensionUniverse()
    return DataCoordinate.make_empty(universe)

48 

49 

@dataclasses.dataclass(frozen=True, init=False)
class InMemoryDatasetHandle:
    """An in-memory version of a `~lsst.daf.butler.DeferredDatasetHandle`.

    Parameters
    ----------
    inMemoryDataset : `~typing.Any`
        The dataset to be used by this handle.
    storageClass : `~lsst.daf.butler.StorageClass`, `str`, or `None`, optional
        The storage class associated with the in-memory dataset, or the name
        of that storage class. If `None` and if a storage class is needed, an
        attempt will be made to work one out from the underlying python type.
    parameters : `dict` [`str`, `~typing.Any`] or `None`, optional
        Parameters to be used with `get`.
    dataId : `~lsst.daf.butler.DataId` or `None`, optional
        The dataId associated with this dataset. Only used for compatibility
        with the Butler implementation. Can be used for logging messages
        by calling code. If ``dataId`` is not specified, a default empty
        dataId will be constructed.
    copy : `bool`, optional
        Whether to copy on `get` or not.
    **kwargs : `~typing.Any`
        If ``kwargs`` are provided without specifying a ``dataId``, those
        parameters will be converted into a dataId-like entity. If a
        ``dataId`` is also given, the ``kwargs`` are merged into it and take
        precedence over keys already present.
    """

    # Deliberately unannotated so that the dataclass machinery does not treat
    # it as a field; shared by every handle that has no explicit dataId.
    _empty = DataCoordinate.make_empty(DimensionUniverse())

    def __init__(
        self,
        inMemoryDataset: Any,
        *,
        storageClass: StorageClass | str | None = None,
        parameters: dict[str, Any] | None = None,
        dataId: DataId | None = None,
        copy: bool = False,
        **kwargs: Any,
    ):
        # The dataclass is frozen, so attributes have to be assigned through
        # object.__setattr__ rather than normal attribute assignment.
        object.__setattr__(self, "inMemoryDataset", inMemoryDataset)
        object.__setattr__(self, "storageClass", storageClass)
        object.__setattr__(self, "parameters", parameters)
        object.__setattr__(self, "copy", copy)
        # Need to be able to construct a dataId from kwargs for convenience.
        # This will not be a full DataCoordinate.
        if dataId is None:
            if kwargs:
                dataId = frozendict(kwargs)
            else:
                dataId = self._empty
        elif kwargs:
            if isinstance(dataId, DataCoordinate):
                # Keep a real DataCoordinate, using the supplied one as the
                # source of defaults for anything not given in kwargs.
                dataId = DataCoordinate.standardize(kwargs, defaults=dataId, universe=dataId.universe)
            else:
                # Plain mapping: kwargs override matching keys.
                new = dict(dataId)
                new.update(kwargs)
                dataId = frozendict(new)
        object.__setattr__(self, "dataId", dataId)

    def get(
        self,
        *,
        component: str | None = None,
        parameters: dict | None = None,
        storageClass: str | StorageClass | None = None,
    ) -> Any:
        """Retrieve the dataset pointed to by this handle.

        This handle may be used multiple times, possibly with different
        parameters.

        Parameters
        ----------
        component : `str` or None
            If the deferred object is a component dataset type, this parameter
            may specify the name of the component to use in the get operation.
        parameters : `dict` or None
            The parameters argument will be passed to the butler get method.
            It defaults to `None`. If the value is not `None`, this `dict` will
            be merged with the parameters dict used to construct the
            handle, with the values given here taking precedence. The
            supplied `dict` itself is never modified or retained.
        storageClass : `~lsst.daf.butler.StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the type stored. Specifying a read `~lsst.daf.butler.StorageClass`
            can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        return : `object`
            The dataset pointed to by this handle. Whether this returns the
            original object or a copy is controlled by the ``copy`` property
            of the handle that is set at handle construction time.
            If the stored object is `None` this method always returns `None`
            regardless of any component request or parameters.

        Raises
        ------
        KeyError
            Raised if a component or parameters are used, or a copy is
            requested, but no storage class can be found.
        """
        if self.inMemoryDataset is None:
            return None

        # Always merge into a fresh dict so that neither the parameters stored
        # on the handle nor the caller's dict is aliased by later processing.
        mergedParameters: dict[str, Any] = {}
        if self.parameters is not None:
            mergedParameters.update(self.parameters)
        if parameters is not None:
            mergedParameters.update(parameters)

        returnStorageClass: StorageClass | None = None
        if storageClass:
            if isinstance(storageClass, str):
                returnStorageClass = StorageClassFactory().getStorageClass(storageClass)
            else:
                returnStorageClass = storageClass

        inMemoryDataset = self.inMemoryDataset

        # Looked up lazily and at most once; the default case of no copy, no
        # parameters and no component never needs a storage class.
        thisStorageClass: StorageClass | None = None

        if self.copy:
            # An optimization might be to defer copying until any components
            # and parameters have been applied. This can be a problem since
            # most component storage classes do not bother to define a
            # storage class delegate and the default delegate uses deepcopy()
            # which can fail if explicit support for deepcopy() is missing
            # or pickle does not work.
            # Copying will require a storage class be determined, which is
            # not normally required for the default case of no parameters and
            # no components.
            thisStorageClass = self._getStorageClass()
            try:
                delegate = thisStorageClass.delegate()
            except TypeError:
                # Try the default copy options if no delegate is available.
                delegate = StorageClassDelegate(thisStorageClass)

            inMemoryDataset = delegate.copy(inMemoryDataset)

        if component or mergedParameters:
            # This requires a storage class look up to locate the delegate
            # class.
            if thisStorageClass is None:
                thisStorageClass = self._getStorageClass()

            # Parameters for derived components are applied against the
            # composite.
            if component in thisStorageClass.derivedComponents:
                # For some reason MyPy doesn't see the line above as narrowing
                # 'component' from 'str | None' to 'str'.
                component = cast(str, component)
                # Validate the full merged set, since that is what is handed
                # to the delegate below (not just the call-time parameters).
                thisStorageClass.validateParameters(mergedParameters)

                # Process the parameters (hoping this never modified the
                # original object).
                inMemoryDataset = thisStorageClass.delegate().handleParameters(
                    inMemoryDataset, mergedParameters
                )
                mergedParameters = {}  # They have now been used

                readStorageClass = thisStorageClass.derivedComponents[component]
            else:
                if component:
                    readStorageClass = thisStorageClass.components[component]
                else:
                    readStorageClass = thisStorageClass
                readStorageClass.validateParameters(mergedParameters)

            if component:
                inMemoryDataset = thisStorageClass.delegate().getComponent(inMemoryDataset, component)

            if mergedParameters:
                inMemoryDataset = readStorageClass.delegate().handleParameters(
                    inMemoryDataset, mergedParameters
                )

        # With or without component/parameter handling, the object may still
        # need converting to the requested return type.
        if returnStorageClass:
            return returnStorageClass.coerce_type(inMemoryDataset)
        return inMemoryDataset

    def _getStorageClass(self) -> StorageClass:
        """Return the relevant storage class.

        Returns
        -------
        storageClass : `~lsst.daf.butler.StorageClass`
            The storage class associated with this handle, or one derived
            from the python type of the stored object.

        Raises
        ------
        KeyError
            Raised if the storage class could not be found.
        """
        factory = StorageClassFactory()
        if self.storageClass:
            if isinstance(self.storageClass, str):
                # A name was supplied; resolve it through the factory.
                return factory.getStorageClass(self.storageClass)
            else:
                return self.storageClass

        # Need to match python type.
        pytype = type(self.inMemoryDataset)
        return factory.findStorageClass(pytype)

    inMemoryDataset: Any
    """The object to store in this dataset handle for later retrieval.
    """

    dataId: DataCoordinate | frozendict
    """The `~lsst.daf.butler.DataCoordinate`, or dataId-like mapping built
    from keyword arguments, associated with this dataset handle.
    """

    storageClass: StorageClass | str | None = None
    """The `~lsst.daf.butler.StorageClass`, or the name of the storage class,
    associated with this dataset.

    If `None`, the storage class will be looked up from the factory.
    """

    parameters: dict | None = None
    """Optional parameters that may be used to specify a subset of the dataset
    to be loaded (`dict` or `None`).
    """

    copy: bool = False
    """Control whether a copy of the in-memory dataset is returned for every
    call to `get()`."""