Coverage for python/lsst/pipe/base/_dataset_handle.py: 19%

89 statements  

coverage.py v7.3.2, created at 2023-11-18 10:50 +0000

# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["InMemoryDatasetHandle"]

import dataclasses
from typing import Any, cast

from frozendict import frozendict
from lsst.daf.butler import (
    DataCoordinate,
    DataId,
    DimensionUniverse,
    StorageClass,
    StorageClassDelegate,
    StorageClassFactory,
)


# Use an empty dataId as a default.
def _default_dataId() -> DataCoordinate:
    return DataCoordinate.makeEmpty(DimensionUniverse())


@dataclasses.dataclass(frozen=True, init=False)
class InMemoryDatasetHandle:
    """An in-memory version of a `~lsst.daf.butler.DeferredDatasetHandle`.

    If ``dataId`` is not specified, a default empty dataId will be constructed.
    If ``kwargs`` are provided without specifying a ``dataId``, those
    parameters will be converted into a dataId-like entity.
    """

    _empty = DataCoordinate.makeEmpty(DimensionUniverse())

    def __init__(
        self,
        inMemoryDataset: Any,
        *,
        storageClass: StorageClass | None = None,
        parameters: dict[str, Any] | None = None,
        dataId: DataId | None = None,
        copy: bool = False,
        **kwargs: Any,
    ):
        object.__setattr__(self, "inMemoryDataset", inMemoryDataset)
        object.__setattr__(self, "storageClass", storageClass)
        object.__setattr__(self, "parameters", parameters)
        object.__setattr__(self, "copy", copy)
        # Need to be able to construct a dataId from kwargs for convenience.
        # This will not be a full DataCoordinate.
        if dataId is None:
            if kwargs:
                dataId = frozendict(kwargs)
            else:
                dataId = self._empty
        elif kwargs:
            if isinstance(dataId, DataCoordinate):
                dataId = DataCoordinate.standardize(kwargs, defaults=dataId, universe=dataId.universe)
            else:
                new = dict(dataId)
                new.update(kwargs)
                dataId = frozendict(new)
        object.__setattr__(self, "dataId", dataId)
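        # Illustrative outcomes of the branches above (values are arbitrary):
        #   InMemoryDatasetHandle(obj)                  -> empty DataCoordinate
        #   InMemoryDatasetHandle(obj, detector=42)     -> frozendict({"detector": 42})
        #   InMemoryDatasetHandle(obj, dataId={"visit": 1}, detector=42)
        #       -> frozendict({"visit": 1, "detector": 42})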

    def get(
        self,
        *,
        component: str | None = None,
        parameters: dict | None = None,
        storageClass: str | StorageClass | None = None,
    ) -> Any:
        """Retrieve the dataset pointed to by this handle.

        This handle may be used multiple times, possibly with different
        parameters.

        Parameters
        ----------
        component : `str` or `None`
            If the deferred object is a component dataset type, this parameter
            may specify the name of the component to use in the get operation.
        parameters : `dict` or `None`
            The parameters argument will be passed to the butler get method.
            It defaults to `None`. If the value is not `None`, this `dict`
            will be merged with the parameters dict used to construct this
            handle.
        storageClass : `~lsst.daf.butler.StorageClass` or `str`, optional
            The storage class to be used to override the Python type returned
            by this method. By default the returned type matches the type
            stored. Specifying a read `~lsst.daf.butler.StorageClass` can
            force a different type to be returned. This type must be
            compatible with the original type.

        Returns
        -------
        return : `object`
            The dataset pointed to by this handle. Whether this returns the
            original object or a copy is controlled by the ``copy`` property
            of the handle, set at handle construction time. If the stored
            object is `None` this method always returns `None` regardless of
            any component request or parameters.

        Raises
        ------
        KeyError
            Raised if a component or parameters are used but no storage
            class can be found.
        """

        if self.inMemoryDataset is None:
            return None

        if self.parameters is not None:
            mergedParameters = self.parameters.copy()
            if parameters is not None:
                mergedParameters.update(parameters)
        elif parameters is not None:
            mergedParameters = parameters
        else:
            mergedParameters = {}

        returnStorageClass: StorageClass | None = None
        if storageClass:
            if isinstance(storageClass, str):
                factory = StorageClassFactory()
                returnStorageClass = factory.getStorageClass(storageClass)
            else:
                returnStorageClass = storageClass

        inMemoryDataset = self.inMemoryDataset

        if self.copy:
            # An optimization might be to defer copying until any components
            # and parameters have been applied. This can be a problem since
            # most component storage classes do not bother to define a
            # storage class delegate and the default delegate uses deepcopy()
            # which can fail if explicit support for deepcopy() is missing
            # or pickle does not work.
            # Copying will require a storage class be determined, which is
            # not normally required for the default case of no parameters and
            # no components.
            thisStorageClass = self._getStorageClass()
            try:
                delegate = thisStorageClass.delegate()
            except TypeError:
                # Try the default copy options if no delegate is available.
                delegate = StorageClassDelegate(thisStorageClass)

            inMemoryDataset = delegate.copy(inMemoryDataset)

        if component or mergedParameters:
            # This requires a storage class look up to locate the delegate
            # class.
            thisStorageClass = self._getStorageClass()

            # Parameters for derived components are applied against the
            # composite.
            if component in thisStorageClass.derivedComponents:
                # For some reason MyPy doesn't see the line above as narrowing
                # 'component' from 'str | None' to 'str'.
                component = cast(str, component)
                thisStorageClass.validateParameters(parameters)

                # Process the parameters (hoping this never modifies the
                # original object).
                inMemoryDataset = thisStorageClass.delegate().handleParameters(
                    inMemoryDataset, mergedParameters
                )
                mergedParameters = {}  # They have now been used

                readStorageClass = thisStorageClass.derivedComponents[component]
            else:
                if component:
                    readStorageClass = thisStorageClass.components[component]
                else:
                    readStorageClass = thisStorageClass
                readStorageClass.validateParameters(mergedParameters)

            if component:
                inMemoryDataset = thisStorageClass.delegate().getComponent(inMemoryDataset, component)

            if mergedParameters:
                inMemoryDataset = readStorageClass.delegate().handleParameters(
                    inMemoryDataset, mergedParameters
                )

            if returnStorageClass:
                return returnStorageClass.coerce_type(inMemoryDataset)
            return inMemoryDataset
        else:
            # If there are no parameters or component requests the object
            # can be returned as is, but possibly with conversion.
            if returnStorageClass:
                return returnStorageClass.coerce_type(inMemoryDataset)
            return inMemoryDataset

    def _getStorageClass(self) -> StorageClass:
        """Return the relevant storage class.

        Returns
        -------
        storageClass : `~lsst.daf.butler.StorageClass`
            The storage class associated with this handle, or one derived
            from the python type of the stored object.

        Raises
        ------
        KeyError
            Raised if the storage class could not be found.
        """
        factory = StorageClassFactory()
        if self.storageClass:
            return factory.getStorageClass(self.storageClass)

        # Need to match python type.
        pytype = type(self.inMemoryDataset)
        return factory.findStorageClass(pytype)
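    # Resolution paths, illustrated (the "DataFrame" name is only an example
    # of a registered storage class):
    #
    #     InMemoryDatasetHandle(df, storageClass="DataFrame")._getStorageClass()
    #         -> StorageClassFactory().getStorageClass("DataFrame")
    #     InMemoryDatasetHandle(df)._getStorageClass()
    #         -> StorageClassFactory().findStorageClass(type(df))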

    inMemoryDataset: Any
    """The object to store in this dataset handle for later retrieval.
    """

    dataId: DataCoordinate | frozendict
    """The `~lsst.daf.butler.DataCoordinate` associated with this dataset
    handle.
    """

    storageClass: str | None = None
    """The name of the `~lsst.daf.butler.StorageClass` associated with this
    dataset.

    If `None`, the storage class will be looked up from the factory.
    """

    parameters: dict | None = None
    """Optional parameters that may be used to specify a subset of the dataset
    to be loaded (`dict` or `None`).
    """

    copy: bool = False
    """Control whether a copy of the in-memory dataset is returned for every
    call to `get()`."""
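
# Example of the ``copy`` behaviour (a sketch; ``payload`` stands for any
# object whose storage class delegate supports copying):
#
#     handle = InMemoryDatasetHandle(payload, copy=True)
#     first = handle.get()
#     second = handle.get()
#     assert first is not second  # each get() call returns an independent copy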