Coverage for python/lsst/pipe/base/_dataset_handle.py: 20%

88 statements  

« prev     ^ index     » next       coverage.py v7.2.5, created at 2023-05-17 02:45 -0700

1# This file is part of pipe_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ["InMemoryDatasetHandle"] 

24 

25import dataclasses 

26from typing import Any, Optional 

27 

28from frozendict import frozendict 

29from lsst.daf.butler import ( 

30 DataCoordinate, 

31 DataId, 

32 DimensionUniverse, 

33 StorageClass, 

34 StorageClassDelegate, 

35 StorageClassFactory, 

36) 

37 

38 

def _default_dataId() -> DataCoordinate:
    """Return the data ID used when a caller supplies none.

    Returns
    -------
    dataId : `~lsst.daf.butler.DataCoordinate`
        The empty coordinate produced by `DataCoordinate.makeEmpty` for a
        `DimensionUniverse` constructed with no arguments.
    """
    universe = DimensionUniverse()
    return DataCoordinate.makeEmpty(universe)

42 

43 

@dataclasses.dataclass(frozen=True, init=False)
class InMemoryDatasetHandle:
    """An in-memory version of a `~lsst.daf.butler.DeferredDatasetHandle`.

    If ``dataId`` is not specified, a default empty dataId will be constructed.
    If ``kwargs`` are provided without specifying a ``dataId``, those
    parameters will be converted into a dataId-like entity.

    Parameters
    ----------
    inMemoryDataset : `object`
        The object to hand back from `get`. May be `None`.
    storageClass : `str`, `~lsst.daf.butler.StorageClass`, or `None`, optional
        Storage class (or its name) describing the stored object. If not
        given, it is looked up from the Python type of the stored object
        when it is needed.
    parameters : `dict` or `None`, optional
        Parameters applied on every call to `get`, merged with (and
        overridden by) any parameters given to `get` itself.
    dataId : `~lsst.daf.butler.DataId` or `None`, optional
        Data ID to associate with the dataset.
    copy : `bool`, optional
        If `True`, `get` copies the stored object before using it.
    **kwargs : `~typing.Any`
        Extra key/value pairs merged into the data ID.
    """

    # Shared default data ID. It is deliberately left unannotated so that the
    # dataclass machinery does not treat it as a field.
    _empty = DataCoordinate.makeEmpty(DimensionUniverse())

    def __init__(
        self,
        inMemoryDataset: Any,
        *,
        storageClass: str | StorageClass | None = None,
        parameters: dict[str, Any] | None = None,
        dataId: DataId | None = None,
        copy: bool = False,
        **kwargs: Any,
    ):
        # The dataclass is frozen, so attributes have to be set through
        # object.__setattr__ rather than normal assignment.
        object.__setattr__(self, "inMemoryDataset", inMemoryDataset)
        object.__setattr__(self, "storageClass", storageClass)
        object.__setattr__(self, "parameters", parameters)
        object.__setattr__(self, "copy", copy)

        # Need to be able to construct a dataId from kwargs for convenience.
        # This will not be a full DataCoordinate.
        if dataId is None:
            dataId = frozendict(kwargs) if kwargs else self._empty
        elif kwargs:
            if isinstance(dataId, DataCoordinate):
                # Combine the keyword values with the supplied coordinate,
                # which is passed as the defaults, in the same universe.
                dataId = DataCoordinate.standardize(kwargs, defaults=dataId, universe=dataId.universe)
            else:
                # Keyword values take precedence over the supplied mapping.
                combined = dict(dataId)
                combined.update(kwargs)
                dataId = frozendict(combined)
        object.__setattr__(self, "dataId", dataId)

    def get(
        self,
        *,
        component: Optional[str] = None,
        parameters: Optional[dict] = None,
        storageClass: str | StorageClass | None = None,
    ) -> Any:
        """Retrieve the dataset pointed to by this handle.

        This handle may be used multiple times, possibly with different
        parameters.

        Parameters
        ----------
        component : `str` or None
            If the deferred object is a component dataset type, this parameter
            may specify the name of the component to use in the get operation.
        parameters : `dict` or None
            Parameters to apply to the stored object. It defaults to None.
            If the value is not None, this dict will be merged with the
            parameters dict used to construct this handle; values given here
            take precedence. The supplied dict is never modified or passed
            on directly; a merged copy is used instead.
        storageClass : `StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the type stored. Specifying a read `StorageClass` can force a
            different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        return : `object`
            The dataset pointed to by this handle. Whether this returns the
            original object or a copy is controlled by the ``copy`` property
            of the handle that is set at handle construction time.
            If the stored object is `None` this method always returns `None`
            regardless of any component request or parameters.

        Raises
        ------
        KeyError
            Raised if a component or parameters are used but no storage
            class can be found.
        """
        if self.inMemoryDataset is None:
            return None

        # Always build a fresh dict so that neither the handle's own
        # parameters nor the caller's dict can be changed downstream.
        mergedParameters: dict[str, Any] = {}
        if self.parameters is not None:
            mergedParameters.update(self.parameters)
        if parameters is not None:
            mergedParameters.update(parameters)

        # Resolve the optional override of the returned Python type.
        returnStorageClass: StorageClass | None = None
        if storageClass:
            if isinstance(storageClass, str):
                returnStorageClass = StorageClassFactory().getStorageClass(storageClass)
            else:
                returnStorageClass = storageClass

        inMemoryDataset = self.inMemoryDataset

        if self.copy:
            # An optimization might be to defer copying until any components
            # and parameters have been applied. This can be a problem since
            # most component storage classes do not bother to define a
            # storage class delegate and the default delegate uses deepcopy()
            # which can fail if explicit support for deepcopy() is missing
            # or pickle does not work.
            # Copying will require a storage class be determined, which is
            # not normally required for the default case of no parameters and
            # no components.
            thisStorageClass = self._getStorageClass()
            try:
                delegate = thisStorageClass.delegate()
            except TypeError:
                # Try the default copy options if no delegate is available.
                delegate = StorageClassDelegate(thisStorageClass)

            inMemoryDataset = delegate.copy(inMemoryDataset)

        if component or mergedParameters:
            # This requires a storage class look up to locate the delegate
            # class.
            thisStorageClass = self._getStorageClass()

            # Parameters for derived components are applied against the
            # composite.
            if component in thisStorageClass.derivedComponents:
                # Validate everything that is about to be applied, including
                # the parameters supplied when the handle was constructed.
                thisStorageClass.validateParameters(mergedParameters)

                # Process the parameters (hoping this never modified the
                # original object).
                inMemoryDataset = thisStorageClass.delegate().handleParameters(
                    inMemoryDataset, mergedParameters
                )
                mergedParameters = {}  # They have now been used

                readStorageClass = thisStorageClass.derivedComponents[component]
            else:
                if component:
                    readStorageClass = thisStorageClass.components[component]
                else:
                    readStorageClass = thisStorageClass
                readStorageClass.validateParameters(mergedParameters)

            if component:
                inMemoryDataset = thisStorageClass.delegate().getComponent(inMemoryDataset, component)

            if mergedParameters:
                inMemoryDataset = readStorageClass.delegate().handleParameters(
                    inMemoryDataset, mergedParameters
                )

        # With no parameters or component request the object is returned as
        # is; in every case a type conversion may still have been requested.
        if returnStorageClass:
            return returnStorageClass.coerce_type(inMemoryDataset)
        return inMemoryDataset

    def _getStorageClass(self) -> StorageClass:
        """Return the relevant storage class.

        Returns
        -------
        storageClass : `StorageClass`
            The storage class associated with this handle, or one derived
            from the python type of the stored object.

        Raises
        ------
        KeyError
            Raised if the storage class could not be found.
        """
        # The constructor accepts a StorageClass instance as well as a name;
        # an instance needs no factory lookup.
        if isinstance(self.storageClass, StorageClass):
            return self.storageClass

        factory = StorageClassFactory()
        if self.storageClass:
            return factory.getStorageClass(self.storageClass)

        # Need to match python type.
        pytype = type(self.inMemoryDataset)
        return factory.findStorageClass(pytype)

    inMemoryDataset: Any
    """The object to store in this dataset handle for later retrieval.
    """

    dataId: DataCoordinate | frozendict
    """The `~lsst.daf.butler.DataCoordinate` associated with this dataset
    handle.
    """

    storageClass: str | StorageClass | None = None
    """The `~lsst.daf.butler.StorageClass`, or the name of one, associated
    with this dataset.

    If `None`, the storage class will be looked up from the factory.
    """

    parameters: Optional[dict] = None
    """Optional parameters that may be used to specify a subset of the dataset
    to be loaded (`dict` or `None`).
    """

    copy: bool = False
    """Control whether a copy of the in-memory dataset is returned for every
    call to get()."""