Coverage for python/lsst/pipe/base/_dataset_handle.py: 19%

89 statements  

« prev     ^ index     » next       coverage.py v7.3.0, created at 2023-08-23 10:31 +0000

1# This file is part of pipe_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ["InMemoryDatasetHandle"] 

24 

25import dataclasses 

26from typing import Any, cast 

27 

28from frozendict import frozendict 

29from lsst.daf.butler import ( 

30 DataCoordinate, 

31 DataId, 

32 DimensionUniverse, 

33 StorageClass, 

34 StorageClassDelegate, 

35 StorageClassFactory, 

36) 

37 

38 

39# Use an empty dataID as a default. 

def _default_dataId() -> DataCoordinate:
    """Build the empty data ID used as a default.

    Returns
    -------
    dataId : `~lsst.daf.butler.DataCoordinate`
        The value returned by ``DataCoordinate.makeEmpty`` for a
        default-constructed `~lsst.daf.butler.DimensionUniverse`.
    """
    universe = DimensionUniverse()
    return DataCoordinate.makeEmpty(universe)

42 

43 

@dataclasses.dataclass(frozen=True, init=False)
class InMemoryDatasetHandle:
    """An in-memory version of a `~lsst.daf.butler.DeferredDatasetHandle`.

    If ``dataId`` is not specified, a default empty dataId will be constructed.
    If ``kwargs`` are provided without specifying a ``dataId``, those
    parameters will be converted into a dataId-like entity.

    Parameters
    ----------
    inMemoryDataset : `object`
        The object to store in this handle for later retrieval.
    storageClass : `~lsst.daf.butler.StorageClass` or `str`, optional
        The storage class, or the name of the storage class, associated
        with the dataset. If `None` the storage class is looked up from the
        Python type of ``inMemoryDataset`` when it is needed.
    parameters : `dict` [`str`, `object`], optional
        Parameters applied on every call to `get`. They are merged with any
        parameters given to `get` itself.
    dataId : `~lsst.daf.butler.DataCoordinate` or mapping, optional
        The data ID to associate with this handle.
    copy : `bool`, optional
        If `True` a copy of the stored dataset is made on every call to
        `get`.
    **kwargs : `object`
        Additional key/value pairs that are merged into ``dataId``, or used
        to build one if ``dataId`` is `None`.
    """

    # Shared empty data ID used when neither ``dataId`` nor ``kwargs`` are
    # given. It is not annotated and therefore is not a dataclass field.
    _empty = DataCoordinate.makeEmpty(DimensionUniverse())

    def __init__(
        self,
        inMemoryDataset: Any,
        *,
        storageClass: str | StorageClass | None = None,
        parameters: dict[str, Any] | None = None,
        dataId: DataId | None = None,
        copy: bool = False,
        **kwargs: Any,
    ):
        # The dataclass is frozen so normal attribute assignment is blocked;
        # go through object.__setattr__ instead.
        object.__setattr__(self, "inMemoryDataset", inMemoryDataset)
        object.__setattr__(self, "storageClass", storageClass)
        object.__setattr__(self, "parameters", parameters)
        object.__setattr__(self, "copy", copy)
        # Need to be able to construct a dataId from kwargs for convenience.
        # This will not be a full DataCoordinate.
        if dataId is None:
            dataId = frozendict(kwargs) if kwargs else self._empty
        elif kwargs:
            if isinstance(dataId, DataCoordinate):
                dataId = DataCoordinate.standardize(kwargs, defaults=dataId, universe=dataId.universe)
            else:
                # Values from kwargs take precedence over those in dataId.
                dataId = frozendict(dict(dataId, **kwargs))
        object.__setattr__(self, "dataId", dataId)

    def get(
        self,
        *,
        component: str | None = None,
        parameters: dict | None = None,
        storageClass: str | StorageClass | None = None,
    ) -> Any:
        """Retrieve the dataset pointed to by this handle.

        This handle may be used multiple times, possibly with different
        parameters.

        Parameters
        ----------
        component : `str` or None
            If the deferred object is a component dataset type, this parameter
            may specify the name of the component to use in the get operation.
        parameters : `dict` or None
            Parameters to apply to this retrieval. It defaults to `None`.
            If the value is not `None`, this `dict` will be merged with the
            parameters dict used to construct this handle, with the values
            given here taking precedence. Neither `dict` is modified.
        storageClass : `~lsst.daf.butler.StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the type stored. Specifying a read `~lsst.daf.butler.StorageClass`
            can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        return : `object`
            The dataset pointed to by this handle. Whether this returns the
            original object or a copy is controlled by the ``copy`` property
            of the handle that is set at handle construction time.
            If the stored object is `None` this method always returns `None`
            regardless of any component request or parameters.

        Raises
        ------
        KeyError
            Raised if a component or parameters are used but no storage
            class can be found.
        """
        if self.inMemoryDataset is None:
            return None

        # Always work on a private copy so that neither the parameters held
        # by the handle nor the dict supplied by the caller can be modified
        # by anything downstream.
        mergedParameters: dict[str, Any] = {}
        if self.parameters is not None:
            mergedParameters.update(self.parameters)
        if parameters is not None:
            mergedParameters.update(parameters)

        returnStorageClass: StorageClass | None = None
        if storageClass:
            if isinstance(storageClass, str):
                returnStorageClass = StorageClassFactory().getStorageClass(storageClass)
            else:
                returnStorageClass = storageClass

        inMemoryDataset = self.inMemoryDataset

        # Looked up lazily since the default case of no copy, no parameters
        # and no components does not need a storage class at all.
        thisStorageClass: StorageClass | None = None

        if self.copy:
            # An optimization might be to defer copying until any components
            # and parameters have been applied. This can be a problem since
            # most component storage classes do not bother to define a
            # storage class delegate and the default delegate uses deepcopy()
            # which can fail if explicit support for deepcopy() is missing
            # or pickle does not work.
            # Copying will require a storage class be determined, which is
            # not normally required for the default case of no parameters and
            # no components.
            thisStorageClass = self._getStorageClass()
            try:
                delegate = thisStorageClass.delegate()
            except TypeError:
                # Try the default copy options if no delegate is available.
                delegate = StorageClassDelegate(thisStorageClass)

            inMemoryDataset = delegate.copy(inMemoryDataset)

        if component or mergedParameters:
            # This requires a storage class look up to locate the delegate
            # class. Reuse the one found for the copy if there was one.
            if thisStorageClass is None:
                thisStorageClass = self._getStorageClass()

            # Parameters for derived components are applied against the
            # composite.
            if component in thisStorageClass.derivedComponents:
                # For some reason MyPy doesn't see the line above as narrowing
                # 'component' from 'str | None' to 'str'.
                component = cast(str, component)
                # Validate exactly what is about to be applied, including
                # any parameters supplied when the handle was constructed.
                thisStorageClass.validateParameters(mergedParameters)

                # Process the parameters (hoping this never modified the
                # original object).
                inMemoryDataset = thisStorageClass.delegate().handleParameters(
                    inMemoryDataset, mergedParameters
                )
                mergedParameters = {}  # They have now been used

                readStorageClass = thisStorageClass.derivedComponents[component]
            else:
                if component:
                    readStorageClass = thisStorageClass.components[component]
                else:
                    readStorageClass = thisStorageClass
                readStorageClass.validateParameters(mergedParameters)

            if component:
                inMemoryDataset = thisStorageClass.delegate().getComponent(inMemoryDataset, component)

            if mergedParameters:
                inMemoryDataset = readStorageClass.delegate().handleParameters(
                    inMemoryDataset, mergedParameters
                )

        # With or without component/parameter handling the result may still
        # need converting to the requested Python type.
        if returnStorageClass:
            return returnStorageClass.coerce_type(inMemoryDataset)
        return inMemoryDataset

    def _getStorageClass(self) -> StorageClass:
        """Return the relevant storage class.

        Returns
        -------
        storageClass : `~lsst.daf.butler.StorageClass`
            The storage class associated with this handle, or one derived
            from the python type of the stored object.

        Raises
        ------
        KeyError
            Raised if the storage class could not be found.
        """
        storageClass = self.storageClass
        # The constructor accepts a StorageClass instance as well as a name;
        # an instance needs no factory lookup.
        if isinstance(storageClass, StorageClass):
            return storageClass

        factory = StorageClassFactory()
        if storageClass:
            return factory.getStorageClass(storageClass)

        # Need to match python type.
        pytype = type(self.inMemoryDataset)
        return factory.findStorageClass(pytype)

    inMemoryDataset: Any
    """The object to store in this dataset handle for later retrieval.
    """

    dataId: DataCoordinate | frozendict
    """The `~lsst.daf.butler.DataCoordinate` associated with this dataset
    handle.
    """

    storageClass: str | StorageClass | None = None
    """The `~lsst.daf.butler.StorageClass`, or the name of the storage class,
    associated with this dataset.

    If `None`, the storage class will be looked up from the factory.
    """

    parameters: dict | None = None
    """Optional parameters that may be used to specify a subset of the dataset
    to be loaded (`dict` or `None`).
    """

    copy: bool = False
    """Control whether a copy of the in-memory dataset is returned for every
    call to `get()`."""