Coverage for python / lsst / daf / butler / _storage_class_delegate.py: 19%

113 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-28 08:36 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28"""Support for reading and writing composite objects.""" 

29 

30from __future__ import annotations 

31 

32__all__ = ("DatasetComponent", "StorageClassDelegate") 

33 

34import copy 

35import logging 

36from collections.abc import Iterable, Mapping 

37from dataclasses import dataclass 

38from typing import TYPE_CHECKING, Any 

39 

40from lsst.utils.introspection import get_full_type_name 

41 

42if TYPE_CHECKING: 

43 from lsst.daf.butler import DatasetProvenance, DatasetRef 

44 

45 from ._storage_class import StorageClass 

46 

47log = logging.getLogger(__name__) 

48 

49 

@dataclass
class DatasetComponent:
    """Component of a dataset and associated information.

    Groups a component's name, the storage class used to read or write it,
    and the component object itself into a single record.
    """

    name: str
    """Name of the component.
    """

    storageClass: StorageClass
    """StorageClass to be used when reading or writing this component.
    """

    component: Any
    """Component extracted from the composite object.
    """

65 

66 

class StorageClassDelegate:
    """Delegate class for StorageClass components and parameters.

    This class delegates the handling of components and parameters for the
    python type associated with a particular `StorageClass`.

    A delegate is required for any storage class that defines components
    (derived or otherwise) or supports read parameters. It is used for
    composite disassembly and assembly.

    Attributes
    ----------
    storageClass : `StorageClass`
        The storage class supplied at construction.

    Parameters
    ----------
    storageClass : `StorageClass`
        `StorageClass` to be used with this delegate.
    """

    def __init__(self, storageClass: StorageClass):
        assert storageClass is not None
        self.storageClass = storageClass

    def can_accept(self, inMemoryDataset: Any) -> bool:
        """Indicate whether this delegate can accept the specified
        storage class directly.

        Parameters
        ----------
        inMemoryDataset : `object`
            The dataset that is to be stored.

        Returns
        -------
        accepts : `bool`
            If `True` the delegate can handle data of this type without
            requiring datastore to convert it. If `False` the datastore
            will attempt to convert before storage.

        Notes
        -----
        The base class always returns `False` even if the given type is an
        instance of the delegate type. This will result in a storage class
        conversion no-op but also allows mocks with mocked storage classes
        to work properly.
        """
        return False

    @staticmethod
    def _attrNames(componentName: str, getter: bool = True) -> tuple[str, ...]:
        """Return the attribute names to try for a component.

        Parameters
        ----------
        componentName : `str`
            Name of component/attribute to look for. Must not be empty.
        getter : `bool`
            If true, return getters, else return setters.

        Returns
        -------
        attrs : `tuple` [`str`]
            Names to attempt, in order: the bare component name, then
            ``<root>_<name>``, then ``<root><Name>`` where ``<root>`` is
            ``get`` or ``set``.
        """
        root = "get" if getter else "set"

        # Capitalized name for getXxx must only capitalize first letter and
        # not downcase the rest: getVisitInfo and not getVisitinfo. Slicing
        # from index 1 yields "" for a single-character name.
        capitalized = f"{root}{componentName[0].upper()}{componentName[1:]}"
        return (componentName, f"{root}_{componentName}", capitalized)

    def assemble(self, components: dict[str, Any], pytype: type | None = None) -> Any:
        """Construct an object from components based on storageClass.

        This generic implementation assumes that instances of objects
        can be created either by passing all the components to a constructor
        or by calling setter methods with the name.

        Parameters
        ----------
        components : `dict`
            Collection of components from which to assemble a new composite
            object. Keys correspond to composite names in the `StorageClass`.
        pytype : `type`, optional
            Override the type from the
            :attr:`StorageClassDelegate.storageClass`
            to use when assembling the final object.

        Returns
        -------
        composite : `object`
            New composite object assembled from components.

        Raises
        ------
        ValueError
            Some components could not be used to create the object or,
            alternatively, some components were not defined in the associated
            StorageClass.
        """
        cls = pytype if pytype is not None else self.storageClass.pytype

        # Check that the storage class components are consistent
        understood = set(self.storageClass.components)
        requested = set(components.keys())
        unknown = requested - understood
        if unknown:
            raise ValueError(f"Requested component(s) not known to StorageClass: {unknown}")

        # First try to create an instance directly using keyword args
        try:
            obj = cls(**components)
        except TypeError:
            obj = None

        # Now try to use setters if direct instantiation didn't work.
        # Test identity against None rather than truthiness: a composite that
        # was constructed successfully may be falsy (e.g. it defines __len__
        # and is empty) or may not support truth testing at all, and must
        # not be thrown away and rebuilt.
        if obj is None:
            obj = cls()

            failed = []
            for name, component in components.items():
                if component is None:
                    continue
                for attr in self._attrNames(name, getter=False):
                    if hasattr(obj, attr):
                        if attr == name:  # Real attribute
                            setattr(obj, attr, component)
                        else:
                            setter = getattr(obj, attr)
                            setter(component)
                        break
                else:
                    # No candidate attribute or setter exists on the object.
                    failed.append(name)

            if failed:
                raise ValueError(f"Unhandled components during assembly ({failed})")

        return obj

    def getComponent(self, composite: Any, componentName: str) -> Any:
        """Attempt to retrieve component from composite object by heuristic.

        Will first attempt item lookup if the composite supports containment
        tests and contains the name, then a direct attribute retrieval, or
        else getter methods of the form "get_componentName" and
        "getComponentName".

        Parameters
        ----------
        composite : `object`
            Item to query for the component.
        componentName : `str`
            Name of component to retrieve.

        Returns
        -------
        component : `object`
            Component extracted from composite.

        Raises
        ------
        AttributeError
            The attribute could not be read from the composite.
        """
        if hasattr(composite, "__contains__") and componentName in composite:
            return composite[componentName]

        for attr in self._attrNames(componentName, getter=True):
            if hasattr(composite, attr):
                component = getattr(composite, attr)
                if attr != componentName:  # We have a method
                    component = component()
                break
        else:
            raise AttributeError(f"Unable to get component {componentName}")
        return component

    def disassemble(
        self, composite: Any, subset: Iterable | None = None, override: Any | None = None
    ) -> dict[str, DatasetComponent]:
        """Disassemble a composite.

        This is a generic implementation of a disassembler.
        This implementation attempts to extract components from the parent
        by looking for attributes of the same name or getter methods derived
        from the component name.

        Parameters
        ----------
        composite : `object`
            Parent composite object consisting of components to be extracted.
        subset : `~collections.abc.Iterable`, optional
            Iterable containing subset of components to extract from composite.
            Must be a subset of those defined in
            `StorageClassDelegate.storageClass`.
        override : `object`, optional
            Object to use for disassembly instead of parent. This can be useful
            when called from subclasses that have composites in a hierarchy.

        Returns
        -------
        components : `dict`
            `dict` with keys matching the components defined in
            `StorageClassDelegate.storageClass`
            and values being `DatasetComponent` instances describing the
            component.

        Raises
        ------
        ValueError
            A requested component can not be found in the parent using generic
            lookups, or the requested subset contains names that are not
            components of the storage class.
        TypeError
            The parent object does not match the supplied
            `StorageClassDelegate.storageClass`.
        """
        if not self.storageClass.isComposite():
            raise TypeError(
                f"Can not disassemble something that is not a composite (storage class={self.storageClass})"
            )

        # Validation is done on the parent even when an override is supplied.
        if not self.storageClass.validateInstance(composite):
            raise TypeError(
                "Unexpected type mismatch between parent and StorageClass "
                f"({type(composite)} != {self.storageClass.pytype})"
            )

        requested = set(self.storageClass.components)

        if subset is not None:
            subset = set(subset)
            diff = subset - requested
            if diff:
                raise ValueError(f"Requested subset is not a subset of supported components: {diff}")
            requested = subset

        if override is not None:
            composite = override

        components = {}
        # Iterate over a snapshot because found names are removed from
        # ``requested`` inside the loop.
        for c in list(requested):
            # Try three different ways to get a value associated with the
            # component name.
            try:
                component = self.getComponent(composite, c)
            except AttributeError:
                # Defer complaining so we get an idea of how many problems
                # we have
                pass
            else:
                # If we found a match store it in the results dict and remove
                # it from the set of components we are still looking for.
                # A component whose value is None is treated as not found.
                if component is not None:
                    components[c] = DatasetComponent(c, self.storageClass.components[c], component)
                    requested.remove(c)

        if requested:
            raise ValueError(f"Unhandled components during disassembly ({requested})")

        return components

    def add_provenance(
        self, inMemoryDataset: Any, ref: DatasetRef, provenance: DatasetProvenance | None = None
    ) -> Any:
        """Add provenance to the composite dataset.

        Parameters
        ----------
        inMemoryDataset : `object`
            The composite dataset to serialize.
        ref : `DatasetRef`
            The dataset associated with this in-memory dataset.
        provenance : `DatasetProvenance` or `None`, optional
            Any provenance that should be attached to the serialized dataset.
            Can be ignored by a delegate.

        Returns
        -------
        dataset_to_disassemble : `object`
            The dataset to use for serialization and disassembly.
            Can be the same object as given.

        Notes
        -----
        The base class implementation returns the given object unchanged.
        """
        return inMemoryDataset

    def handleParameters(self, inMemoryDataset: Any, parameters: Mapping[str, Any] | None = None) -> Any:
        """Modify the in-memory dataset using the supplied parameters.

        Can return a possibly new object.

        For safety, if any parameters are given to this method an
        exception will be raised. This is to protect the user from
        thinking that parameters have been applied when they have not been
        applied.

        Parameters
        ----------
        inMemoryDataset : `object`
            Object to modify based on the parameters.
        parameters : `~collections.abc.Mapping` or `None`, optional
            Parameters to apply. Values are specific to the parameter.
            Supported parameters are defined in the associated
            `StorageClass`. If no relevant parameters are specified the
            inMemoryDataset will be returned unchanged.

        Returns
        -------
        inMemoryDataset : `object`
            Updated form of supplied in-memory dataset, after parameters
            have been used.

        Raises
        ------
        ValueError
            Parameters have been provided to this default implementation.
        """
        if parameters:
            raise ValueError(f"Parameters ({parameters}) provided to default implementation.")

        return inMemoryDataset

    @classmethod
    def selectResponsibleComponent(cls, derivedComponent: str, fromComponents: set[str | None]) -> str:
        """Select the best component for calculating a derived component.

        Given a possible set of components to choose from, return the
        component that should be used to calculate the requested derived
        component.

        Parameters
        ----------
        derivedComponent : `str`
            The derived component that is being requested.
        fromComponents : `set` of `str`
            The available set of component options from which that derived
            component can be computed. `None` can be included but should
            be ignored.

        Returns
        -------
        required : `str`
            The component that should be used.

        Raises
        ------
        NotImplementedError
            Raised if this delegate refuses to answer the question. The
            base class implementation always raises this.
        ValueError
            Raised if this delegate can not determine a relevant component
            from the supplied options.
        """
        raise NotImplementedError("This delegate does not support derived components")

    def copy(self, inMemoryDataset: Any) -> Any:
        """Copy the supplied python type and return the copy.

        Parameters
        ----------
        inMemoryDataset : `object`
            Object to copy.

        Returns
        -------
        copied : `object`
            A copy of the supplied object. Can be the same object if the
            object is known to be read-only.

        Raises
        ------
        NotImplementedError
            Raised if none of the default methods for copying work.

        Notes
        -----
        The default implementation uses `copy.deepcopy`.
        It is generally expected that this method is the equivalent of a deep
        copy. Subclasses can override this method if they already know the
        optimal approach for deep copying.
        """
        try:
            return copy.deepcopy(inMemoryDataset)
        except Exception as e:
            # Any failure to deep copy is reported uniformly, with the
            # original exception chained as the cause.
            raise NotImplementedError(
                f"Unable to deep copy the supplied python type ({get_full_type_name(inMemoryDataset)}) "
                f"using default methods ({e})"
            ) from e