Coverage for python/lsst/daf/butler/core/storageClass.py: 52%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

252 statements  

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

"""Support for Storage Classes."""

# The docstring must be the first statement in the module to be picked up as
# ``__doc__``; a ``from __future__`` import is allowed to follow it.
from __future__ import annotations

# Names exported by ``from <module> import *``.
__all__ = ("StorageClass", "StorageClassFactory", "StorageClassConfig")

27 

28import builtins 

29import copy 

30import logging 

31 

32from typing import ( 

33 Any, 

34 Collection, 

35 Dict, 

36 List, 

37 Mapping, 

38 Optional, 

39 Set, 

40 Sequence, 

41 Tuple, 

42 Type, 

43 Union, 

44) 

45 

46from lsst.utils import doImportType 

47from lsst.utils.introspection import get_full_type_name 

48from lsst.utils.classes import Singleton 

49from .storageClassDelegate import StorageClassDelegate 

50from .config import ConfigSubset, Config 

51from .configSupport import LookupKey 

52 

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

54 

55 

class StorageClassConfig(ConfigSubset):
    """Configuration subset holding `StorageClass` definitions."""

    # File name of the default storage class definitions. How it is located
    # and loaded is handled by `ConfigSubset` (not visible here).
    defaultConfigFile = "storageClasses.yaml"

    # Name of the section holding the definitions when they are embedded in
    # a larger configuration.
    component = "storageClasses"

61 

62 

class StorageClass:
    """Class describing how a label maps to a particular Python type.

    Parameters
    ----------
    name : `str`
        Name to use for this class.
    pytype : `type` or `str`
        Python type (or name of type) to associate with the `StorageClass`.
    components : `dict`, optional
        `dict` mapping name of a component to another `StorageClass`.
    derivedComponents : `dict`, optional
        `dict` mapping name of a derived component to another `StorageClass`.
    parameters : `~collections.abc.Sequence` or `~collections.abc.Set`
        Parameters understood by this `StorageClass` that can control
        reading of data from datastores.
    delegate : `str`, optional
        Fully qualified name of class supporting assembly and disassembly
        of a `pytype` instance.

    Notes
    -----
    Every constructor argument that is left as `None` falls back to the
    matching ``_cls_*`` class attribute, so a subclass can carry a complete
    definition and be instantiated without arguments.
    """

    # Class-level defaults used when the corresponding constructor argument
    # is `None`.
    _cls_name: str = "BaseStorageClass"
    _cls_components: Optional[Dict[str, StorageClass]] = None
    _cls_derivedComponents: Optional[Dict[str, StorageClass]] = None
    _cls_parameters: Optional[Union[Set[str], Sequence[str]]] = None
    _cls_delegate: Optional[str] = None
    _cls_pytype: Optional[Union[Type, str]] = None

    # Delegate used for composites that do not name one explicitly.
    defaultDelegate: Type = StorageClassDelegate
    defaultDelegateName: str = get_full_type_name(defaultDelegate)

    def __init__(self, name: Optional[str] = None,
                 pytype: Optional[Union[Type, str]] = None,
                 components: Optional[Dict[str, StorageClass]] = None,
                 derivedComponents: Optional[Dict[str, StorageClass]] = None,
                 parameters: Optional[Union[Sequence, Set]] = None,
                 delegate: Optional[str] = None):
        # Fall back to the class-level definition for anything not supplied.
        if name is None:
            name = self._cls_name
        if pytype is None:
            pytype = self._cls_pytype
        if components is None:
            components = self._cls_components
        if derivedComponents is None:
            derivedComponents = self._cls_derivedComponents
        if parameters is None:
            parameters = self._cls_parameters
        if delegate is None:
            delegate = self._cls_delegate
        self.name = name

        # With no type given at all, anything is acceptable.
        if pytype is None:
            pytype = object

        self._pytype: Optional[Type]
        if not isinstance(pytype, str):
            # Already have a type so store it and get the name
            self._pytypeName = get_full_type_name(pytype)
            self._pytype = pytype
        else:
            # Store the type name and defer loading of type
            self._pytypeName = pytype
            self._pytype = None

        if components is not None:
            if len(components) == 1:
                raise ValueError(f"Composite storage class {name} is not allowed to have"
                                 f" only one component '{next(iter(components))}'."
                                 " Did you mean it to be a derived component?")
            self._components = components
        else:
            self._components = {}
        self._derivedComponents = derivedComponents if derivedComponents is not None else {}
        # Frozen so that the supported parameters can not be modified later.
        self._parameters = frozenset(parameters) if parameters is not None else frozenset()

        # An explicit delegate is stored by name and imported on demand.
        self._delegate: Optional[Type]
        self._delegateClassName: Optional[str]
        if delegate is not None:
            self._delegateClassName = delegate
            self._delegate = None
        elif components is not None:
            # We set a default delegate for composites so that a class is
            # guaranteed to support something if it is a composite.
            log.debug("Setting default delegate for %s", self.name)
            self._delegate = self.defaultDelegate
            self._delegateClassName = self.defaultDelegateName
        else:
            self._delegate = None
            self._delegateClassName = None

    @property
    def components(self) -> Dict[str, StorageClass]:
        """Return the components associated with this `StorageClass`."""
        return self._components

    @property
    def derivedComponents(self) -> Dict[str, StorageClass]:
        """Return derived components associated with `StorageClass`."""
        return self._derivedComponents

    @property
    def parameters(self) -> Set[str]:
        """Return `set` of names of supported parameters.

        A new `set` is built on every access, so modifying the returned
        value does not affect this `StorageClass`.
        """
        return set(self._parameters)

    @property
    def pytype(self) -> Type:
        """Return Python type associated with this `StorageClass`.

        If only the type name was supplied, the type is resolved on first
        access (builtins are checked before importing) and then cached.
        """
        if self._pytype is not None:
            return self._pytype

        if hasattr(builtins, self._pytypeName):
            pytype = getattr(builtins, self._pytypeName)
        else:
            pytype = doImportType(self._pytypeName)
        self._pytype = pytype
        return self._pytype

    @property
    def delegateClass(self) -> Optional[Type]:
        """Class to use to delegate type-specific actions.

        `None` if no delegate is associated with this `StorageClass`.
        The class is imported on first access and then cached.
        """
        if self._delegate is not None:
            return self._delegate
        if self._delegateClassName is None:
            return None
        delegate_class = doImportType(self._delegateClassName)
        self._delegate = delegate_class
        return self._delegate

    def allComponents(self) -> Mapping[str, StorageClass]:
        """Return all defined components.

        This mapping includes all the derived and read/write components
        for the corresponding storage class.

        Returns
        -------
        comp : `dict` of [`str`, `StorageClass`]
            The component name to storage class mapping. This is a new
            mapping; derived components replace read/write components of
            the same name.
        """
        components = copy.copy(self.components)
        components.update(self.derivedComponents)
        return components

    def delegate(self) -> StorageClassDelegate:
        """Return an instance of a storage class delegate.

        Returns
        -------
        delegate : `StorageClassDelegate`
            Instance of the delegate associated with this `StorageClass`.
            The delegate is constructed with this `StorageClass`.

        Raises
        ------
        TypeError
            This StorageClass has no associated delegate.
        """
        cls = self.delegateClass
        if cls is None:
            raise TypeError(f"No delegate class is associated with StorageClass {self.name}")
        return cls(storageClass=self)

    def isComposite(self) -> bool:
        """Return Boolean indicating whether this is a composite or not.

        Returns
        -------
        isComposite : `bool`
            `True` if this `StorageClass` has at least one (non-derived)
            component, `False` otherwise.
        """
        return bool(self.components)

    def _lookupNames(self) -> Tuple[LookupKey, ...]:
        """Keys to use when looking up this DatasetRef in a configuration.

        The names are returned in order of priority.

        Returns
        -------
        names : `tuple` of `LookupKey`
            Tuple of a `LookupKey` using the `StorageClass` name.
        """
        return (LookupKey(name=self.name), )

    def knownParameters(self) -> Set[str]:
        """Return set of all parameters known to this `StorageClass`.

        The set includes parameters understood by components of a composite.
        Derived components are not consulted.

        Returns
        -------
        known : `set`
            All parameter keys of this `StorageClass` and the component
            storage classes.
        """
        known = set(self._parameters)
        for sc in self.components.values():
            known.update(sc.knownParameters())
        return known

    def validateParameters(self, parameters: Optional[Collection] = None) -> None:
        """Check that the parameters are known to this `StorageClass`.

        Does not check the values.

        Parameters
        ----------
        parameters : `~collections.abc.Collection`, optional
            Collection containing the parameters. Can be `dict`-like or
            `set`-like. The parameter values are not checked.
            If no parameters are supplied, always returns without error.

        Raises
        ------
        KeyError
            Some parameters are not understood by this `StorageClass`.
        """
        # No parameters is always okay
        if not parameters:
            return

        # Extract the important information into a set. Works for dict and
        # list.
        external = set(parameters)

        diff = external - self.knownParameters()
        if diff:
            s = "s" if len(diff) > 1 else ""
            # Sort so that the message does not depend on set iteration
            # order.
            unknown = "', '".join(sorted(diff))
            raise KeyError(f"Parameter{s} '{unknown}' not understood by StorageClass {self.name}")

    def filterParameters(self, parameters: Optional[Dict[str, Any]],
                         subset: Optional[Collection] = None) -> Dict[str, Any]:
        """Filter out parameters that are not known to this `StorageClass`.

        Parameters
        ----------
        parameters : `dict`, optional
            Candidate parameters. Can be `None` if no parameters have
            been provided.
        subset : `~collections.abc.Collection`, optional
            Subset of supported parameters that the caller is interested
            in using. The subset must be known to the `StorageClass`
            if specified. If `None` the supplied parameters will all
            be checked, else only the keys in this set will be checked.

        Returns
        -------
        filtered : `dict`
            Valid parameters. Empty `dict` if none are suitable.

        Raises
        ------
        ValueError
            Raised if the provided subset is not a subset of the supported
            parameters or if it is an empty set.
        """
        # Nothing supplied means nothing to filter; the subset is not
        # validated in this case.
        if not parameters:
            return {}

        known = self.knownParameters()

        if subset is not None:
            if not subset:
                raise ValueError("Specified a parameter subset but it was empty")
            subset = set(subset)
            if not subset.issubset(known):
                raise ValueError(f"Requested subset ({subset}) is not a subset of"
                                 f" known parameters ({known})")
            wanted = subset
        else:
            wanted = known

        return {k: parameters[k] for k in wanted if k in parameters}

    def validateInstance(self, instance: Any) -> bool:
        """Check that the supplied Python object has the expected Python type.

        Parameters
        ----------
        instance : `object`
            Object to check.

        Returns
        -------
        isOk : `bool`
            True if the supplied instance object can be handled by this
            `StorageClass`, False otherwise.
        """
        return isinstance(instance, self.pytype)

    def __eq__(self, other: Any) -> bool:
        """Equality checks name, pytype name, delegate name, and components.

        The supported parameters are compared as well. Derived components
        are not part of the comparison.
        """
        if not isinstance(other, StorageClass):
            return NotImplemented

        if self.name != other.name:
            return False

        # We must compare pytype and delegate by name since we do not want
        # to trigger an import of external module code here
        if self._delegateClassName != other._delegateClassName:
            return False
        if self._pytypeName != other._pytypeName:
            return False

        # Ensure we have the same component keys in each
        if set(self.components.keys()) != set(other.components.keys()):
            return False

        # Same parameters
        if self.parameters != other.parameters:
            return False

        # Ensure that all the components have the same type
        for k in self.components:
            if self.components[k] != other.components[k]:
                return False

        # If we got to this point everything checks out
        return True

    def __hash__(self) -> int:
        """Hash on the name only; equal instances always share a name."""
        return hash(self.name)

    def __repr__(self) -> str:
        """Return a constructor-like representation.

        Only the name is always shown; pytype, delegate, parameters and
        components appear when they differ from the empty defaults.
        """
        optionals: Dict[str, Any] = {}
        if self._pytypeName != "object":
            optionals["pytype"] = self._pytypeName
        if self._delegateClassName is not None:
            optionals["delegate"] = self._delegateClassName
        if self._parameters:
            optionals["parameters"] = self._parameters
        if self.components:
            optionals["components"] = self.components

        # order is preserved in the dict
        options = ", ".join(f"{k}={v!r}" for k, v in optionals.items())

        # Start with mandatory fields
        r = f"{self.__class__.__name__}({self.name!r}"
        if options:
            r = r + ", " + options
        r = r + ")"
        return r

    def __str__(self) -> str:
        """Return the name of this `StorageClass`."""
        return self.name

415 

416 

class StorageClassFactory(metaclass=Singleton):
    """Factory for `StorageClass` instances.

    This class is a singleton, with each instance sharing the pool of
    StorageClasses. Since code can not know whether it is the first
    time the instance has been created, the constructor takes no arguments.
    To populate the factory with storage classes, a call to
    `~StorageClassFactory.addFromConfig()` should be made.

    Parameters
    ----------
    config : `StorageClassConfig` or `str`, optional
        Load configuration. In a `ButlerConfig` the relevant configuration
        is located in the ``storageClasses`` section.
    """

    def __init__(self, config: Optional[Union[StorageClassConfig, str]] = None):
        # Registered storage classes, indexed by name.
        self._storageClasses: Dict[str, StorageClass] = {}
        # Every configuration that has been handed to addFromConfig().
        self._configs: List[StorageClassConfig] = []

        # The default definitions are always loaded first; anything the
        # caller supplies is added on top of them.
        self.addFromConfig(StorageClassConfig())
        if config is not None:
            self.addFromConfig(config)

    def __str__(self) -> str:
        """Return summary of factory.

        Returns
        -------
        summary : `str`
            Count of registered storage classes followed by one
            ``name: storage class`` line per entry.
        """
        listing = "\n".join(f"{label}: {self._storageClasses[label]}" for label in self._storageClasses)
        return f"""Number of registered StorageClasses: {len(self._storageClasses)}

StorageClasses
--------------
{listing}
"""

    def __contains__(self, storageClassOrName: Union[StorageClass, str]) -> bool:
        """Indicate whether the storage class exists in the factory.

        Parameters
        ----------
        storageClassOrName : `str` or `StorageClass`
            If `str` is given existence of the named StorageClass
            in the factory is checked. If `StorageClass` is given
            existence and equality are checked.

        Returns
        -------
        in : `bool`
            True if the supplied string is present, or if the supplied
            `StorageClass` is present and identical. Any other type of
            argument yields False.

        Notes
        -----
        The two different checks (one for "key" and one for "value") based on
        the type of the given argument mean that it is possible for
        StorageClass.name to be in the factory but StorageClass to not be
        in the factory.
        """
        if isinstance(storageClassOrName, str):
            return storageClassOrName in self._storageClasses
        if not isinstance(storageClassOrName, StorageClass):
            return False

        label = storageClassOrName.name
        if label not in self._storageClasses:
            return False
        return storageClassOrName == self._storageClasses[label]

    def addFromConfig(self, config: Union[StorageClassConfig, Config, str]) -> None:
        """Add more `StorageClass` definitions from a config file.

        Parameters
        ----------
        config : `StorageClassConfig`, `Config` or `str`
            Storage class configuration. Can contain a ``storageClasses``
            key if part of a global configuration.
        """
        sconfig = StorageClassConfig(config)
        # NOTE(review): the object stored here is the same one that entries
        # are popped from below, so the stored config appears to be consumed
        # as definitions are processed. Confirm that this is intended.
        self._configs.append(sconfig)

        # Definitions can refer to components or parents that appear later
        # in the configuration, so the helper recurses to create whatever a
        # definition depends on before creating the definition itself.
        def _define(label: str, pending: StorageClassConfig) -> None:
            # Nothing to do if this entry was already consumed through
            # recursion.
            if label not in pending:
                return
            definition = pending.pop(label)

            # Scalar items are passed straight through to the constructor.
            ctorArgs = {}
            for key in ("pytype", "delegate", "parameters"):
                if key in definition:
                    ctorArgs[key] = definition[key]

            # Component mappings name other storage classes, which are
            # resolved to instances (creating them first if required).
            for section in ("components", "derivedComponents"):
                if section not in definition:
                    continue
                resolved = {}
                for componentName, componentType in definition[section].items():
                    if componentType not in self:
                        _define(componentType, pending)
                    resolved[componentName] = self.getStorageClass(componentType)
                ctorArgs[section] = resolved

            # A parent definition, if any, supplies the Python base class.
            parentType = None
            if "inheritsFrom" in definition:
                parentName = definition["inheritsFrom"]
                if parentName not in self:
                    _define(parentName, pending)
                parentType = type(self.getStorageClass(parentName))

            # Always create the storage class, even if the name is already
            # registered, so that registration can detect an attempt to
            # overwrite with a different definition.
            newType = self.makeNewStorageClass(label, parentType, **ctorArgs)
            self.registerStorageClass(newType())

        # Iterate over a snapshot of the keys because entries are removed
        # while processing.
        for label in list(sconfig.keys()):
            _define(label, sconfig)

    @staticmethod
    def makeNewStorageClass(name: str,
                            baseClass: Optional[Type[StorageClass]] = StorageClass,
                            **kwargs: Any) -> Type[StorageClass]:
        """Create a new Python class as a subclass of `StorageClass`.

        Parameters
        ----------
        name : `str`
            Name to use for this class.
        baseClass : `type`, optional
            Base class for this `StorageClass`. Must be either `StorageClass`
            or a subclass of `StorageClass`. If `None`, `StorageClass` will
            be used.
        **kwargs
            Definition of the new class. Each entry that is not `None` is
            stored on the new type as a ``_cls_<key>`` class attribute.

        Returns
        -------
        newtype : `type` subclass of `StorageClass`
            Newly created Python type.

        Raises
        ------
        ValueError
            Raised if ``baseClass`` is not a subclass of `StorageClass`.
        """
        parent = StorageClass if baseClass is None else baseClass
        if not issubclass(parent, StorageClass):
            raise ValueError(f"Base class must be a StorageClass not {parent}")

        # Translate the supplied arguments to the internal class attribute
        # names, skipping anything left unset.
        attributes: Dict[str, Any] = {}
        for key, value in kwargs.items():
            if value is not None:
                attributes[f"_cls_{key}"] = value
        attributes["_cls_name"] = name

        # Container items are merged with the values of the base class so
        # that a child can inherit but override one bit. Lists (which you
        # get from configs) are treated as sets for this to work
        # consistently.
        for field in ("components", "parameters", "derivedComponents"):
            attributeName = f"_cls_{field}"
            if attributeName not in attributes:
                continue
            inherited = getattr(parent, attributeName, None)
            if inherited is None:
                continue
            override = attributes[attributeName]
            merged = inherited.copy() if isinstance(override, dict) else set(inherited)
            merged.update(override)
            attributes[attributeName] = merged

        # Parameters are frozen so that the parameters in the class can not
        # be modified.
        if "_cls_parameters" in attributes:
            attributes["_cls_parameters"] = frozenset(attributes["_cls_parameters"])

        return type(f"StorageClass{name}", (parent,), attributes)

    def getStorageClass(self, storageClassName: str) -> StorageClass:
        """Get a StorageClass instance associated with the supplied name.

        Parameters
        ----------
        storageClassName : `str`
            Name of the storage class to retrieve.

        Returns
        -------
        instance : `StorageClass`
            Instance of the correct `StorageClass`.

        Raises
        ------
        KeyError
            The requested storage class name is not registered.
        """
        return self._storageClasses[storageClassName]

    def registerStorageClass(self, storageClass: StorageClass) -> None:
        """Store the `StorageClass` in the factory.

        Will be indexed by `StorageClass.name` and will return instances
        of the supplied `StorageClass`.

        Parameters
        ----------
        storageClass : `StorageClass`
            Type of the Python `StorageClass` to register.

        Raises
        ------
        ValueError
            If a storage class has already been registered with
            storageClassName and the previous definition differs.
        """
        label = storageClass.name
        if label not in self._storageClasses:
            self._storageClasses[label] = storageClass
            return

        # Registering an equal definition again is allowed and changes
        # nothing; only a conflicting definition is an error.
        existing = self.getStorageClass(label)
        if existing != storageClass:
            raise ValueError(f"New definition for StorageClass {storageClass.name} ({storageClass}) "
                             f"differs from current definition ({existing})")

    def _unregisterStorageClass(self, storageClassName: str) -> None:
        """Remove the named StorageClass from the factory.

        Parameters
        ----------
        storageClassName : `str`
            Name of storage class to remove.

        Raises
        ------
        KeyError
            The named storage class is not registered.

        Notes
        -----
        This method is intended to simplify testing of StorageClassFactory
        functionality and it is not expected to be required for normal usage.
        """
        del self._storageClasses[storageClassName]