Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24"""Support for Storage Classes.""" 

25 

26__all__ = ("StorageClass", "StorageClassFactory", "StorageClassConfig") 

27 

28import builtins 

29import copy 

30import logging 

31 

32from typing import ( 

33 Any, 

34 Collection, 

35 Dict, 

36 List, 

37 Mapping, 

38 Optional, 

39 Set, 

40 Sequence, 

41 Tuple, 

42 Type, 

43 Union, 

44) 

45 

46from lsst.utils import doImport 

47from .utils import Singleton, getFullTypeName 

48from .storageClassDelegate import StorageClassDelegate 

49from .config import ConfigSubset, Config 

50from .configSupport import LookupKey 

51 

52log = logging.getLogger(__name__) 

53 

54 

class StorageClassConfig(ConfigSubset):
    """Configuration subset holding storage class definitions."""

    # Key of the section containing the storage class definitions.
    component = "storageClasses"

    # Name of the file providing the default definitions.
    defaultConfigFile = "storageClasses.yaml"

58 

59 

class StorageClass:
    """Class describing how a label maps to a particular Python type.

    Parameters
    ----------
    name : `str`
        Name to use for this class.
    pytype : `type` or `str`
        Python type (or name of type) to associate with the `StorageClass`
    components : `dict`, optional
        `dict` mapping name of a component to another `StorageClass`.
    derivedComponents : `dict`, optional
        `dict` mapping name of a derived component to another `StorageClass`.
    parameters : `~collections.abc.Sequence` or `~collections.abc.Set`
        Parameters understood by this `StorageClass` that can control
        reading of data from datastores.
    delegate : `str`, optional
        Fully qualified name of class supporting assembly and disassembly
        of a `pytype` instance.

    Notes
    -----
    Any constructor argument that is `None` falls back to the matching
    ``_cls_*`` class attribute, which lets dynamically created subclasses
    carry their own defaults.
    """

    # Class-level defaults used when the constructor argument is None.
    _cls_name: str = "BaseStorageClass"
    _cls_components: Optional[Dict[str, StorageClass]] = None
    _cls_derivedComponents: Optional[Dict[str, StorageClass]] = None
    _cls_parameters: Optional[Union[Set[str], Sequence[str]]] = None
    _cls_delegate: Optional[str] = None
    _cls_pytype: Optional[Union[Type, str]] = None

    # Delegate assigned when components are given without a delegate.
    defaultDelegate: Type = StorageClassDelegate
    defaultDelegateName: str = getFullTypeName(defaultDelegate)

    def __init__(self, name: Optional[str] = None,
                 pytype: Optional[Union[Type, str]] = None,
                 components: Optional[Dict[str, StorageClass]] = None,
                 derivedComponents: Optional[Dict[str, StorageClass]] = None,
                 parameters: Optional[Union[Sequence, Set]] = None,
                 delegate: Optional[str] = None):
        # Fall back to the class-level defaults for anything not supplied.
        if name is None:
            name = self._cls_name
        if pytype is None:
            pytype = self._cls_pytype
        if components is None:
            components = self._cls_components
        if derivedComponents is None:
            derivedComponents = self._cls_derivedComponents
        if parameters is None:
            parameters = self._cls_parameters
        if delegate is None:
            delegate = self._cls_delegate
        self.name = name

        if pytype is None:
            pytype = object

        self._pytype: Optional[Type]
        if not isinstance(pytype, str):
            # Already have a type so store it and get the name
            self._pytypeName = getFullTypeName(pytype)
            self._pytype = pytype
        else:
            # Store the type name and defer loading of type
            self._pytypeName = pytype
            self._pytype = None

        if components is not None:
            if len(components) == 1:
                raise ValueError(f"Composite storage class {name} is not allowed to have"
                                 f" only one component '{next(iter(components))}'."
                                 " Did you mean it to be a derived component?")
            self._components = components
        else:
            self._components = {}
        self._derivedComponents = derivedComponents if derivedComponents is not None else {}
        self._parameters = frozenset(parameters) if parameters is not None else frozenset()

        # An explicit delegate is stored by name and imported lazily.
        self._delegate: Optional[Type]
        self._delegateClassName: Optional[str]
        if delegate is not None:
            self._delegateClassName = delegate
            self._delegate = None
        elif components is not None:
            # We set a default delegate for composites so that a class is
            # guaranteed to support something if it is a composite.
            log.debug("Setting default delegate for %s", self.name)
            self._delegate = self.defaultDelegate
            self._delegateClassName = self.defaultDelegateName
        else:
            self._delegate = None
            self._delegateClassName = None

    @property
    def components(self) -> Dict[str, StorageClass]:
        """Component names mapped to associated `StorageClass`
        """
        return self._components

    @property
    def derivedComponents(self) -> Dict[str, StorageClass]:
        """Derived component names mapped to associated `StorageClass`
        """
        return self._derivedComponents

    @property
    def parameters(self) -> Set[str]:
        """`set` of names of parameters supported by this `StorageClass`

        A new `set` is returned on every access.
        """
        return set(self._parameters)

    @property
    def pytype(self) -> Type:
        """Python type associated with this `StorageClass`.

        If only the type name was supplied, the type is resolved on first
        access (from `builtins` if the name is found there, otherwise via
        ``doImport``) and cached.
        """
        if self._pytype is not None:
            return self._pytype

        if hasattr(builtins, self._pytypeName):
            pytype = getattr(builtins, self._pytypeName)
        else:
            pytype = doImport(self._pytypeName)
        self._pytype = pytype
        return self._pytype

    @property
    def delegateClass(self) -> Optional[Type]:
        """Class to use to delegate type-specific actions.

        `None` if no delegate is associated. The class is imported on
        first access and cached.
        """
        if self._delegate is not None:
            return self._delegate
        if self._delegateClassName is None:
            return None
        self._delegate = doImport(self._delegateClassName)
        return self._delegate

    def allComponents(self) -> Mapping[str, StorageClass]:
        """Return a mapping of all the derived and read/write components
        to the corresponding storage class.

        Returns
        -------
        comp : `dict` of [`str`, `StorageClass`]
            The component name to storage class mapping. This is a new
            mapping; derived components replace read/write components
            of the same name.
        """
        components = copy.copy(self.components)
        components.update(self.derivedComponents)
        return components

    def delegate(self) -> StorageClassDelegate:
        """Return an instance of a storage class delegate.

        Returns
        -------
        delegate : `StorageClassDelegate`
            Instance of the delegate associated with this `StorageClass`.
            The delegate is constructed with this `StorageClass`.

        Raises
        ------
        TypeError
            This StorageClass has no associated delegate.
        """
        cls = self.delegateClass
        if cls is None:
            raise TypeError(f"No delegate class is associated with StorageClass {self.name}")
        return cls(storageClass=self)

    def isComposite(self) -> bool:
        """Boolean indicating whether this `StorageClass` is a composite
        or not.

        Returns
        -------
        isComposite : `bool`
            `True` if this `StorageClass` is a composite, `False`
            otherwise.
        """
        return bool(self.components)

    def _lookupNames(self) -> Tuple[LookupKey, ...]:
        """Keys to use when looking up this DatasetRef in a configuration.

        The names are returned in order of priority.

        Returns
        -------
        names : `tuple` of `LookupKey`
            Tuple of a `LookupKey` using the `StorageClass` name.
        """
        return (LookupKey(name=self.name), )

    def knownParameters(self) -> Set[str]:
        """Return set of all parameters known to this `StorageClass`

        The set includes parameters understood by components of a composite.

        Returns
        -------
        known : `set`
            All parameter keys of this `StorageClass` and the component
            storage classes.
        """
        known = set(self._parameters)
        for sc in self.components.values():
            known.update(sc.knownParameters())
        return known

    def validateParameters(self, parameters: Optional[Collection] = None) -> None:
        """Check that the parameters are known to this `StorageClass`

        Does not check the values.

        Parameters
        ----------
        parameters : `~collections.abc.Collection`, optional
            Collection containing the parameters. Can be `dict`-like or
            `set`-like.  The parameter values are not checked.
            If no parameters are supplied, always returns without error.

        Raises
        ------
        KeyError
            Some parameters are not understood by this `StorageClass`.
        """
        # No parameters is always okay
        if not parameters:
            return

        # Iterating a dict yields its keys, so this works for both
        # dict-like and set/list-like inputs.
        unknown = set(parameters) - self.knownParameters()
        if unknown:
            s = "s" if len(unknown) > 1 else ""
            # Sort so the error message is reproducible.
            names = "', '".join(sorted(unknown))
            raise KeyError(f"Parameter{s} '{names}' not understood by StorageClass {self.name}")

    def filterParameters(self, parameters: Optional[Dict[str, Any]],
                         subset: Optional[Collection] = None) -> Dict[str, Any]:
        """Filter out parameters that are not known to this StorageClass

        Parameters
        ----------
        parameters : `dict`, optional
            Candidate parameters. Can be `None` if no parameters have
            been provided.
        subset : `~collections.abc.Collection`, optional
            Subset of supported parameters that the caller is interested
            in using.  The subset must be known to the `StorageClass`
            if specified. If `None` the supplied parameters will all
            be checked, else only the keys in this set will be checked.

        Returns
        -------
        filtered : `dict`
            Valid parameters, in the order they appear in ``parameters``.
            Empty `dict` if none are suitable.

        Raises
        ------
        ValueError
            Raised if the provided subset is not a subset of the supported
            parameters or if it is an empty set. The subset is only
            examined when ``parameters`` is non-empty.
        """
        if not parameters:
            return {}

        known = self.knownParameters()

        if subset is not None:
            if not subset:
                raise ValueError("Specified a parameter subset but it was empty")
            subset = set(subset)
            if not subset.issubset(known):
                raise ValueError(f"Requested subset ({subset}) is not a subset of"
                                 f" known parameters ({known})")
            wanted = subset
        else:
            wanted = known

        # Walk the caller's mapping rather than the set so that the key
        # order of the result is deterministic.
        return {k: v for k, v in parameters.items() if k in wanted}

    def validateInstance(self, instance: Any) -> bool:
        """Check that the supplied Python object has the expected Python type

        Parameters
        ----------
        instance : `object`
            Object to check.

        Returns
        -------
        isOk : `bool`
            True if the supplied instance object can be handled by this
            `StorageClass`, False otherwise.
        """
        return isinstance(instance, self.pytype)

    def __eq__(self, other: Any) -> bool:
        """Equality checks name, pytype name, delegate name, parameters
        and components.

        Derived components are not compared.
        """
        if not isinstance(other, StorageClass):
            # Let Python try the reflected comparison; the final result
            # of ``==`` for an unrelated type is still `False`.
            return NotImplemented

        if self.name != other.name:
            return False

        # We must compare pytype and delegate by name since we do not want
        # to trigger an import of external module code here
        if self._delegateClassName != other._delegateClassName:
            return False
        if self._pytypeName != other._pytypeName:
            return False

        # Ensure we have the same component keys in each
        if set(self.components.keys()) != set(other.components.keys()):
            return False

        # Same parameters
        if self.parameters != other.parameters:
            return False

        # Ensure that all the components have the same type
        for k in self.components:
            if self.components[k] != other.components[k]:
                return False

        # If we got to this point everything checks out
        return True

    def __hash__(self) -> int:
        # Hash on the name only; equal instances always share a name.
        return hash(self.name)

    def __repr__(self) -> str:
        optionals: Dict[str, Any] = {}
        if self._pytypeName != "object":
            optionals["pytype"] = self._pytypeName
        if self._delegateClassName is not None:
            optionals["delegate"] = self._delegateClassName
        if self._parameters:
            optionals["parameters"] = self._parameters
        if self.components:
            optionals["components"] = self.components

        # order is preserved in the dict
        options = ", ".join(f"{k}={v!r}" for k, v in optionals.items())

        # Start with mandatory fields
        r = f"{self.__class__.__name__}({self.name!r}"
        if options:
            r = r + ", " + options
        r = r + ")"
        return r

    def __str__(self) -> str:
        return self.name

413 

414 

class StorageClassFactory(metaclass=Singleton):
    """Factory for `StorageClass` instances.

    This class is a singleton, with each instance sharing the pool of
    StorageClasses. Since code can not know whether it is the first
    time the instance has been created, the constructor takes no arguments.
    To populate the factory with storage classes, a call to
    `~StorageClassFactory.addFromConfig()` should be made.

    Parameters
    ----------
    config : `StorageClassConfig` or `str`, optional
        Load configuration. In a `ButlerConfig` the relevant configuration
        is located in the ``storageClasses`` section.
    """

    def __init__(self, config: Optional[Union[StorageClassConfig, str]] = None):
        self._storageClasses: Dict[str, StorageClass] = {}
        self._configs: List[StorageClassConfig] = []

        # The default definitions are always loaded first.
        self.addFromConfig(StorageClassConfig())

        if config is not None:
            self.addFromConfig(config)

    def __str__(self) -> str:
        """Return summary of factory.

        Returns
        -------
        summary : `str`
            Summary of the factory status.
        """
        entries = "\n".join(f"{label}: {sc}" for label, sc in self._storageClasses.items())
        return (
            f"Number of registered StorageClasses: {len(self._storageClasses)}\n"
            "\n"
            "StorageClasses\n"
            "--------------\n"
            f"{entries}\n"
        )

    def __contains__(self, storageClassOrName: Union[StorageClass, str]) -> bool:
        """Indicates whether the storage class exists in the factory.

        Parameters
        ----------
        storageClassOrName : `str` or `StorageClass`
            If `str` is given existence of the named StorageClass
            in the factory is checked. If `StorageClass` is given
            existence and equality are checked.

        Returns
        -------
        in : `bool`
            True if the supplied string is present, or if the supplied
            `StorageClass` is present and identical.

        Notes
        -----
        The two different checks (one for "key" and one for "value") based on
        the type of the given argument mean that it is possible for
        StorageClass.name to be in the factory but StorageClass to not be
        in the factory.
        """
        if isinstance(storageClassOrName, str):
            return storageClassOrName in self._storageClasses

        if not isinstance(storageClassOrName, StorageClass):
            return False

        try:
            registered = self._storageClasses[storageClassOrName.name]
        except KeyError:
            return False
        return storageClassOrName == registered

    def addFromConfig(self, config: Union[StorageClassConfig, Config, str]) -> None:
        """Add more `StorageClass` definitions from a config file.

        Parameters
        ----------
        config : `StorageClassConfig`, `Config` or `str`
            Storage class configuration. Can contain a ``storageClasses``
            key if part of a global configuration.
        """
        sconfig = StorageClassConfig(config)
        # NOTE(review): the definitions are popped from this same object
        # below, so the stored config is emptied as it is processed --
        # confirm this is intended.
        self._configs.append(sconfig)

        # A definition may refer to components or a parent that appear
        # later in the configuration, so definitions are resolved with a
        # recursive helper that handles dependencies first.
        def _define(name: str, pending: StorageClassConfig) -> None:
            # Nothing to do if recursion already consumed this entry.
            if name not in pending:
                return
            info = pending.pop(name)

            # Scalar items are handed to the constructor unchanged.
            kwargs = {key: info[key] for key in ("pytype", "delegate", "parameters") if key in info}

            # Replace component type names with registered instances.
            for kind in ("components", "derivedComponents"):
                if kind in info:
                    resolved = {}
                    for compName, compType in info[kind].items():
                        if compType not in self:
                            _define(compType, pending)
                        resolved[compName] = self.getStorageClass(compType)
                    kwargs[kind] = resolved

            # Work out the parent class, defining it first if needed.
            baseClass = None
            if "inheritsFrom" in info:
                parentName = info["inheritsFrom"]
                if parentName not in self:
                    _define(parentName, pending)
                baseClass = type(self.getStorageClass(parentName))

            # Build the new type, instantiate it, and register it.
            newType = self.makeNewStorageClass(name, baseClass, **kwargs)
            self.registerStorageClass(newType())

        # Snapshot the keys because the helper removes entries as it goes.
        for entry in list(sconfig.keys()):
            _define(entry, sconfig)

    @staticmethod
    def makeNewStorageClass(name: str,
                            baseClass: Optional[Type[StorageClass]] = StorageClass,
                            **kwargs: Any) -> Type[StorageClass]:
        """Create a new Python class as a subclass of `StorageClass`.

        Parameters
        ----------
        name : `str`
            Name to use for this class.
        baseClass : `type`, optional
            Base class for this `StorageClass`. Must be either `StorageClass`
            or a subclass of `StorageClass`. If `None`, `StorageClass` will
            be used.
        **kwargs
            Values stored on the new class as ``_cls_<key>`` attributes.
            Entries whose value is `None` are ignored.

        Returns
        -------
        newtype : `type` subclass of `StorageClass`
            Newly created Python type.

        Raises
        ------
        ValueError
            Raised if ``baseClass`` is not a subclass of `StorageClass`.
        """
        parent = StorageClass if baseClass is None else baseClass
        if not issubclass(parent, StorageClass):
            raise ValueError(f"Base class must be a StorageClass not {parent}")

        # Map the arguments onto the internal class attribute names.
        attrs = {f"_cls_{key}": value for key, value in kwargs.items() if value is not None}
        attrs["_cls_name"] = name

        # Container items are merged with the parent's values so that a
        # child can inherit and override only part of a definition. Lists
        # (as read from configs) are treated as sets to keep this
        # consistent.
        for attrName in ("_cls_components", "_cls_parameters", "_cls_derivedComponents"):
            if attrName not in attrs:
                continue
            inherited = getattr(parent, attrName, None)
            if inherited is None:
                continue
            override = attrs[attrName]
            merged = inherited.copy() if isinstance(override, dict) else set(inherited)
            merged.update(override)
            attrs[attrName] = merged

        # Freeze the parameters so the class-level value can not be
        # modified.
        if "_cls_parameters" in attrs:
            attrs["_cls_parameters"] = frozenset(attrs["_cls_parameters"])

        return type(f"StorageClass{name}", (parent,), attrs)

    def getStorageClass(self, storageClassName: str) -> StorageClass:
        """Get a StorageClass instance associated with the supplied name.

        Parameters
        ----------
        storageClassName : `str`
            Name of the storage class to retrieve.

        Returns
        -------
        instance : `StorageClass`
            Instance of the correct `StorageClass`.

        Raises
        ------
        KeyError
            The requested storage class name is not registered.
        """
        return self._storageClasses[storageClassName]

    def registerStorageClass(self, storageClass: StorageClass) -> None:
        """Store the `StorageClass` in the factory.

        Will be indexed by `StorageClass.name` and will return instances
        of the supplied `StorageClass`.

        Parameters
        ----------
        storageClass : `StorageClass`
            Type of the Python `StorageClass` to register.

        Raises
        ------
        ValueError
            If a storage class has already been registered with
            storageClassName and the previous definition differs.
        """
        if storageClass.name not in self._storageClasses:
            self._storageClasses[storageClass.name] = storageClass
            return

        # Re-registering an equal definition is accepted and does nothing.
        existing = self.getStorageClass(storageClass.name)
        if existing != storageClass:
            raise ValueError(f"New definition for StorageClass {storageClass.name} ({storageClass}) "
                             f"differs from current definition ({existing})")

    def _unregisterStorageClass(self, storageClassName: str) -> None:
        """Remove the named StorageClass from the factory.

        Parameters
        ----------
        storageClassName : `str`
            Name of storage class to remove.

        Raises
        ------
        KeyError
            The named storage class is not registered.

        Notes
        -----
        This method is intended to simplify testing of StorageClassFactory
        functionality and it is not expected to be required for normal usage.
        """
        del self._storageClasses[storageClassName]