Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

"""Support for Storage Classes."""

# The docstring has to be the first statement of the module to be stored
# in ``__doc__``; a future statement is allowed to follow it.
from __future__ import annotations

25 

26__all__ = ("StorageClass", "StorageClassFactory", "StorageClassConfig") 

27 

28import builtins 

29import copy 

30import logging 

31 

32from typing import ( 

33 Any, 

34 Collection, 

35 Dict, 

36 List, 

37 Mapping, 

38 Optional, 

39 Set, 

40 Sequence, 

41 Tuple, 

42 Type, 

43 Union, 

44) 

45 

46from lsst.utils import doImport 

47from .utils import Singleton, getFullTypeName 

48from .storageClassDelegate import StorageClassDelegate 

49from .config import ConfigSubset, Config 

50from .configSupport import LookupKey 

51 

52log = logging.getLogger(__name__) 

53 

54 

class StorageClassConfig(ConfigSubset):
    """Configuration subset holding the definitions of Storage Classes."""

    # Key identifying the storage class section of a larger configuration
    # (presumably consumed by ``ConfigSubset`` -- confirm against that class).
    component = "storageClasses"

    # File name associated with the default storage class definitions
    # (presumably located and read by ``ConfigSubset`` -- confirm).
    defaultConfigFile = "storageClasses.yaml"

60 

61 

class StorageClass:
    """Class describing how a label maps to a particular Python type.

    Parameters
    ----------
    name : `str`, optional
        Name to use for this class. If `None` the class-level default name
        is used.
    pytype : `type` or `str`, optional
        Python type (or name of type) to associate with the `StorageClass`.
        If neither this argument nor the class-level default is set,
        `object` is used.
    components : `dict`, optional
        `dict` mapping name of a component to another `StorageClass`.
    derivedComponents : `dict`, optional
        `dict` mapping name of a derived component to another `StorageClass`.
    parameters : `~collections.abc.Sequence` or `~collections.abc.Set`
        Parameters understood by this `StorageClass` that can control
        reading of data from datastores.
    delegate : `str`, optional
        Fully qualified name of class supporting assembly and disassembly
        of a `pytype` instance.

    Raises
    ------
    ValueError
        Raised if exactly one component is supplied.
    """

    # Class-level defaults. Each constructor argument that is `None` falls
    # back to the matching ``_cls_*`` attribute.
    _cls_name: str = "BaseStorageClass"
    _cls_components: Optional[Dict[str, StorageClass]] = None
    _cls_derivedComponents: Optional[Dict[str, StorageClass]] = None
    _cls_parameters: Optional[Union[Set[str], Sequence[str]]] = None
    _cls_delegate: Optional[str] = None
    _cls_pytype: Optional[Union[Type, str]] = None
    defaultDelegate: Type = StorageClassDelegate
    defaultDelegateName: str = getFullTypeName(defaultDelegate)

    def __init__(self, name: Optional[str] = None,
                 pytype: Optional[Union[Type, str]] = None,
                 components: Optional[Dict[str, StorageClass]] = None,
                 derivedComponents: Optional[Dict[str, StorageClass]] = None,
                 parameters: Optional[Union[Sequence, Set]] = None,
                 delegate: Optional[str] = None):
        # Anything not given explicitly is taken from the class-level
        # defaults so that a subclass can be instantiated with no arguments.
        if name is None:
            name = self._cls_name
        if pytype is None:
            pytype = self._cls_pytype
        if components is None:
            components = self._cls_components
        if derivedComponents is None:
            derivedComponents = self._cls_derivedComponents
        if parameters is None:
            parameters = self._cls_parameters
        if delegate is None:
            delegate = self._cls_delegate
        self.name = name

        if pytype is None:
            pytype = object

        self._pytype: Optional[Type]
        if not isinstance(pytype, str):
            # Already have a type so store it and get the name
            self._pytypeName = getFullTypeName(pytype)
            self._pytype = pytype
        else:
            # Store the type name and defer loading of type
            self._pytypeName = pytype
            self._pytype = None

        if components is not None:
            if len(components) == 1:
                raise ValueError(f"Composite storage class {name} is not allowed to have"
                                 f" only one component '{next(iter(components))}'."
                                 " Did you mean it to be a derived component?")
            self._components = components
        else:
            self._components = {}
        self._derivedComponents = derivedComponents if derivedComponents is not None else {}
        self._parameters = frozenset(parameters) if parameters is not None else frozenset()

        # An explicit delegate name takes priority and its import is
        # deferred until the delegate class is first requested.
        self._delegate: Optional[Type]
        self._delegateClassName: Optional[str]
        if delegate is not None:
            self._delegateClassName = delegate
            self._delegate = None
        elif components is not None:
            # We set a default delegate for composites so that a class is
            # guaranteed to support something if it is a composite.
            log.debug("Setting default delegate for %s", self.name)
            self._delegate = self.defaultDelegate
            self._delegateClassName = self.defaultDelegateName
        else:
            self._delegate = None
            self._delegateClassName = None

    @property
    def components(self) -> Dict[str, StorageClass]:
        """Return the components associated with this `StorageClass`."""
        return self._components

    @property
    def derivedComponents(self) -> Dict[str, StorageClass]:
        """Return derived components associated with `StorageClass`."""
        return self._derivedComponents

    @property
    def parameters(self) -> Set[str]:
        """Return `set` of names of supported parameters.

        A new `set` is returned on each access so modifying it does not
        affect this `StorageClass`.
        """
        return set(self._parameters)

    @property
    def pytype(self) -> Type:
        """Return Python type associated with this `StorageClass`.

        If only the type name is known the type is resolved on first access,
        either as an attribute of `builtins` or by importing it, and the
        result is cached.
        """
        if self._pytype is not None:
            return self._pytype

        if hasattr(builtins, self._pytypeName):
            pytype = getattr(builtins, self._pytypeName)
        else:
            pytype = doImport(self._pytypeName)
        self._pytype = pytype
        return self._pytype

    @property
    def delegateClass(self) -> Optional[Type]:
        """Class to use to delegate type-specific actions.

        `None` if no delegate is associated with this `StorageClass`.
        The class is imported on first access and then cached.
        """
        if self._delegate is not None:
            return self._delegate
        if self._delegateClassName is None:
            return None
        self._delegate = doImport(self._delegateClassName)
        return self._delegate

    def allComponents(self) -> Mapping[str, StorageClass]:
        """Return all defined components.

        This mapping includes all the derived and read/write components
        for the corresponding storage class.

        Returns
        -------
        comp : `dict` of [`str`, `StorageClass`]
            The component name to storage class mapping. This is a new
            mapping; a derived component replaces a read/write component
            of the same name.
        """
        # Shallow copy so that the update does not modify our own mapping.
        components = copy.copy(self.components)
        components.update(self.derivedComponents)
        return components

    def delegate(self) -> StorageClassDelegate:
        """Return an instance of a storage class delegate.

        Returns
        -------
        delegate : `StorageClassDelegate`
            Instance of the delegate associated with this `StorageClass`.
            The delegate is constructed with this `StorageClass`.

        Raises
        ------
        TypeError
            This StorageClass has no associated delegate.
        """
        cls = self.delegateClass
        if cls is None:
            raise TypeError(f"No delegate class is associated with StorageClass {self.name}")
        return cls(storageClass=self)

    def isComposite(self) -> bool:
        """Return Boolean indicating whether this is a composite or not.

        Returns
        -------
        isComposite : `bool`
            `True` if this `StorageClass` is a composite, `False`
            otherwise. Only read/write components are considered; derived
            components do not make a storage class a composite.
        """
        return bool(self.components)

    def _lookupNames(self) -> Tuple[LookupKey, ...]:
        """Keys to use when looking up this DatasetRef in a configuration.

        The names are returned in order of priority.

        Returns
        -------
        names : `tuple` of `LookupKey`
            Tuple of a `LookupKey` using the `StorageClass` name.
        """
        return (LookupKey(name=self.name), )

    def knownParameters(self) -> Set[str]:
        """Return set of all parameters known to this `StorageClass`.

        The set includes parameters understood by components of a composite.

        Returns
        -------
        known : `set`
            All parameter keys of this `StorageClass` and the component
            storage classes.
        """
        known = set(self._parameters)
        for sc in self.components.values():
            known.update(sc.knownParameters())
        return known

    def validateParameters(self, parameters: Optional[Collection] = None) -> None:
        """Check that the parameters are known to this `StorageClass`.

        Does not check the values.

        Parameters
        ----------
        parameters : `~collections.abc.Collection`, optional
            Collection containing the parameters. Can be `dict`-like or
            `set`-like.  The parameter values are not checked.
            If no parameters are supplied, always returns without error.

        Raises
        ------
        KeyError
            Some parameters are not understood by this `StorageClass`.
        """
        # No parameters is always okay
        if not parameters:
            return

        # Extract the important information into a set. Works for dict and
        # list.
        external = set(parameters)

        diff = external - self.knownParameters()
        if diff:
            s = "s" if len(diff) > 1 else ""
            # Sort the names so that the message does not depend on set
            # iteration order.
            unknown = "', '".join(sorted(diff))
            raise KeyError(f"Parameter{s} '{unknown}' not understood by StorageClass {self.name}")

    def filterParameters(self, parameters: Optional[Dict[str, Any]],
                         subset: Optional[Collection] = None) -> Dict[str, Any]:
        """Filter out parameters that are not known to this `StorageClass`.

        Parameters
        ----------
        parameters : `dict`, optional
            Candidate parameters. Can be `None` if no parameters have
            been provided.
        subset : `~collections.abc.Collection`, optional
            Subset of supported parameters that the caller is interested
            in using.  The subset must be known to the `StorageClass`
            if specified. If `None` the supplied parameters will all
            be checked, else only the keys in this set will be checked.

        Returns
        -------
        filtered : `dict`
            Valid parameters. Empty `dict` if none are suitable.

        Raises
        ------
        ValueError
            Raised if the provided subset is not a subset of the supported
            parameters or if it is an empty set.
        """
        # Nothing to filter. Note that the subset is not validated in
        # this case.
        if not parameters:
            return {}

        known = self.knownParameters()

        if subset is not None:
            if not subset:
                raise ValueError("Specified a parameter subset but it was empty")
            subset = set(subset)
            if not subset.issubset(known):
                raise ValueError(f"Requested subset ({subset}) is not a subset of"
                                 f" known parameters ({known})")
            wanted = subset
        else:
            wanted = known

        return {k: parameters[k] for k in wanted if k in parameters}

    def validateInstance(self, instance: Any) -> bool:
        """Check that the supplied Python object has the expected Python type.

        Parameters
        ----------
        instance : `object`
            Object to check.

        Returns
        -------
        isOk : `bool`
            True if the supplied instance object can be handled by this
            `StorageClass`, False otherwise.
        """
        return isinstance(instance, self.pytype)

    def __eq__(self, other: Any) -> bool:
        """Equality checks name, pytype name, delegate name, parameters,
        and components.

        Derived components are not part of the comparison.
        """
        if not isinstance(other, StorageClass):
            return NotImplemented

        if self.name != other.name:
            return False

        # We must compare pytype and delegate by name since we do not want
        # to trigger an import of external module code here
        if self._delegateClassName != other._delegateClassName:
            return False
        if self._pytypeName != other._pytypeName:
            return False

        # Ensure we have the same component keys in each
        if set(self.components.keys()) != set(other.components.keys()):
            return False

        # Same parameters
        if self.parameters != other.parameters:
            return False

        # Ensure that all the components have the same type
        for k in self.components:
            if self.components[k] != other.components[k]:
                return False

        # If we got to this point everything checks out
        return True

    def __hash__(self) -> int:
        # Objects that compare equal always share a name, so hashing the
        # name alone is consistent with __eq__.
        return hash(self.name)

    def __repr__(self) -> str:
        # Only report the items that differ from the "empty" defaults.
        optionals: Dict[str, Any] = {}
        if self._pytypeName != "object":
            optionals["pytype"] = self._pytypeName
        if self._delegateClassName is not None:
            optionals["delegate"] = self._delegateClassName
        if self._parameters:
            optionals["parameters"] = self._parameters
        if self.components:
            optionals["components"] = self.components

        # order is preserved in the dict
        options = ", ".join(f"{k}={v!r}" for k, v in optionals.items())

        # Start with mandatory fields
        r = f"{self.__class__.__name__}({self.name!r}"
        if options:
            r = r + ", " + options
        r = r + ")"
        return r

    def __str__(self) -> str:
        return self.name

413 

414 

class StorageClassFactory(metaclass=Singleton):
    """Factory for `StorageClass` instances.

    This class is a singleton, with each instance sharing the pool of
    StorageClasses. Since code can not know whether it is the first
    time the instance has been created, the constructor is expected to be
    called without arguments. To populate the factory with storage
    classes, a call to `~StorageClassFactory.addFromConfig()` should be
    made.

    Parameters
    ----------
    config : `StorageClassConfig` or `str`, optional
        Load configuration. In a `ButlerConfig` the relevant configuration
        is located in the ``storageClasses`` section. If given, it is
        added after the default configuration.
    """

    def __init__(self, config: Optional[Union[StorageClassConfig, str]] = None):
        self._storageClasses: Dict[str, StorageClass] = {}
        self._configs: List[StorageClassConfig] = []

        # Always seed with the default config
        self.addFromConfig(StorageClassConfig())

        if config is not None:
            self.addFromConfig(config)

    def __str__(self) -> str:
        """Return summary of factory.

        Returns
        -------
        summary : `str`
            Summary of the factory status.
        """
        sep = "\n"
        return f"""Number of registered StorageClasses: {len(self._storageClasses)}

StorageClasses
--------------
{sep.join(f"{s}: {self._storageClasses[s]}" for s in self._storageClasses)}
"""

    def __contains__(self, storageClassOrName: Union[StorageClass, str]) -> bool:
        """Indicate whether the storage class exists in the factory.

        Parameters
        ----------
        storageClassOrName : `str` or `StorageClass`
            If `str` is given existence of the named StorageClass
            in the factory is checked. If `StorageClass` is given
            existence and equality are checked.

        Returns
        -------
        in : `bool`
            True if the supplied string is present, or if the supplied
            `StorageClass` is present and identical.

        Notes
        -----
        The two different checks (one for "key" and one for "value") based on
        the type of the given argument mean that it is possible for
        StorageClass.name to be in the factory but StorageClass to not be
        in the factory.
        """
        if isinstance(storageClassOrName, str):
            return storageClassOrName in self._storageClasses
        elif isinstance(storageClassOrName, StorageClass):
            if storageClassOrName.name in self._storageClasses:
                return storageClassOrName == self._storageClasses[storageClassOrName.name]
        # Unregistered storage class, or an argument of any other type.
        return False

    def addFromConfig(self, config: Union[StorageClassConfig, Config, str]) -> None:
        """Add more `StorageClass` definitions from a config file.

        Parameters
        ----------
        config : `StorageClassConfig`, `Config` or `str`
            Storage class configuration. Can contain a ``storageClasses``
            key if part of a global configuration.

        Raises
        ------
        KeyError
            Raised if a definition refers to a component or parent storage
            class that is neither registered nor defined in ``config``.
        ValueError
            Raised if a definition differs from a storage class already
            registered under the same name.
        """
        sconfig = StorageClassConfig(config)
        # NOTE(review): the processing below pops entries from this same
        # object, so the stored config presumably ends up without the
        # processed definitions -- confirm whether that is intended.
        self._configs.append(sconfig)

        # Since we can not assume that we will get definitions of
        # components or parents before their classes are defined
        # we have a helper function that we can call recursively
        # to extract definitions from the configuration.
        def processStorageClass(name: str, sconfig: StorageClassConfig) -> None:
            # Maybe we've already processed this through recursion
            if name not in sconfig:
                return
            info = sconfig.pop(name)

            # Extract scalar items from dict that are needed for
            # StorageClass Constructor
            storageClassKwargs = {k: info[k] for k in ("pytype", "delegate", "parameters") if k in info}

            for compName in ("components", "derivedComponents"):
                if compName not in info:
                    continue
                components = {}
                for cname, ctype in info[compName].items():
                    # Make sure the component storage class is registered
                    # before it is looked up.
                    if ctype not in self:
                        processStorageClass(ctype, sconfig)
                    components[cname] = self.getStorageClass(ctype)

                # Fill in other items
                storageClassKwargs[compName] = components

            # Create the new storage class and register it
            baseClass = None
            if "inheritsFrom" in info:
                baseName = info["inheritsFrom"]
                if baseName not in self:
                    processStorageClass(baseName, sconfig)
                baseClass = type(self.getStorageClass(baseName))

            newStorageClassType = self.makeNewStorageClass(name, baseClass, **storageClassKwargs)
            newStorageClass = newStorageClassType()
            self.registerStorageClass(newStorageClass)

        # Iterate over a snapshot of the keys since entries are removed
        # from the config as they are processed.
        for name in list(sconfig.keys()):
            processStorageClass(name, sconfig)

    @staticmethod
    def makeNewStorageClass(name: str,
                            baseClass: Optional[Type[StorageClass]] = StorageClass,
                            **kwargs: Any) -> Type[StorageClass]:
        """Create a new Python class as a subclass of `StorageClass`.

        Parameters
        ----------
        name : `str`
            Name to use for this class.
        baseClass : `type`, optional
            Base class for this `StorageClass`. Must be either `StorageClass`
            or a subclass of `StorageClass`. If `None`, `StorageClass` will
            be used.
        **kwargs
            Class-level defaults for the new type. Each value that is not
            `None` is stored on the new class as ``_cls_<key>``.

        Returns
        -------
        newtype : `type` subclass of `StorageClass`
            Newly created Python type.

        Raises
        ------
        ValueError
            Raised if ``baseClass`` is not a subclass of `StorageClass`.
        """
        if baseClass is None:
            baseClass = StorageClass
        if not issubclass(baseClass, StorageClass):
            raise ValueError(f"Base class must be a StorageClass not {baseClass}")

        # convert the arguments to use different internal names
        clsargs = {f"_cls_{k}": v for k, v in kwargs.items() if v is not None}
        clsargs["_cls_name"] = name

        # Some container items need to merge with the base class values
        # so that a child can inherit but override one bit.
        # lists (which you get from configs) are treated as sets for this to
        # work consistently.
        for k in ("components", "parameters", "derivedComponents"):
            classKey = f"_cls_{k}"
            if classKey in clsargs:
                baseValue = getattr(baseClass, classKey, None)
                if baseValue is not None:
                    currentValue = clsargs[classKey]
                    # Start from a copy of the base value so that the base
                    # class is not modified by the update.
                    if isinstance(currentValue, dict):
                        newValue = baseValue.copy()
                    else:
                        newValue = set(baseValue)
                    newValue.update(currentValue)
                    clsargs[classKey] = newValue

        # If we have parameters they should be a frozen set so that the
        # parameters in the class can not be modified.
        pk = "_cls_parameters"
        if pk in clsargs:
            clsargs[pk] = frozenset(clsargs[pk])

        return type(f"StorageClass{name}", (baseClass,), clsargs)

    def getStorageClass(self, storageClassName: str) -> StorageClass:
        """Get a StorageClass instance associated with the supplied name.

        Parameters
        ----------
        storageClassName : `str`
            Name of the storage class to retrieve.

        Returns
        -------
        instance : `StorageClass`
            Instance of the correct `StorageClass`.

        Raises
        ------
        KeyError
            The requested storage class name is not registered.
        """
        return self._storageClasses[storageClassName]

    def registerStorageClass(self, storageClass: StorageClass) -> None:
        """Store the `StorageClass` in the factory.

        Will be indexed by `StorageClass.name` and will return instances
        of the supplied `StorageClass`.

        Parameters
        ----------
        storageClass : `StorageClass`
            Type of the Python `StorageClass` to register.

        Raises
        ------
        ValueError
            If a storage class has already been registered with
            storageClassName and the previous definition differs.
        """
        if storageClass.name in self._storageClasses:
            # Registering an equal definition again is a no-op; the
            # existing instance is retained.
            existing = self.getStorageClass(storageClass.name)
            if existing != storageClass:
                raise ValueError(f"New definition for StorageClass {storageClass.name} ({storageClass}) "
                                 f"differs from current definition ({existing})")
        else:
            self._storageClasses[storageClass.name] = storageClass

    def _unregisterStorageClass(self, storageClassName: str) -> None:
        """Remove the named StorageClass from the factory.

        Parameters
        ----------
        storageClassName : `str`
            Name of storage class to remove.

        Raises
        ------
        KeyError
            The named storage class is not registered.

        Notes
        -----
        This method is intended to simplify testing of StorageClassFactory
        functionality and it is not expected to be required for normal usage.
        """
        del self._storageClasses[storageClassName]