Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

"""Support for Storage Classes."""

from __future__ import annotations

25 

26__all__ = ("StorageClass", "StorageClassFactory", "StorageClassConfig") 

27 

28import builtins 

29import logging 

30 

31from typing import ( 

32 Any, 

33 Collection, 

34 Dict, 

35 List, 

36 Optional, 

37 Set, 

38 Sequence, 

39 Tuple, 

40 Type, 

41 Union, 

42) 

43 

44from lsst.utils import doImport 

45from .utils import Singleton, getFullTypeName 

46from .assembler import CompositeAssembler 

47from .config import ConfigSubset, Config 

48from .configSupport import LookupKey 

49 

50log = logging.getLogger(__name__) 

51 

52 

class StorageClassConfig(ConfigSubset):
    """Configuration subset describing storage class definitions.

    The attributes below are consumed by `ConfigSubset`, which is defined
    elsewhere; the notes on them are assumptions to confirm against it.
    """

    # Presumably the file that supplies default definitions -- TODO confirm
    # against ConfigSubset.
    defaultConfigFile = "storageClasses.yaml"

    # Presumably the key holding this subset when it is embedded in a
    # larger configuration -- TODO confirm against ConfigSubset.
    component = "storageClasses"

56 

57 

class StorageClass:
    """Class describing how a label maps to a particular Python type.

    Parameters
    ----------
    name : `str`, optional
        Name to use for this class. Falls back to the class-level
        ``_cls_name`` when not given.
    pytype : `type` or `str`, optional
        Python type (or name of type) to associate with the `StorageClass`.
        A name is only resolved to a type when `pytype` is first accessed.
        Falls back to ``_cls_pytype`` and finally to `object`.
    components : `dict`, optional
        `dict` mapping name of a component to another `StorageClass`.
        Falls back to ``_cls_components``.
    parameters : `~collections.abc.Sequence` or `~collections.abc.Set`
        Parameters understood by this `StorageClass` that can control
        reading of data from datastores. Falls back to ``_cls_parameters``.
    assembler : `str`, optional
        Fully qualified name of class supporting assembly and disassembly
        of a `pytype` instance. Falls back to ``_cls_assembler``.
    """

    # Class-level defaults used by ``__init__`` whenever the matching
    # constructor argument is `None`.
    _cls_name: str = "BaseStorageClass"
    _cls_components: Optional[Dict[str, StorageClass]] = None
    _cls_parameters: Optional[Union[Set[str], Sequence[str]]] = None
    _cls_assembler: Optional[str] = None
    _cls_pytype: Optional[Union[Type, str]] = None

    # Assembler used for composites that do not name one explicitly.
    defaultAssembler: Type = CompositeAssembler
    defaultAssemblerName: str = getFullTypeName(defaultAssembler)

    def __init__(self, name: Optional[str] = None,
                 pytype: Optional[Union[Type, str]] = None,
                 components: Optional[Dict[str, StorageClass]] = None,
                 parameters: Optional[Union[Sequence, Set]] = None,
                 assembler: Optional[str] = None):
        # Any argument left as None picks up the class-level default.
        if name is None:
            name = self._cls_name
        if pytype is None:
            pytype = self._cls_pytype
        if components is None:
            components = self._cls_components
        if parameters is None:
            parameters = self._cls_parameters
        if assembler is None:
            assembler = self._cls_assembler
        self.name = name

        if pytype is None:
            pytype = object

        self._pytype: Optional[Type]
        if not isinstance(pytype, str):
            # Already have a type so store it and get the name
            self._pytypeName = getFullTypeName(pytype)
            self._pytype = pytype
        else:
            # Store the type name and defer loading of type
            self._pytypeName = pytype
            self._pytype = None

        self._components = components if components is not None else {}
        self._parameters = frozenset(parameters) if parameters is not None else frozenset()

        # An explicit assembler name wins and is imported lazily; otherwise
        # anything declared with components gets the default assembler.
        self._assembler: Optional[Type]
        self._assemblerClassName: Optional[str]
        if assembler is not None:
            self._assemblerClassName = assembler
            self._assembler = None
        elif components is not None:
            # We set a default assembler for composites so that a class is
            # guaranteed to support something if it is a composite.
            log.debug("Setting default assembler for %s", self.name)
            self._assembler = self.defaultAssembler
            self._assemblerClassName = self.defaultAssemblerName
        else:
            self._assembler = None
            self._assemblerClassName = None

    @property
    def components(self) -> Dict[str, StorageClass]:
        """Component names mapped to associated `StorageClass`.

        The internal `dict` itself is returned, not a copy.
        """
        return self._components

    @property
    def parameters(self) -> Set[str]:
        """`set` of names of parameters supported by this `StorageClass`.

        A new `set` is returned on every access.
        """
        return set(self._parameters)

    @property
    def pytype(self) -> Type:
        """Python type associated with this `StorageClass`.

        If only a type name was supplied it is resolved on first access
        (from `builtins` when the name exists there, otherwise through
        ``doImport``) and the result is cached.
        """
        if self._pytype is not None:
            return self._pytype

        if hasattr(builtins, self._pytypeName):
            pytype = getattr(builtins, self._pytypeName)
        else:
            pytype = doImport(self._pytypeName)
        self._pytype = pytype
        return self._pytype

    @property
    def assemblerClass(self) -> Optional[Type]:
        """Class to use to (dis)assemble an object from components.

        `None` if no assembler is associated with this `StorageClass`.
        The class is imported on first access and then cached.
        """
        if self._assembler is not None:
            return self._assembler
        if self._assemblerClassName is None:
            return None
        self._assembler = doImport(self._assemblerClassName)
        return self._assembler

    def assembler(self) -> CompositeAssembler:
        """Return an instance of an assembler.

        Returns
        -------
        assembler : `CompositeAssembler`
            Instance of the assembler associated with this `StorageClass`.
            Assembler is constructed with this `StorageClass`.

        Raises
        ------
        TypeError
            This StorageClass has no associated assembler.
        """
        cls = self.assemblerClass
        if cls is None:
            raise TypeError(f"No assembler class is associated with StorageClass {self.name}")
        return cls(storageClass=self)

    def isComposite(self) -> bool:
        """Boolean indicating whether this `StorageClass` is a composite
        or not.

        Returns
        -------
        isComposite : `bool`
            `True` if this `StorageClass` has at least one component,
            `False` otherwise.
        """
        return bool(self.components)

    def _lookupNames(self) -> Tuple[LookupKey, ...]:
        """Keys to use when looking up this `StorageClass` in a
        configuration.

        The names are returned in order of priority.

        Returns
        -------
        names : `tuple` of `LookupKey`
            Tuple of a `LookupKey` using the `StorageClass` name.
        """
        return (LookupKey(name=self.name), )

    def knownParameters(self) -> Set[str]:
        """Return set of all parameters known to this `StorageClass`.

        The set includes parameters understood by components of a composite.

        Returns
        -------
        known : `set`
            All parameter keys of this `StorageClass` and the component
            storage classes.
        """
        known = set(self._parameters)
        for sc in self.components.values():
            known.update(sc.knownParameters())
        return known

    def validateParameters(self, parameters: Optional[Collection] = None) -> None:
        """Check that the parameters are known to this `StorageClass`.

        Does not check the values.

        Parameters
        ----------
        parameters : `~collections.abc.Collection`, optional
            Collection containing the parameters. Can be `dict`-like or
            `set`-like.  The parameter values are not checked.
            If no parameters are supplied, always returns without error.

        Raises
        ------
        KeyError
            Some parameters are not understood by this `StorageClass`.
        """
        # No parameters is always okay
        if not parameters:
            return

        # Extract the important information into a set. Works for dict and
        # list.
        external = set(parameters)

        diff = external - self.knownParameters()
        if diff:
            s = "s" if len(diff) > 1 else ""
            # Sort so that the message does not depend on set ordering.
            unknown = "', '".join(sorted(diff))
            raise KeyError(f"Parameter{s} '{unknown}' not understood by StorageClass {self.name}")

    def filterParameters(self, parameters: Optional[Dict[str, Any]],
                         subset: Optional[Collection] = None) -> Dict[str, Any]:
        """Filter out parameters that are not known to this StorageClass.

        Parameters
        ----------
        parameters : `dict`, optional
            Candidate parameters. Can be `None` if no parameters have
            been provided.
        subset : `~collections.abc.Collection`, optional
            Subset of supported parameters that the caller is interested
            in using.  The subset must be known to the `StorageClass`
            if specified. If `None` the supplied parameters will all
            be checked, else only the keys in this set will be checked.

        Returns
        -------
        filtered : `dict`
            Valid parameters, in the order they appear in ``parameters``.
            Empty `dict` if none are suitable.

        Raises
        ------
        ValueError
            Raised if the provided subset is not a subset of the supported
            parameters or if it is an empty set.
        """
        if not parameters:
            return {}

        known = self.knownParameters()

        if subset is not None:
            if not subset:
                raise ValueError("Specified a parameter subset but it was empty")
            subset = set(subset)
            if not subset.issubset(known):
                raise ValueError(f"Requested subset ({subset}) is not a subset of"
                                 f" known parameters ({known})")
            wanted = subset
        else:
            wanted = known

        # Walk the caller's mapping rather than the set so that the result
        # has a reproducible key order.
        return {k: parameters[k] for k in parameters if k in wanted}

    def validateInstance(self, instance: Any) -> bool:
        """Check that the supplied Python object has the expected Python type.

        Accessing the type may trigger an import if only its name has been
        stored so far.

        Parameters
        ----------
        instance : `object`
            Object to check.

        Returns
        -------
        isOk : `bool`
            True if the supplied instance object can be handled by this
            `StorageClass`, False otherwise.
        """
        return isinstance(instance, self.pytype)

    def __eq__(self, other: Any) -> bool:
        """Equality checks name, pytype name, assembler name, parameters
        and components.
        """
        if not isinstance(other, StorageClass):
            # Let Python try the reflected comparison; ``==`` against an
            # unrelated object still evaluates to False.
            return NotImplemented

        if self.name != other.name:
            return False

        # We must compare pytype and assembler by name since we do not want
        # to trigger an import of external module code here
        if self._assemblerClassName != other._assemblerClassName:
            return False
        if self._pytypeName != other._pytypeName:
            return False

        # Ensure we have the same component keys in each
        if set(self.components.keys()) != set(other.components.keys()):
            return False

        # Same parameters
        if self.parameters != other.parameters:
            return False

        # Ensure that all the components have the same type
        for k in self.components:
            if self.components[k] != other.components[k]:
                return False

        # If we got to this point everything checks out
        return True

    def __hash__(self) -> int:
        # Equal instances always share a name, so hashing the name alone is
        # consistent with __eq__.
        return hash(self.name)

    def __repr__(self) -> str:
        # Only report settings that differ from an "empty" definition.
        optionals: Dict[str, Any] = {}
        if self._pytypeName != "object":
            optionals["pytype"] = self._pytypeName
        if self._assemblerClassName is not None:
            optionals["assembler"] = self._assemblerClassName
        if self._parameters:
            optionals["parameters"] = self._parameters
        if self.components:
            optionals["components"] = self.components

        # order is preserved in the dict
        options = ", ".join(f"{k}={v!r}" for k, v in optionals.items())

        # Start with mandatory fields
        r = f"{self.__class__.__name__}({self.name!r}"
        if options:
            r = r + ", " + options
        r = r + ")"
        return r

    def __str__(self) -> str:
        return self.name

378 

379 

class StorageClassFactory(metaclass=Singleton):
    """Factory for `StorageClass` instances.

    This class is a singleton, with each instance sharing the pool of
    StorageClasses. Since code can not know whether it is the first
    time the instance has been created, the constructor can be called
    without arguments. To populate the factory with storage classes, a
    call to `~StorageClassFactory.addFromConfig()` should be made.

    Parameters
    ----------
    config : `StorageClassConfig` or `str`, optional
        Load configuration. In a `ButlerConfig` the relevant configuration
        is located in the ``storageClasses`` section.
    """

    def __init__(self, config: Optional[Union[StorageClassConfig, str]] = None):
        self._storageClasses: Dict[str, StorageClass] = {}
        self._configs: List[StorageClassConfig] = []

        # Always seed with the default config
        self.addFromConfig(StorageClassConfig())

        if config is not None:
            self.addFromConfig(config)

    def __str__(self) -> str:
        """Return summary of factory.

        Returns
        -------
        summary : `str`
            Summary of the factory status.
        """
        entries = "\n".join(f"{label}: {sc}" for label, sc in self._storageClasses.items())
        return (
            f"Number of registered StorageClasses: {len(self._storageClasses)}\n"
            "\n"
            "StorageClasses\n"
            "--------------\n"
            f"{entries}\n"
        )

    def __contains__(self, storageClassOrName: Union[StorageClass, str]) -> bool:
        """Indicates whether the storage class exists in the factory.

        Parameters
        ----------
        storageClassOrName : `str` or `StorageClass`
            If `str` is given existence of the named StorageClass
            in the factory is checked. If `StorageClass` is given
            existence and equality are checked.

        Returns
        -------
        in : `bool`
            True if the supplied string is present, or if the supplied
            `StorageClass` is present and identical.

        Notes
        -----
        The two different checks (one for "key" and one for "value") based on
        the type of the given argument mean that it is possible for
        StorageClass.name to be in the factory but StorageClass to not be
        in the factory.
        """
        if isinstance(storageClassOrName, str):
            return storageClassOrName in self._storageClasses
        if isinstance(storageClassOrName, StorageClass):
            try:
                registered = self._storageClasses[storageClassOrName.name]
            except KeyError:
                return False
            return storageClassOrName == registered
        # Anything that is neither a name nor a StorageClass is never present.
        return False

    def addFromConfig(self, config: Union[StorageClassConfig, Config, str]) -> None:
        """Add more `StorageClass` definitions from a config file.

        Parameters
        ----------
        config : `StorageClassConfig`, `Config` or `str`
            Storage class configuration. Can contain a ``storageClasses``
            key if part of a global configuration.

        Raises
        ------
        KeyError
            A component or ``inheritsFrom`` entry names a storage class that
            is neither registered nor defined in this configuration.
        ValueError
            A definition conflicts with an already registered storage class
            of the same name.
        """
        sconfig = StorageClassConfig(config)
        self._configs.append(sconfig)

        # Since we can not assume that we will get definitions of
        # components or parents before their classes are defined
        # we have a helper function that we can call recursively
        # to extract definitions from the configuration.
        def processStorageClass(name: str) -> None:
            # Maybe we've already processed this through recursion
            if name not in sconfig:
                return
            # Popping before recursing means each entry is handled at most
            # once, even if definitions refer to each other.
            info = sconfig.pop(name)

            # Always create the storage class so we can ensure that
            # we are not trying to overwrite with a different definition
            components: Optional[Dict[str, StorageClass]] = None
            if "components" in info:
                components = {}
                for cname, ctype in info["components"].items():
                    if ctype not in self:
                        processStorageClass(ctype)
                    components[cname] = self.getStorageClass(ctype)

            # Extract scalar items from dict that are needed for
            # StorageClass Constructor
            storageClassKwargs = {k: info[k] for k in ("pytype", "assembler", "parameters") if k in info}

            # Fill in other items
            storageClassKwargs["components"] = components

            # Resolve the parent first so its class can be used as the base.
            baseClass = None
            if "inheritsFrom" in info:
                baseName = info["inheritsFrom"]
                if baseName not in self:
                    processStorageClass(baseName)
                baseClass = type(self.getStorageClass(baseName))

            # Create the new storage class and register it
            newStorageClassType = self.makeNewStorageClass(name, baseClass, **storageClassKwargs)
            self.registerStorageClass(newStorageClassType())

        # Iterate over a snapshot of the keys since the helper removes
        # entries from the configuration as it goes.
        for name in list(sconfig.keys()):
            processStorageClass(name)

    @staticmethod
    def makeNewStorageClass(name: str,
                            baseClass: Optional[Type[StorageClass]] = StorageClass,
                            **kwargs: Any) -> Type[StorageClass]:
        """Create a new Python class as a subclass of `StorageClass`.

        Parameters
        ----------
        name : `str`
            Name to use for this class.
        baseClass : `type`, optional
            Base class for this `StorageClass`. Must be either `StorageClass`
            or a subclass of `StorageClass`. If `None`, `StorageClass` will
            be used.
        **kwargs
            Defaults for the new class. Each keyword ``key`` whose value is
            not `None` is stored as the class attribute ``_cls_<key>``.
            ``components`` and ``parameters`` are merged with the values
            inherited from ``baseClass``.

        Returns
        -------
        newtype : `type` subclass of `StorageClass`
            Newly created Python type.

        Raises
        ------
        ValueError
            ``baseClass`` is not `StorageClass` or a subclass of it.
        """
        if baseClass is None:
            baseClass = StorageClass
        if not issubclass(baseClass, StorageClass):
            raise ValueError(f"Base class must be a StorageClass not {baseClass}")

        # convert the arguments to use different internal names
        clsargs = {f"_cls_{k}": v for k, v in kwargs.items() if v is not None}
        clsargs["_cls_name"] = name

        # Some container items need to merge with the base class values
        # so that a child can inherit but override one bit.
        # lists (which you get from configs) are treated as sets for this to
        # work consistently.
        for classKey in ("_cls_components", "_cls_parameters"):
            if classKey not in clsargs:
                continue
            baseValue = getattr(baseClass, classKey, None)
            if baseValue is None:
                continue
            currentValue = clsargs[classKey]
            # Start from a copy of the inherited value so the base class
            # attribute itself is never modified.
            newValue = baseValue.copy() if isinstance(currentValue, dict) else set(baseValue)
            newValue.update(currentValue)
            clsargs[classKey] = newValue

        # If we have parameters they should be a frozen set so that the
        # parameters in the class can not be modified.
        pk = "_cls_parameters"
        if pk in clsargs:
            clsargs[pk] = frozenset(clsargs[pk])

        return type(f"StorageClass{name}", (baseClass,), clsargs)

    def getStorageClass(self, storageClassName: str) -> StorageClass:
        """Get a StorageClass instance associated with the supplied name.

        Parameters
        ----------
        storageClassName : `str`
            Name of the storage class to retrieve.

        Returns
        -------
        instance : `StorageClass`
            Instance of the correct `StorageClass`.

        Raises
        ------
        KeyError
            The requested storage class name is not registered.
        """
        return self._storageClasses[storageClassName]

    def registerStorageClass(self, storageClass: StorageClass) -> None:
        """Store the `StorageClass` in the factory.

        Will be indexed by `StorageClass.name` and will return instances
        of the supplied `StorageClass`. Registering a definition equal to
        the one already stored under that name is a no-op.

        Parameters
        ----------
        storageClass : `StorageClass`
            Instance of `StorageClass` to register.

        Raises
        ------
        ValueError
            If a storage class has already been registered with
            the same name and the previous definition differs.
        """
        if storageClass.name in self._storageClasses:
            existing = self.getStorageClass(storageClass.name)
            if existing != storageClass:
                # str() of a StorageClass is only its name, so use repr()
                # to make the differing definitions visible in the message.
                raise ValueError(f"New definition for StorageClass {storageClass.name} ({storageClass!r}) "
                                 f"differs from current definition ({existing!r})")
        else:
            self._storageClasses[storageClass.name] = storageClass

    def _unregisterStorageClass(self, storageClassName: str) -> None:
        """Remove the named StorageClass from the factory.

        Parameters
        ----------
        storageClassName : `str`
            Name of storage class to remove.

        Raises
        ------
        KeyError
            The named storage class is not registered.

        Notes
        -----
        This method is intended to simplify testing of StorageClassFactory
        functionality and it is not expected to be required for normal usage.
        """
        del self._storageClasses[storageClassName]