
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ["DatasetType"]

from copy import deepcopy
import re

from types import MappingProxyType

from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    Dict,
    Iterable,
    List,
    Mapping,
    Optional,
    Tuple,
    Type,
    Union,
)


from ..storageClass import StorageClass, StorageClassFactory
from ..dimensions import DimensionGraph
from ..configSupport import LookupKey
from ..json import from_json_generic, to_json_generic


if TYPE_CHECKING:
    from ..dimensions import Dimension, DimensionUniverse
    from ...registry import Registry



def _safeMakeMappingProxyType(data: Optional[Mapping]) -> Mapping:
    """Return a read-only view of ``data``, substituting an empty mapping
    for `None`.
    """
    if data is None:
        data = {}
    return MappingProxyType(data)



class DatasetType:
    r"""A named category of Datasets that defines how they are organized,
    related, and stored.

    A concrete, final class whose instances represent `DatasetType`\ s.
    `DatasetType` instances may be constructed without a `Registry`,
    but they must be registered via `Registry.registerDatasetType()`
    before corresponding Datasets may be added.
    `DatasetType` instances are immutable.

    Parameters
    ----------
    name : `str`
        A string name for the Dataset; must correspond to the same
        `DatasetType` across all Registries.  Names must start with an
        upper or lowercase letter, and may contain only letters, numbers,
        and underscores.  Component dataset types should contain a single
        period separating the base dataset type name from the component
        name (and may be recursive).
    dimensions : `DimensionGraph` or iterable of `Dimension`
        Dimensions used to label and relate instances of this `DatasetType`.
        If not a `DimensionGraph`, ``universe`` must be provided as well.
    storageClass : `StorageClass` or `str`
        Instance of a `StorageClass` or name of `StorageClass` that defines
        how this `DatasetType` is persisted.
    parentStorageClass : `StorageClass` or `str`, optional
        Instance of a `StorageClass` or name of `StorageClass` that defines
        how the composite parent is persisted.  Must be `None` if this
        is not a component.  Mandatory if it is a component but can be the
        special temporary placeholder
        (`DatasetType.PlaceholderParentStorageClass`) to allow
        construction with an intent to finalize later.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to normalize ``dimensions`` if it
        is not already a `DimensionGraph`.
    isCalibration : `bool`, optional
        If `True`, this dataset type may be included in
        `~CollectionType.CALIBRATION` collections.
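
    Examples
    --------
    A minimal construction sketch; ``universe`` here is assumed to be a
    `DimensionUniverse` (e.g. ``registry.dimensions``) and the dataset
    type and storage class names are purely illustrative:

    >>> datasetType = DatasetType("calexp", ("instrument", "visit"),
    ...                           "ExposureF", universe=universe)  # doctest: +SKIP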

101 """ 

102 

103 __slots__ = ("_name", "_dimensions", "_storageClass", "_storageClassName", 

104 "_parentStorageClass", "_parentStorageClassName", 

105 "_isCalibration") 

106 

107 VALID_NAME_REGEX = re.compile("^[a-zA-Z][a-zA-Z0-9_]*(\\.[a-zA-Z][a-zA-Z0-9_]*)*$") 

108 

109 PlaceholderParentStorageClass = StorageClass("PlaceHolder") 

110 """Placeholder StorageClass that can be used temporarily for a 

111 component. 

112 

113 This can be useful in pipeline construction where we are creating 

114 dataset types without a registry. 

115 """ 

116 

117 @staticmethod 

118 def nameWithComponent(datasetTypeName: str, componentName: str) -> str: 

119 """Form a valid DatasetTypeName from a parent and component. 

120 

121 No validation is performed. 

122 

123 Parameters 

124 ---------- 

125 datasetTypeName : `str` 

126 Base type name. 

127 componentName : `str` 

128 Name of component. 

129 

130 Returns 

131 ------- 

132 compTypeName : `str` 

133 Name to use for component DatasetType. 
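
        Examples
        --------
        >>> DatasetType.nameWithComponent("calexp", "wcs")
        'calexp.wcs'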

134 """ 

135 return "{}.{}".format(datasetTypeName, componentName) 

136 

137 def __init__(self, name: str, dimensions: Union[DimensionGraph, Iterable[Dimension]], 

138 storageClass: Union[StorageClass, str], 

139 parentStorageClass: Optional[Union[StorageClass, str]] = None, *, 

140 universe: Optional[DimensionUniverse] = None, 

141 isCalibration: bool = False): 

142 if self.VALID_NAME_REGEX.match(name) is None: 

143 raise ValueError(f"DatasetType name '{name}' is invalid.") 

144 self._name = name 

145 if not isinstance(dimensions, DimensionGraph): 

146 if universe is None: 

147 raise ValueError("If dimensions is not a normalized DimensionGraph, " 

148 "a universe must be provided.") 

149 dimensions = universe.extract(dimensions) 

150 self._dimensions = dimensions 

151 if name in self._dimensions.universe.getGovernorDimensions().names: 

152 raise ValueError(f"Governor dimension name {name} cannot be used as a dataset type name.") 

153 if not isinstance(storageClass, (StorageClass, str)): 

154 raise ValueError("StorageClass argument must be StorageClass or str. " 

155 f"Got {storageClass}") 

156 self._storageClass: Optional[StorageClass] 

157 if isinstance(storageClass, StorageClass): 

158 self._storageClass = storageClass 

159 self._storageClassName = storageClass.name 

160 else: 

161 self._storageClass = None 

162 self._storageClassName = storageClass 


        self._parentStorageClass: Optional[StorageClass] = None
        self._parentStorageClassName: Optional[str] = None
        # Determine up front whether this is a component dataset type.
        _, componentName = self.splitDatasetTypeName(self._name)
        if parentStorageClass is not None:
            if not isinstance(parentStorageClass, (StorageClass, str)):
                raise ValueError("Parent StorageClass argument must be StorageClass or str. "
                                 f"Got {parentStorageClass}")

            # Only allowed for a component dataset type.
            if componentName is None:
                raise ValueError("Can not specify a parent storage class if this is not a component"
                                 f" ({self._name})")
            if isinstance(parentStorageClass, StorageClass):
                self._parentStorageClass = parentStorageClass
                self._parentStorageClassName = parentStorageClass.name
            else:
                self._parentStorageClassName = parentStorageClass

        # Ensure that a parent storage class is specified when we have
        # a component and is not specified when we don't.
        if parentStorageClass is None and componentName is not None:
            raise ValueError(f"Component dataset type '{self._name}' constructed without parent"
                             " storage class")
        if parentStorageClass is not None and componentName is None:
            raise ValueError(f"Parent storage class specified but dataset type '{self._name}'"
                             " is not a component")
        self._isCalibration = isCalibration


    def __repr__(self) -> str:
        extra = ""
        if self._parentStorageClassName:
            extra = f", parentStorageClass={self._parentStorageClassName}"
        if self._isCalibration:
            extra += ", isCalibration=True"
        return f"DatasetType({self.name!r}, {self.dimensions}, {self._storageClassName}{extra})"

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, type(self)):
            return False
        if self._name != other._name:
            return False
        if self._dimensions != other._dimensions:
            return False
        if self._storageClass is not None and other._storageClass is not None:
            if self._storageClass != other._storageClass:
                return False
        else:
            if self._storageClassName != other._storageClassName:
                return False
        if self._isCalibration != other._isCalibration:
            return False
        if self._parentStorageClass is not None and other._parentStorageClass is not None:
            return self._parentStorageClass == other._parentStorageClass
        else:
            return self._parentStorageClassName == other._parentStorageClassName


    def __hash__(self) -> int:
        """Hash DatasetType instance.

        This only uses the StorageClass name, which is consistent with the
        implementation of the StorageClass hash method.
        """
        return hash((self._name, self._dimensions, self._storageClassName,
                     self._parentStorageClassName))


    def __lt__(self, other: Any) -> bool:
        """Sort using the dataset type name."""
        if not isinstance(other, type(self)):
            return NotImplemented
        return self.name < other.name

    @property
    def name(self) -> str:
        """A string name for the Dataset; must correspond to the same
        `DatasetType` across all Registries.
        """
        return self._name

    @property
    def dimensions(self) -> DimensionGraph:
        r"""The `Dimension`\ s that label and relate instances of this
        `DatasetType` (`DimensionGraph`).
        """
        return self._dimensions

    @property
    def storageClass(self) -> StorageClass:
        """`StorageClass` instance that defines how this `DatasetType`
        is persisted.  Note that if the DatasetType was constructed with
        the name of a StorageClass then Butler has to be initialized
        before using this property.
        """
        if self._storageClass is None:
            self._storageClass = StorageClassFactory().getStorageClass(self._storageClassName)
        return self._storageClass

    @property
    def parentStorageClass(self) -> Optional[StorageClass]:
        """`StorageClass` instance that defines how the composite associated
        with this `DatasetType` is persisted.

        Note that if the DatasetType was constructed with the name of a
        StorageClass then Butler has to be initialized before using this
        property.  Can be `None` if this is not a component of a composite.
        Must be defined if this is a component.
        """
        if self._parentStorageClass is None and self._parentStorageClassName is None:
            return None
        if self._parentStorageClass is None and self._parentStorageClassName is not None:
            self._parentStorageClass = StorageClassFactory().getStorageClass(self._parentStorageClassName)
        return self._parentStorageClass

    def isCalibration(self) -> bool:
        """Return whether datasets of this type may be included in calibration
        collections.

        Returns
        -------
        flag : `bool`
            `True` if datasets of this type may be included in calibration
            collections.
        """
        return self._isCalibration

    def finalizeParentStorageClass(self, newParent: StorageClass) -> None:
        """Replace the current placeholder parent storage class with
        the real parent.

        Parameters
        ----------
        newParent : `StorageClass`
            The new parent to be associated with this composite dataset
            type.  This replaces the temporary placeholder parent that
            was specified during construction.

        Raises
        ------
        ValueError
            Raised if this dataset type is not a component of a composite.
            Raised if a StorageClass is not given.
            Raised if the parent currently associated with the dataset
            type is not a placeholder.
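
        Examples
        --------
        A sketch of the intended workflow; ``universe`` is assumed to be a
        `DimensionUniverse` and ``composite`` the real parent
        `StorageClass`:

        >>> dt = DatasetType("a.b", (), "StructuredDataDict",
        ...                  parentStorageClass=DatasetType.PlaceholderParentStorageClass,
        ...                  universe=universe)  # doctest: +SKIP
        >>> dt.finalizeParentStorageClass(composite)  # doctest: +SKIP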

307 """ 

308 if not self.isComponent(): 

309 raise ValueError("Can not set a parent storage class if this is not a component" 

310 f" ({self.name})") 

311 if self._parentStorageClass != self.PlaceholderParentStorageClass: 

312 raise ValueError(f"This DatasetType has a parent of {self._parentStorageClassName} and" 

313 " is not a placeholder.") 

314 if not isinstance(newParent, StorageClass): 

315 raise ValueError(f"Supplied parent must be a StorageClass. Got {newParent!r}") 

316 self._parentStorageClass = newParent 

317 self._parentStorageClassName = newParent.name 


    @staticmethod
    def splitDatasetTypeName(datasetTypeName: str) -> Tuple[str, Optional[str]]:
        """Given a dataset type name, return the root name and the component
        name.

        Parameters
        ----------
        datasetTypeName : `str`
            The name of the dataset type; it can include a component using
            a "."-separator.

        Returns
        -------
        rootName : `str`
            Root name without any components.
        componentName : `str`
            The component if it has been specified, else `None`.

        Notes
        -----
        If the dataset type name is ``a.b.c`` this method will return a
        root name of ``a`` and a component name of ``b.c``.
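
        Examples
        --------
        >>> DatasetType.splitDatasetTypeName("a.b.c")
        ('a', 'b.c')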

341 """ 

342 comp = None 

343 root = datasetTypeName 

344 if "." in root: 

345 # If there is doubt, the component is after the first "." 

346 root, comp = root.split(".", maxsplit=1) 

347 return root, comp 

348 

349 def nameAndComponent(self) -> Tuple[str, Optional[str]]: 

350 """Return the root name of this dataset type and the component 

351 name (if defined). 

352 

353 Returns 

354 ------- 

355 rootName : `str` 

356 Root name for this `DatasetType` without any components. 

357 componentName : `str` 

358 The component if it has been specified, else `None`. 

359 """ 

360 return self.splitDatasetTypeName(self.name) 

361 

362 def component(self) -> Optional[str]: 

363 """Component name (if defined) 

364 

365 Returns 

366 ------- 

367 comp : `str` 

368 Name of component part of DatasetType name. `None` if this 

369 `DatasetType` is not associated with a component. 

370 """ 

371 _, comp = self.nameAndComponent() 

372 return comp 

373 

374 def componentTypeName(self, component: str) -> str: 

375 """Given a component name, derive the datasetTypeName of that component 

376 

377 Parameters 

378 ---------- 

379 component : `str` 

380 Name of component 

381 

382 Returns 

383 ------- 

384 derived : `str` 

385 Compound name of this `DatasetType` and the component. 

386 

387 Raises 

388 ------ 

389 KeyError 

390 Requested component is not supported by this `DatasetType`. 

391 """ 

392 if component in self.storageClass.allComponents(): 

393 return self.nameWithComponent(self.name, component) 

394 raise KeyError("Requested component ({}) not understood by this DatasetType".format(component)) 


    def makeCompositeDatasetType(self) -> DatasetType:
        """Return a DatasetType suitable for the composite version of this
        component dataset type.

        Returns
        -------
        composite : `DatasetType`
            The composite dataset type.

        Raises
        ------
        RuntimeError
            Raised if this dataset type is not a component dataset type.
        ValueError
            Raised if the parent storage class has not been set.
        """
        if not self.isComponent():
            raise RuntimeError(f"DatasetType {self.name} must be a component to form the composite")
        composite_name, _ = self.nameAndComponent()
        if self.parentStorageClass is None:
            raise ValueError("Parent storage class is not set. "
                             f"Unable to create composite type from {self.name}")
        return DatasetType(composite_name, dimensions=self.dimensions,
                           storageClass=self.parentStorageClass)


    def makeComponentDatasetType(self, component: str) -> DatasetType:
        """Return a DatasetType suitable for the given component, assuming the
        same dimensions as the parent.

        Parameters
        ----------
        component : `str`
            Name of component.

        Returns
        -------
        datasetType : `DatasetType`
            A new DatasetType instance.
        """
        # The component could be a read/write or read component.
        return DatasetType(self.componentTypeName(component), dimensions=self.dimensions,
                           storageClass=self.storageClass.allComponents()[component],
                           parentStorageClass=self.storageClass)

    def makeAllComponentDatasetTypes(self) -> List[DatasetType]:
        """Return all the component dataset types associated with this
        dataset type.

        Returns
        -------
        all : `list` of `DatasetType`
            All the component dataset types.  If this is not a composite
            then returns an empty list.
        """
        return [self.makeComponentDatasetType(componentName)
                for componentName in self.storageClass.allComponents()]


    def isComponent(self) -> bool:
        """Boolean indicating whether this `DatasetType` refers to a
        component of a composite.

        Returns
        -------
        isComponent : `bool`
            `True` if this `DatasetType` is a component, `False` otherwise.
        """
        if self.component():
            return True
        return False

    def isComposite(self) -> bool:
        """Boolean indicating whether this `DatasetType` is a composite type.

        Returns
        -------
        isComposite : `bool`
            `True` if this `DatasetType` is a composite type, `False`
            otherwise.
        """
        return self.storageClass.isComposite()

    def _lookupNames(self) -> Tuple[LookupKey, ...]:
        """Name keys to use when looking up this datasetType in a
        configuration.

        The names are returned in order of priority.

        Returns
        -------
        names : `tuple` of `LookupKey`
            Tuple of the `DatasetType` name and the `StorageClass` name.
            If the name includes a component the name with the component
            is first, then the name without the component and finally
            the storage class name.
        """
        rootName, componentName = self.nameAndComponent()
        lookups: Tuple[LookupKey, ...] = (LookupKey(name=self.name),)
        if componentName is not None:
            lookups = lookups + (LookupKey(name=rootName),)

        if self.dimensions:
            # Dimensions are a lower priority than dataset type name.
            lookups = lookups + (LookupKey(dimensions=self.dimensions),)

        return lookups + self.storageClass._lookupNames()


    def to_simple(self, minimal: bool = False) -> Union[Dict, str]:
        """Convert this class to a simple python type suitable for
        serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization.  Requires Registry to convert
            back to a full type.

        Returns
        -------
        simple : `dict` or `str`
            The object converted to a dictionary or a simple string.
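
        Examples
        --------
        An illustrative sketch of the non-minimal form; the values depend
        on the instance and the serialized dimensions are elided here:

        >>> datasetType.to_simple()  # doctest: +SKIP
        {'name': 'calexp', 'storageClass': 'ExposureF',
         'isCalibration': False, 'dimensions': ...}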

514 """ 

515 if minimal: 

516 # Only needs the name. 

517 return self.name 

518 

519 # Convert to a dict form 

520 as_dict = {"name": self.name, 

521 "storageClass": self._storageClassName, 

522 "isCalibration": self._isCalibration, 

523 "dimensions": self.dimensions.to_simple(), 

524 } 

525 

526 if self._parentStorageClassName is not None: 

527 as_dict["parentStorageClass"] = self._parentStorageClassName 

528 return as_dict 


    @classmethod
    def from_simple(cls, simple: Union[Dict, str],
                    universe: Optional[DimensionUniverse] = None,
                    registry: Optional[Registry] = None) -> DatasetType:
        """Construct a new object from the data returned from the `to_simple`
        method.

        Parameters
        ----------
        simple : `dict` of [`str`, `Any`] or `str`
            The value returned by `to_simple()`.
        universe : `DimensionUniverse`, optional
            The special graph of all known dimensions of which this graph
            will be a subset.  Can be `None` if a registry is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry to use to convert the simple name of a DatasetType to
            a full `DatasetType`.  Can be `None` if a full description of
            the type is provided along with a universe.

        Returns
        -------
        datasetType : `DatasetType`
            Newly-constructed object.
        """
        if isinstance(simple, str):
            if registry is None:
                raise ValueError(f"Unable to convert a DatasetType name '{simple}' to DatasetType"
                                 " without a Registry")
            return registry.getDatasetType(simple)

        if universe is None and registry is None:
            raise ValueError("One of universe or registry must be provided.")

        if universe is None and registry is not None:
            # The registry cannot be None here, but the explicit check
            # helps mypy narrow the type.
            universe = registry.dimensions

        if universe is None:
            # This is for mypy.
            raise ValueError("Unable to determine a usable universe")

        return cls(name=simple["name"],
                   dimensions=DimensionGraph.from_simple(simple["dimensions"], universe=universe),
                   storageClass=simple["storageClass"],
                   isCalibration=simple.get("isCalibration", False),
                   parentStorageClass=simple.get("parentStorageClass"),
                   universe=universe)


    to_json = to_json_generic
    from_json = classmethod(from_json_generic)

    def __reduce__(self) -> Tuple[Callable, Tuple[Type[DatasetType],
                                                  Tuple[str, DimensionGraph, str, Optional[str]],
                                                  Dict[str, bool]]]:
        """Support pickling.

        StorageClass instances can not normally be pickled, so we pickle
        the StorageClass name instead of the instance.
        """
        return _unpickle_via_factory, (self.__class__, (self.name, self.dimensions, self._storageClassName,
                                                        self._parentStorageClassName),
                                       {"isCalibration": self._isCalibration})


    def __deepcopy__(self, memo: Any) -> DatasetType:
        """Support for the deep copy method.

        Normally ``deepcopy`` will use the pickle mechanism to make copies.
        We want to avoid that to support the (possibly degenerate) use case
        in which a DatasetType is constructed with a StorageClass instance
        that is not registered with the StorageClassFactory (this happens
        in unit tests).  Instead we re-implement the ``__deepcopy__``
        method.
        """
        return DatasetType(name=deepcopy(self.name, memo),
                           dimensions=deepcopy(self.dimensions, memo),
                           storageClass=deepcopy(self._storageClass or self._storageClassName, memo),
                           parentStorageClass=deepcopy(self._parentStorageClass
                                                       or self._parentStorageClassName, memo),
                           isCalibration=deepcopy(self._isCalibration, memo))



def _unpickle_via_factory(factory: Callable, args: Any, kwargs: Any) -> DatasetType:
    """Unpickle something by calling a factory.

    Allows subclasses to unpickle using `__reduce__` with keyword
    arguments as well as positional arguments.
    """
    return factory(*args, **kwargs)