# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ["DatasetType"]

from copy import deepcopy
import re

from types import MappingProxyType

from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    Dict,
    Iterable,
    List,
    Mapping,
    Optional,
    Tuple,
    Type,
    Union,
)


from ..storageClass import StorageClass, StorageClassFactory
from ..dimensions import DimensionGraph
from ..configSupport import LookupKey
from ..json import from_json_generic, to_json_generic

if TYPE_CHECKING:
    from ..dimensions import Dimension, DimensionUniverse
    from ...registry import Registry


def _safeMakeMappingProxyType(data: Optional[Mapping]) -> Mapping:
    if data is None:
        data = {}
    return MappingProxyType(data)


class DatasetType:
    r"""A named category of Datasets.

    Defines how they are organized, related, and stored.

    A concrete, final class whose instances represent `DatasetType`\ s.
    `DatasetType` instances may be constructed without a `Registry`,
    but they must be registered via `Registry.registerDatasetType()`
    before corresponding Datasets may be added.
    `DatasetType` instances are immutable.

    Parameters
    ----------
    name : `str`
        A string name for the Dataset; must correspond to the same
        `DatasetType` across all Registries. Names must start with an
        upper or lowercase letter, and may contain only letters, numbers,
        and underscores. Component dataset types should contain a single
        period separating the base dataset type name from the component name
        (and may be recursive).
    dimensions : `DimensionGraph` or iterable of `Dimension`
        Dimensions used to label and relate instances of this `DatasetType`.
        If not a `DimensionGraph`, ``universe`` must be provided as well.
    storageClass : `StorageClass` or `str`
        Instance of a `StorageClass` or name of `StorageClass` that defines
        how this `DatasetType` is persisted.
    parentStorageClass : `StorageClass` or `str`, optional
        Instance of a `StorageClass` or name of `StorageClass` that defines
        how the composite parent is persisted. Must be `None` if this
        is not a component. Mandatory if it is a component but can be the
        special temporary placeholder
        (`DatasetType.PlaceholderParentStorageClass`) to allow
        construction with an intent to finalize later.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to normalize ``dimensions`` if it
        is not already a `DimensionGraph`.
    isCalibration : `bool`, optional
        If `True`, this dataset type may be included in
        `~CollectionType.CALIBRATION` collections.
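
    Examples
    --------
    A minimal construction sketch. It assumes that ``universe`` is an
    existing `DimensionUniverse` (e.g. obtained from a `Registry`) and that
    the storage class name ``"ExposureF"`` is known to the
    `StorageClassFactory`:

    >>> datasetType = DatasetType("calexp",
    ...                           dimensions=["instrument", "visit", "detector"],
    ...                           storageClass="ExposureF",
    ...                           universe=universe)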

    """

    __slots__ = ("_name", "_dimensions", "_storageClass", "_storageClassName",
                 "_parentStorageClass", "_parentStorageClassName",
                 "_isCalibration")

    VALID_NAME_REGEX = re.compile("^[a-zA-Z][a-zA-Z0-9_]*(\\.[a-zA-Z][a-zA-Z0-9_]*)*$")

    PlaceholderParentStorageClass = StorageClass("PlaceHolder")
    """Placeholder StorageClass that can be used temporarily for a
    component.

    This can be useful in pipeline construction where we are creating
    dataset types without a registry.
    """

    @staticmethod
    def nameWithComponent(datasetTypeName: str, componentName: str) -> str:
        """Form a valid DatasetTypeName from a parent and component.

        No validation is performed.

        Parameters
        ----------
        datasetTypeName : `str`
            Base type name.
        componentName : `str`
            Name of component.

        Returns
        -------
        compTypeName : `str`
            Name to use for component DatasetType.
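
        Examples
        --------
        >>> DatasetType.nameWithComponent("calexp", "wcs")
        'calexp.wcs'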

        """
        return "{}.{}".format(datasetTypeName, componentName)

    def __init__(self, name: str, dimensions: Union[DimensionGraph, Iterable[Dimension]],
                 storageClass: Union[StorageClass, str],
                 parentStorageClass: Optional[Union[StorageClass, str]] = None, *,
                 universe: Optional[DimensionUniverse] = None,
                 isCalibration: bool = False):
        if self.VALID_NAME_REGEX.match(name) is None:
            raise ValueError(f"DatasetType name '{name}' is invalid.")
        self._name = name
        if not isinstance(dimensions, DimensionGraph):
            if universe is None:
                raise ValueError("If dimensions is not a normalized DimensionGraph, "
                                 "a universe must be provided.")
            dimensions = universe.extract(dimensions)
        self._dimensions = dimensions
        if name in self._dimensions.universe.getGovernorDimensions().names:
            raise ValueError(f"Governor dimension name {name} cannot be used as a dataset type name.")
        if not isinstance(storageClass, (StorageClass, str)):
            raise ValueError("StorageClass argument must be StorageClass or str. "
                             f"Got {storageClass}")
        self._storageClass: Optional[StorageClass]
        if isinstance(storageClass, StorageClass):
            self._storageClass = storageClass
            self._storageClassName = storageClass.name
        else:
            self._storageClass = None
            self._storageClassName = storageClass

        self._parentStorageClass: Optional[StorageClass] = None
        self._parentStorageClassName: Optional[str] = None
        if parentStorageClass is not None:
            if not isinstance(parentStorageClass, (StorageClass, str)):
                raise ValueError("Parent StorageClass argument must be StorageClass or str. "
                                 f"Got {parentStorageClass}")

            # Only allowed for a component dataset type.
            _, componentName = self.splitDatasetTypeName(self._name)
            if componentName is None:
                raise ValueError("Cannot specify a parent storage class if this is not a component"
                                 f" ({self._name})")
            if isinstance(parentStorageClass, StorageClass):
                self._parentStorageClass = parentStorageClass
                self._parentStorageClassName = parentStorageClass.name
            else:
                self._parentStorageClassName = parentStorageClass

        # Ensure that a parent storage class is specified when we have
        # a component and is not specified when we don't.
        _, componentName = self.splitDatasetTypeName(self._name)
        if parentStorageClass is None and componentName is not None:
            raise ValueError(f"Component dataset type '{self._name}' constructed without parent"
                             " storage class")
        if parentStorageClass is not None and componentName is None:
            raise ValueError(f"Parent storage class specified but dataset type {self._name}"
                             " is not a component")
        self._isCalibration = isCalibration

    def __repr__(self) -> str:
        extra = ""
        if self._parentStorageClassName:
            extra = f", parentStorageClass={self._parentStorageClassName}"
        if self._isCalibration:
            extra += ", isCalibration=True"
        return f"DatasetType({self.name!r}, {self.dimensions}, {self._storageClassName}{extra})"

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, type(self)):
            return False
        if self._name != other._name:
            return False
        if self._dimensions != other._dimensions:
            return False
        if self._storageClass is not None and other._storageClass is not None:
            if self._storageClass != other._storageClass:
                return False
        else:
            if self._storageClassName != other._storageClassName:
                return False
        if self._isCalibration != other._isCalibration:
            return False
        if self._parentStorageClass is not None and other._parentStorageClass is not None:
            return self._parentStorageClass == other._parentStorageClass
        else:
            return self._parentStorageClassName == other._parentStorageClassName

    def __hash__(self) -> int:
        """Hash DatasetType instance.

        This only uses the StorageClass name, which is consistent with the
        implementation of the StorageClass hash method.
        """
        return hash((self._name, self._dimensions, self._storageClassName,
                     self._parentStorageClassName))

    def __lt__(self, other: Any) -> bool:
        """Sort using the dataset type name."""
        if not isinstance(other, type(self)):
            return NotImplemented
        return self.name < other.name

    @property
    def name(self) -> str:
        """Return a string name for the Dataset.

        Must correspond to the same `DatasetType` across all Registries.
        """
        return self._name

    @property
    def dimensions(self) -> DimensionGraph:
        r"""Return the `Dimension`\ s for this dataset type.

        The dimensions label and relate instances of this
        `DatasetType` (`DimensionGraph`).
        """
        return self._dimensions

    @property
    def storageClass(self) -> StorageClass:
        """Return `StorageClass` instance associated with this dataset type.

        The `StorageClass` defines how this `DatasetType` is persisted.
        Note that if the DatasetType was constructed with the name of a
        StorageClass then Butler has to be initialized before using this
        property.
        """
        if self._storageClass is None:
            self._storageClass = StorageClassFactory().getStorageClass(self._storageClassName)
        return self._storageClass

    @property
    def parentStorageClass(self) -> Optional[StorageClass]:
        """Return the storage class of the composite containing this component.

        Note that if the DatasetType was constructed with the name of a
        StorageClass then Butler has to be initialized before using this
        property. Can be `None` if this is not a component of a composite.
        Must be defined if this is a component.
        """
        if self._parentStorageClass is None and self._parentStorageClassName is None:
            return None
        if self._parentStorageClass is None and self._parentStorageClassName is not None:
            self._parentStorageClass = StorageClassFactory().getStorageClass(self._parentStorageClassName)
        return self._parentStorageClass

    def isCalibration(self) -> bool:
        """Return if datasets of this type can be in calibration collections.

        Returns
        -------
        flag : `bool`
            `True` if datasets of this type may be included in calibration
            collections.
        """
        return self._isCalibration

    def finalizeParentStorageClass(self, newParent: StorageClass) -> None:
        """Finalize the parent storage class definition.

        Replaces the current placeholder parent storage class with
        the real parent.

        Parameters
        ----------
        newParent : `StorageClass`
            The new parent to be associated with this composite dataset
            type. This replaces the temporary placeholder parent that
            was specified during construction.

        Raises
        ------
        ValueError
            Raised if this dataset type is not a component of a composite.
            Raised if a StorageClass is not given.
            Raised if the parent currently associated with the dataset
            type is not a placeholder.
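
        Examples
        --------
        A sketch of the placeholder workflow this method supports; ``dims``
        is assumed to be an existing `DimensionGraph` and ``"Wcs"`` a
        registered storage class name:

        >>> comp = DatasetType(
        ...     "coadd.wcs", dims, "Wcs",
        ...     parentStorageClass=DatasetType.PlaceholderParentStorageClass)
        >>> comp.finalizeParentStorageClass(StorageClass("Exposure"))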

        """
        if not self.isComponent():
            raise ValueError("Cannot set a parent storage class if this is not a component"
                             f" ({self.name})")
        if self._parentStorageClass != self.PlaceholderParentStorageClass:
            raise ValueError(f"This DatasetType has a parent of {self._parentStorageClassName} and"
                             " is not a placeholder.")
        if not isinstance(newParent, StorageClass):
            raise ValueError(f"Supplied parent must be a StorageClass. Got {newParent!r}")
        self._parentStorageClass = newParent
        self._parentStorageClassName = newParent.name

    @staticmethod
    def splitDatasetTypeName(datasetTypeName: str) -> Tuple[str, Optional[str]]:
        """Return the root name and the component from a composite name.

        Parameters
        ----------
        datasetTypeName : `str`
            The name of the dataset type, which can include a component
            using a "."-separator.

        Returns
        -------
        rootName : `str`
            Root name without any components.
        componentName : `str`
            The component if it has been specified, else `None`.

        Notes
        -----
        If the dataset type name is ``a.b.c`` this method will return a
        root name of ``a`` and a component name of ``b.c``.
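
        Examples
        --------
        >>> DatasetType.splitDatasetTypeName("calexp.wcs")
        ('calexp', 'wcs')
        >>> DatasetType.splitDatasetTypeName("calexp")
        ('calexp', None)
        >>> DatasetType.splitDatasetTypeName("a.b.c")
        ('a', 'b.c')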

        """
        comp = None
        root = datasetTypeName
        if "." in root:
            # If there is doubt, the component is after the first "."
            root, comp = root.split(".", maxsplit=1)
        return root, comp

    def nameAndComponent(self) -> Tuple[str, Optional[str]]:
        """Return the root name of this dataset type and any component.

        Returns
        -------
        rootName : `str`
            Root name for this `DatasetType` without any components.
        componentName : `str`
            The component if it has been specified, else `None`.
        """
        return self.splitDatasetTypeName(self.name)

    def component(self) -> Optional[str]:
        """Return the component name (if defined).

        Returns
        -------
        comp : `str`
            Name of component part of DatasetType name. `None` if this
            `DatasetType` is not associated with a component.
        """
        _, comp = self.nameAndComponent()
        return comp

    def componentTypeName(self, component: str) -> str:
        """Derive a component dataset type name from a composite.

        Parameters
        ----------
        component : `str`
            Name of component.

        Returns
        -------
        derived : `str`
            Compound name of this `DatasetType` and the component.

        Raises
        ------
        KeyError
            Requested component is not supported by this `DatasetType`.
        """
        if component in self.storageClass.allComponents():
            return self.nameWithComponent(self.name, component)
        raise KeyError("Requested component ({}) not understood by this DatasetType".format(component))

    def makeCompositeDatasetType(self) -> DatasetType:
        """Return a composite dataset type from the component.

        Returns
        -------
        composite : `DatasetType`
            The composite dataset type.

        Raises
        ------
        RuntimeError
            Raised if this dataset type is not a component dataset type.
        ValueError
            Raised if the parent storage class has not been set.
        """
        if not self.isComponent():
            raise RuntimeError(f"DatasetType {self.name} must be a component to form the composite")
        composite_name, _ = self.nameAndComponent()
        if self.parentStorageClass is None:
            raise ValueError("Parent storage class is not set. "
                             f"Unable to create composite type from {self.name}")
        return DatasetType(composite_name, dimensions=self.dimensions,
                           storageClass=self.parentStorageClass)

    def makeComponentDatasetType(self, component: str) -> DatasetType:
        """Return a component dataset type from a composite.

        Assumes the same dimensions as the parent.

        Parameters
        ----------
        component : `str`
            Name of component.

        Returns
        -------
        datasetType : `DatasetType`
            A new DatasetType instance.
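
        Examples
        --------
        A sketch, assuming ``calexp`` is an existing composite `DatasetType`
        whose storage class defines a ``wcs`` component:

        >>> wcsType = calexp.makeComponentDatasetType("wcs")
        >>> wcsType.name
        'calexp.wcs'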

        """
        # The component could be a read/write or read component.
        return DatasetType(self.componentTypeName(component), dimensions=self.dimensions,
                           storageClass=self.storageClass.allComponents()[component],
                           parentStorageClass=self.storageClass)

    def makeAllComponentDatasetTypes(self) -> List[DatasetType]:
        """Return all component dataset types for this composite.

        Returns
        -------
        all : `list` of `DatasetType`
            All the component dataset types. If this is not a composite
            then returns an empty list.
        """
        return [self.makeComponentDatasetType(componentName)
                for componentName in self.storageClass.allComponents()]

    def isComponent(self) -> bool:
        """Return whether this `DatasetType` refers to a component.

        Returns
        -------
        isComponent : `bool`
            `True` if this `DatasetType` is a component, `False` otherwise.
        """
        if self.component():
            return True
        return False

    def isComposite(self) -> bool:
        """Return whether this `DatasetType` is a composite.

        Returns
        -------
        isComposite : `bool`
            `True` if this `DatasetType` is a composite type, `False`
            otherwise.
        """
        return self.storageClass.isComposite()

    def _lookupNames(self) -> Tuple[LookupKey, ...]:
        """Return name keys to use for lookups in configurations.

        The names are returned in order of priority.

        Returns
        -------
        names : `tuple` of `LookupKey`
            Tuple of the `DatasetType` name and the `StorageClass` name.
            If the name includes a component the name with the component
            is first, then the name without the component and finally
            the storage class name.
        """
        rootName, componentName = self.nameAndComponent()
        lookups: Tuple[LookupKey, ...] = (LookupKey(name=self.name),)
        if componentName is not None:
            lookups = lookups + (LookupKey(name=rootName),)

        if self.dimensions:
            # Dimensions are a lower priority than dataset type name.
            lookups = lookups + (LookupKey(dimensions=self.dimensions),)

        return lookups + self.storageClass._lookupNames()

    def to_simple(self, minimal: bool = False) -> Union[Dict, str]:
        """Convert this class to a simple python type.

        This makes it suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. Requires Registry to convert
            back to a full type.

        Returns
        -------
        simple : `dict` or `str`
            The object converted to a dictionary or a simple string.
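
        Notes
        -----
        In the non-minimal form the returned `dict` has the following shape;
        the ``dimensions`` value is whatever `DimensionGraph.to_simple`
        returns, and ``parentStorageClass`` is present only for components::

            {"name": "calexp",
             "storageClass": "ExposureF",
             "isCalibration": False,
             "dimensions": ...}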

        """
        if minimal:
            # Only needs the name.
            return self.name

        # Convert to a dict form.
        as_dict = {"name": self.name,
                   "storageClass": self._storageClassName,
                   "isCalibration": self._isCalibration,
                   "dimensions": self.dimensions.to_simple(),
                   }

        if self._parentStorageClassName is not None:
            as_dict["parentStorageClass"] = self._parentStorageClassName
        return as_dict

    @classmethod
    def from_simple(cls, simple: Union[Dict, str],
                    universe: Optional[DimensionUniverse] = None,
                    registry: Optional[Registry] = None) -> DatasetType:
        """Construct a new object from the simplified form.

        This is usually data returned from the `to_simple` method.

        Parameters
        ----------
        simple : `dict` of [`str`, `Any`] or `str`
            The value returned by `to_simple()`.
        universe : `DimensionUniverse`, optional
            The special graph of all known dimensions of which this graph
            will be a subset. Can be `None` if a registry is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry to use to convert the simple name of a DatasetType to
            a full `DatasetType`. Can be `None` if a full description of
            the type is provided along with a universe.

        Returns
        -------
        datasetType : `DatasetType`
            Newly-constructed object.
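
        Examples
        --------
        A round-trip sketch; it assumes ``original`` is an existing
        `DatasetType` and ``universe`` is a `DimensionUniverse`:

        >>> restored = DatasetType.from_simple(original.to_simple(),
        ...                                    universe=universe)
        >>> restored == original
        True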

        """
        if isinstance(simple, str):
            if registry is None:
                raise ValueError(f"Unable to convert a DatasetType name '{simple}' to DatasetType"
                                 " without a Registry")
            return registry.getDatasetType(simple)

        if universe is None and registry is None:
            raise ValueError("One of universe or registry must be provided.")

        if universe is None and registry is not None:
            # Registry should not be None by now but this check helps mypy.
            universe = registry.dimensions

        if universe is None:
            # This is for mypy.
            raise ValueError("Unable to determine a usable universe")

        return cls(name=simple["name"],
                   dimensions=DimensionGraph.from_simple(simple["dimensions"], universe=universe),
                   storageClass=simple["storageClass"],
                   isCalibration=simple.get("isCalibration", False),
                   parentStorageClass=simple.get("parentStorageClass"),
                   universe=universe)

    to_json = to_json_generic
    from_json = classmethod(from_json_generic)

    def __reduce__(self) -> Tuple[Callable, Tuple[Type[DatasetType],
                                                  Tuple[str, DimensionGraph, str, Optional[str]],
                                                  Dict[str, bool]]]:
        """Support pickling.

        StorageClass instances can not normally be pickled, so we pickle
        the StorageClass name instead of the instance.
        """
        return _unpickle_via_factory, (self.__class__, (self.name, self.dimensions,
                                                        self._storageClassName,
                                                        self._parentStorageClassName),
                                       {"isCalibration": self._isCalibration})

    def __deepcopy__(self, memo: Any) -> DatasetType:
        """Support for deep copy method.

        Normally ``deepcopy`` will use the pickle mechanism to make copies.
        We want to avoid that to support the (possibly degenerate) use case
        where a DatasetType is constructed with a StorageClass instance that
        is not registered with the StorageClassFactory (this happens in unit
        tests). Instead we re-implement the ``__deepcopy__`` method.
        """
        return DatasetType(name=deepcopy(self.name, memo),
                           dimensions=deepcopy(self.dimensions, memo),
                           storageClass=deepcopy(self._storageClass or self._storageClassName, memo),
                           parentStorageClass=deepcopy(self._parentStorageClass
                                                       or self._parentStorageClassName, memo),
                           isCalibration=deepcopy(self._isCalibration, memo))


def _unpickle_via_factory(factory: Callable, args: Any, kwargs: Any) -> DatasetType:
    """Unpickle something by calling a factory.

    Allows subclasses to unpickle using `__reduce__` with keyword
    arguments as well as positional arguments.
    """
    return factory(*args, **kwargs)