Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ["DatasetType"] 

25 

26from copy import deepcopy 

27import re 

28 

29from types import MappingProxyType 

30 

31from typing import ( 

32 TYPE_CHECKING, 

33 Any, 

34 Callable, 

35 Dict, 

36 Iterable, 

37 List, 

38 Mapping, 

39 Optional, 

40 Tuple, 

41 Type, 

42 Union, 

43) 

44 

45 

46from ..storageClass import StorageClass, StorageClassFactory 

47from ..dimensions import DimensionGraph 

48from ..configSupport import LookupKey 

49from ..json import from_json_generic, to_json_generic 

50 

# Imports needed only for type annotations; guarded to avoid circular
# imports at runtime.  (Coverage-report residue removed from the guard line.)
if TYPE_CHECKING:
    from ..dimensions import Dimension, DimensionUniverse
    from ...registry import Registry

54 

55 

56def _safeMakeMappingProxyType(data: Optional[Mapping]) -> Mapping: 

57 if data is None: 

58 data = {} 

59 return MappingProxyType(data) 

60 

61 

class DatasetType:
    r"""A named category of Datasets that defines how they are organized,
    related, and stored.

    A concrete, final class whose instances represent `DatasetType`\ s.
    `DatasetType` instances may be constructed without a `Registry`,
    but they must be registered
    via `Registry.registerDatasetType()` before corresponding Datasets
    may be added.
    `DatasetType` instances are immutable.

    Parameters
    ----------
    name : `str`
        A string name for the Dataset; must correspond to the same
        `DatasetType` across all Registries.  Names must start with an
        upper or lowercase letter, and may contain only letters, numbers,
        and underscores.  Component dataset types should contain a single
        period separating the base dataset type name from the component name
        (and may be recursive).
    dimensions : `DimensionGraph` or iterable of `Dimension`
        Dimensions used to label and relate instances of this `DatasetType`.
        If not a `DimensionGraph`, ``universe`` must be provided as well.
    storageClass : `StorageClass` or `str`
        Instance of a `StorageClass` or name of `StorageClass` that defines
        how this `DatasetType` is persisted.
    parentStorageClass : `StorageClass` or `str`, optional
        Instance of a `StorageClass` or name of `StorageClass` that defines
        how the composite parent is persisted.  Must be `None` if this
        is not a component.  Mandatory if it is a component but can be the
        special temporary placeholder
        (`DatasetType.PlaceholderParentStorageClass`) to allow
        construction with an intent to finalize later.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to normalize ``dimensions`` if it
        is not already a `DimensionGraph`.
    isCalibration : `bool`, optional
        If `True`, this dataset type may be included in
        `~CollectionType.CALIBRATION` collections.
    """

    __slots__ = ("_name", "_dimensions", "_storageClass", "_storageClassName",
                 "_parentStorageClass", "_parentStorageClassName",
                 "_isCalibration")

    # Dot-separated identifiers: each part starts with a letter and may
    # contain letters, digits, and underscores.  Raw string used per the
    # usual convention for regex literals (equivalent to the escaped form).
    VALID_NAME_REGEX = re.compile(r"^[a-zA-Z][a-zA-Z0-9_]*(\.[a-zA-Z][a-zA-Z0-9_]*)*$")

    PlaceholderParentStorageClass = StorageClass("PlaceHolder")
    """Placeholder StorageClass that can be used temporarily for a
    component.

    This can be useful in pipeline construction where we are creating
    dataset types without a registry.
    """

116 

117 @staticmethod 

118 def nameWithComponent(datasetTypeName: str, componentName: str) -> str: 

119 """Form a valid DatasetTypeName from a parent and component. 

120 

121 No validation is performed. 

122 

123 Parameters 

124 ---------- 

125 datasetTypeName : `str` 

126 Base type name. 

127 componentName : `str` 

128 Name of component. 

129 

130 Returns 

131 ------- 

132 compTypeName : `str` 

133 Name to use for component DatasetType. 

134 """ 

135 return "{}.{}".format(datasetTypeName, componentName) 

136 

137 def __init__(self, name: str, dimensions: Union[DimensionGraph, Iterable[Dimension]], 

138 storageClass: Union[StorageClass, str], 

139 parentStorageClass: Optional[Union[StorageClass, str]] = None, *, 

140 universe: Optional[DimensionUniverse] = None, 

141 isCalibration: bool = False): 

142 if self.VALID_NAME_REGEX.match(name) is None: 

143 raise ValueError(f"DatasetType name '{name}' is invalid.") 

144 self._name = name 

145 if not isinstance(dimensions, DimensionGraph): 

146 if universe is None: 

147 raise ValueError("If dimensions is not a normalized DimensionGraph, " 

148 "a universe must be provided.") 

149 dimensions = universe.extract(dimensions) 

150 self._dimensions = dimensions 

151 if name in self._dimensions.universe.getGovernorDimensions().names: 

152 raise ValueError(f"Governor dimension name {name} cannot be used as a dataset type name.") 

153 if not isinstance(storageClass, (StorageClass, str)): 

154 raise ValueError("StorageClass argument must be StorageClass or str. " 

155 f"Got {storageClass}") 

156 self._storageClass: Optional[StorageClass] 

157 if isinstance(storageClass, StorageClass): 

158 self._storageClass = storageClass 

159 self._storageClassName = storageClass.name 

160 else: 

161 self._storageClass = None 

162 self._storageClassName = storageClass 

163 

164 self._parentStorageClass: Optional[StorageClass] = None 

165 self._parentStorageClassName: Optional[str] = None 

166 if parentStorageClass is not None: 

167 if not isinstance(storageClass, (StorageClass, str)): 

168 raise ValueError("Parent StorageClass argument must be StorageClass or str. " 

169 f"Got {parentStorageClass}") 

170 

171 # Only allowed for a component dataset type 

172 _, componentName = self.splitDatasetTypeName(self._name) 

173 if componentName is None: 

174 raise ValueError("Can not specify a parent storage class if this is not a component" 

175 f" ({self._name})") 

176 if isinstance(parentStorageClass, StorageClass): 

177 self._parentStorageClass = parentStorageClass 

178 self._parentStorageClassName = parentStorageClass.name 

179 else: 

180 self._parentStorageClassName = parentStorageClass 

181 

182 # Ensure that parent storage class is specified when we have 

183 # a component and is not specified when we don't 

184 _, componentName = self.splitDatasetTypeName(self._name) 

185 if parentStorageClass is None and componentName is not None: 

186 raise ValueError(f"Component dataset type '{self._name}' constructed without parent" 

187 " storage class") 

188 if parentStorageClass is not None and componentName is None: 

189 raise ValueError(f"Parent storage class specified by {self._name} is not a composite") 

190 self._isCalibration = isCalibration 

191 

192 def __repr__(self) -> str: 

193 extra = "" 

194 if self._parentStorageClassName: 

195 extra = f", parentStorageClass={self._parentStorageClassName}" 

196 if self._isCalibration: 

197 extra += ", isCalibration=True" 

198 return f"DatasetType({self.name!r}, {self.dimensions}, {self._storageClassName}{extra})" 

199 

200 def __eq__(self, other: Any) -> bool: 

201 if not isinstance(other, type(self)): 

202 return False 

203 if self._name != other._name: 

204 return False 

205 if self._dimensions != other._dimensions: 

206 return False 

207 if self._storageClass is not None and other._storageClass is not None: 

208 if self._storageClass != other._storageClass: 

209 return False 

210 else: 

211 if self._storageClassName != other._storageClassName: 

212 return False 

213 if self._isCalibration != other._isCalibration: 

214 return False 

215 if self._parentStorageClass is not None and other._parentStorageClass is not None: 

216 return self._parentStorageClass == other._parentStorageClass 

217 else: 

218 return self._parentStorageClassName == other._parentStorageClassName 

219 

    def __hash__(self) -> int:
        """Hash DatasetType instance.

        This only uses StorageClass names, which is consistent with the
        implementation of the StorageClass hash method.
        """
        return hash((self._name, self._dimensions, self._storageClassName,
                     self._parentStorageClassName))

228 

229 def __lt__(self, other: Any) -> bool: 

230 """Sort using the dataset type name. 

231 """ 

232 if not isinstance(other, type(self)): 

233 return NotImplemented 

234 return self.name < other.name 

235 

    @property
    def name(self) -> str:
        """A string name for the Dataset; must correspond to the same
        `DatasetType` across all Registries.
        """
        return self._name

242 

    @property
    def dimensions(self) -> DimensionGraph:
        r"""The `Dimension`\ s that label and relate instances of this
        `DatasetType` (`DimensionGraph`).
        """
        return self._dimensions

249 

    @property
    def storageClass(self) -> StorageClass:
        """`StorageClass` instance that defines how this `DatasetType`
        is persisted. Note that if DatasetType was constructed with a name
        of a StorageClass then Butler has to be initialized before using
        this property.
        """
        if self._storageClass is None:
            # Lazy resolution: look the stored name up in the singleton
            # factory the first time an instance is requested.
            self._storageClass = StorageClassFactory().getStorageClass(self._storageClassName)
        return self._storageClass

260 

261 @property 

262 def parentStorageClass(self) -> Optional[StorageClass]: 

263 """`StorageClass` instance that defines how the composite associated 

264 with this `DatasetType` is persisted. 

265 

266 Note that if DatasetType was constructed with a name of a 

267 StorageClass then Butler has to be initialized before using this 

268 property. Can be `None` if this is not a component of a composite. 

269 Must be defined if this is a component. 

270 """ 

271 if self._parentStorageClass is None and self._parentStorageClassName is None: 

272 return None 

273 if self._parentStorageClass is None and self._parentStorageClassName is not None: 

274 self._parentStorageClass = StorageClassFactory().getStorageClass(self._parentStorageClassName) 

275 return self._parentStorageClass 

276 

    def isCalibration(self) -> bool:
        """Return whether datasets of this type may be included in calibration
        collections.

        Returns
        -------
        flag : `bool`
            `True` if datasets of this type may be included in calibration
            collections.
        """
        return self._isCalibration

288 

    def finalizeParentStorageClass(self, newParent: StorageClass) -> None:
        """Replace the current placeholder parent storage class with
        the real parent.

        Parameters
        ----------
        newParent : `StorageClass`
            The new parent to be associated with this composite dataset
            type.  This replaces the temporary placeholder parent that
            was specified during construction.

        Raises
        ------
        ValueError
            Raised if this dataset type is not a component of a composite.
            Raised if a StorageClass is not given.
            Raised if the parent currently associated with the dataset
            type is not a placeholder.
        """
        # Check order matters for which ValueError message callers see:
        # component-ness first, then placeholder state, then argument type.
        if not self.isComponent():
            raise ValueError("Can not set a parent storage class if this is not a component"
                             f" ({self.name})")
        if self._parentStorageClass != self.PlaceholderParentStorageClass:
            raise ValueError(f"This DatasetType has a parent of {self._parentStorageClassName} and"
                             " is not a placeholder.")
        if not isinstance(newParent, StorageClass):
            raise ValueError(f"Supplied parent must be a StorageClass. Got {newParent!r}")
        self._parentStorageClass = newParent
        self._parentStorageClassName = newParent.name

318 

319 @staticmethod 

320 def splitDatasetTypeName(datasetTypeName: str) -> Tuple[str, Optional[str]]: 

321 """Given a dataset type name, return the root name and the component 

322 name. 

323 

324 Parameters 

325 ---------- 

326 datasetTypeName : `str` 

327 The name of the dataset type, can include a component using 

328 a "."-separator. 

329 

330 Returns 

331 ------- 

332 rootName : `str` 

333 Root name without any components. 

334 componentName : `str` 

335 The component if it has been specified, else `None`. 

336 

337 Notes 

338 ----- 

339 If the dataset type name is ``a.b.c`` this method will return a 

340 root name of ``a`` and a component name of ``b.c``. 

341 """ 

342 comp = None 

343 root = datasetTypeName 

344 if "." in root: 

345 # If there is doubt, the component is after the first "." 

346 root, comp = root.split(".", maxsplit=1) 

347 return root, comp 

348 

    def nameAndComponent(self) -> Tuple[str, Optional[str]]:
        """Return the root name of this dataset type and the component
        name (if defined).

        Returns
        -------
        rootName : `str`
            Root name for this `DatasetType` without any components.
        componentName : `str`
            The component if it has been specified, else `None`.
        """
        return self.splitDatasetTypeName(self.name)

361 

362 def component(self) -> Optional[str]: 

363 """Component name (if defined) 

364 

365 Returns 

366 ------- 

367 comp : `str` 

368 Name of component part of DatasetType name. `None` if this 

369 `DatasetType` is not associated with a component. 

370 """ 

371 _, comp = self.nameAndComponent() 

372 return comp 

373 

374 def componentTypeName(self, component: str) -> str: 

375 """Given a component name, derive the datasetTypeName of that component 

376 

377 Parameters 

378 ---------- 

379 component : `str` 

380 Name of component 

381 

382 Returns 

383 ------- 

384 derived : `str` 

385 Compound name of this `DatasetType` and the component. 

386 

387 Raises 

388 ------ 

389 KeyError 

390 Requested component is not supported by this `DatasetType`. 

391 """ 

392 if component in self.storageClass.allComponents(): 

393 return self.nameWithComponent(self.name, component) 

394 raise KeyError("Requested component ({}) not understood by this DatasetType".format(component)) 

395 

396 def makeComponentDatasetType(self, component: str) -> DatasetType: 

397 """Return a DatasetType suitable for the given component, assuming the 

398 same dimensions as the parent. 

399 

400 Parameters 

401 ---------- 

402 component : `str` 

403 Name of component 

404 

405 Returns 

406 ------- 

407 datasetType : `DatasetType` 

408 A new DatasetType instance. 

409 """ 

410 # The component could be a read/write or read component 

411 return DatasetType(self.componentTypeName(component), dimensions=self.dimensions, 

412 storageClass=self.storageClass.allComponents()[component], 

413 parentStorageClass=self.storageClass) 

414 

415 def makeAllComponentDatasetTypes(self) -> List[DatasetType]: 

416 """Return all the component dataset types assocaited with this 

417 dataset type. 

418 

419 Returns 

420 ------- 

421 all : `list` of `DatasetType` 

422 All the component dataset types. If this is not a composite 

423 then returns an empty list. 

424 """ 

425 return [self.makeComponentDatasetType(componentName) 

426 for componentName in self.storageClass.allComponents()] 

427 

428 def isComponent(self) -> bool: 

429 """Boolean indicating whether this `DatasetType` refers to a 

430 component of a composite. 

431 

432 Returns 

433 ------- 

434 isComponent : `bool` 

435 `True` if this `DatasetType` is a component, `False` otherwise. 

436 """ 

437 if self.component(): 

438 return True 

439 return False 

440 

    def isComposite(self) -> bool:
        """Boolean indicating whether this `DatasetType` is a composite type.

        Returns
        -------
        isComposite : `bool`
            `True` if this `DatasetType` is a composite type, `False`
            otherwise.
        """
        return self.storageClass.isComposite()

451 

452 def _lookupNames(self) -> Tuple[LookupKey, ...]: 

453 """Name keys to use when looking up this datasetType in a 

454 configuration. 

455 

456 The names are returned in order of priority. 

457 

458 Returns 

459 ------- 

460 names : `tuple` of `LookupKey` 

461 Tuple of the `DatasetType` name and the `StorageClass` name. 

462 If the name includes a component the name with the component 

463 is first, then the name without the component and finally 

464 the storage class name. 

465 """ 

466 rootName, componentName = self.nameAndComponent() 

467 lookups: Tuple[LookupKey, ...] = (LookupKey(name=self.name),) 

468 if componentName is not None: 

469 lookups = lookups + (LookupKey(name=rootName),) 

470 

471 if self.dimensions: 

472 # Dimensions are a lower priority than dataset type name 

473 lookups = lookups + (LookupKey(dimensions=self.dimensions),) 

474 

475 return lookups + self.storageClass._lookupNames() 

476 

477 def to_simple(self, minimal: bool = False) -> Union[Dict, str]: 

478 """Convert this class to a simple python type suitable for 

479 serialization. 

480 

481 Parameters 

482 ---------- 

483 minimal : `bool`, optional 

484 Use minimal serialization. Requires Registry to convert 

485 back to a full type. 

486 

487 Returns 

488 ------- 

489 simple : `dict` or `str` 

490 The object converted to a dictionary or a simple string. 

491 """ 

492 if minimal: 

493 # Only needs the name. 

494 return self.name 

495 

496 # Convert to a dict form 

497 as_dict = {"name": self.name, 

498 "storageClass": self._storageClassName, 

499 "isCalibration": self._isCalibration, 

500 "dimensions": self.dimensions.to_simple(), 

501 } 

502 

503 if self._parentStorageClassName is not None: 

504 as_dict["parentStorageClass"] = self._parentStorageClassName 

505 return as_dict 

506 

507 @classmethod 

508 def from_simple(cls, simple: Union[Dict, str], 

509 universe: Optional[DimensionUniverse] = None, 

510 registry: Optional[Registry] = None) -> DatasetType: 

511 """Construct a new object from the data returned from the `to_simple` 

512 method. 

513 

514 Parameters 

515 ---------- 

516 simple : `dict` of [`str`, `Any`] or `str` 

517 The value returned by `to_simple()`. 

518 universe : `DimensionUniverse` 

519 The special graph of all known dimensions of which this graph will 

520 be a subset. Can be `None` if a registry is provided. 

521 registry : `lsst.daf.butler.Registry`, optional 

522 Registry to use to convert simple name of a DatasetType to 

523 a full `DatasetType`. Can be `None` if a full description of 

524 the type is provided along with a universe. 

525 

526 Returns 

527 ------- 

528 datasetType : `DatasetType` 

529 Newly-constructed object. 

530 """ 

531 if isinstance(simple, str): 

532 if registry is None: 

533 raise ValueError(f"Unable to convert a DatasetType name '{simple}' to DatasetType" 

534 " without a Registry") 

535 return registry.getDatasetType(simple) 

536 

537 if universe is None and registry is None: 

538 raise ValueError("One of universe or registry must be provided.") 

539 

540 if universe is None and registry is not None: 

541 # registry should not be none by now but test helps mypy 

542 universe = registry.dimensions 

543 

544 if universe is None: 

545 # this is for mypy 

546 raise ValueError("Unable to determine a usable universe") 

547 

548 return cls(name=simple["name"], 

549 dimensions=DimensionGraph.from_simple(simple["dimensions"], universe=universe), 

550 storageClass=simple["storageClass"], 

551 isCalibration=simple.get("isCalibration", False), 

552 parentStorageClass=simple.get("parentStorageClass"), 

553 universe=universe) 

554 

    # JSON (de)serialization built on top of to_simple()/from_simple()
    # via the shared generic helpers.
    to_json = to_json_generic
    from_json = classmethod(from_json_generic)

557 

    def __reduce__(self) -> Tuple[Callable, Tuple[Type[DatasetType],
                                                  Tuple[str, DimensionGraph, str, Optional[str]],
                                                  Dict[str, bool]]]:
        """Support pickling.

        StorageClass instances can not normally be pickled, so we pickle
        StorageClass name instead of instance.
        """
        # _unpickle_via_factory calls the class constructor, which lets the
        # keyword-only ``isCalibration`` argument be passed on unpickling.
        return _unpickle_via_factory, (self.__class__, (self.name, self.dimensions, self._storageClassName,
                                                        self._parentStorageClassName),
                                       {"isCalibration": self._isCalibration})

569 

    def __deepcopy__(self, memo: Any) -> DatasetType:
        """Support for deep copy method.

        Normally ``deepcopy`` will use pickle mechanism to make copies.
        We want to avoid that to support (possibly degenerate) use case when
        DatasetType is constructed with StorageClass instance which is not
        registered with StorageClassFactory (this happens in unit tests).
        Instead we re-implement ``__deepcopy__`` method.
        """
        # Copy the resolved StorageClass instance when present, otherwise
        # just the name, so unregistered StorageClasses survive the copy.
        return DatasetType(name=deepcopy(self.name, memo),
                           dimensions=deepcopy(self.dimensions, memo),
                           storageClass=deepcopy(self._storageClass or self._storageClassName, memo),
                           parentStorageClass=deepcopy(self._parentStorageClass
                                                       or self._parentStorageClassName, memo),
                           isCalibration=deepcopy(self._isCalibration, memo))

585 

586 

def _unpickle_via_factory(factory: Callable, args: Any, kwargs: Any) -> DatasetType:
    """Unpickle something by calling a factory.

    Allows subclasses to unpickle using `__reduce__` with keyword
    arguments as well as positional arguments.
    """
    return factory(*args, **kwargs)