# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ["DatasetType", "SerializedDatasetType"]

from copy import deepcopy
import re

from types import MappingProxyType

from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    Dict,
    Iterable,
    List,
    Mapping,
    Optional,
    Tuple,
    Type,
    Union,
)

from pydantic import BaseModel, StrictStr, StrictBool

from ..storageClass import StorageClass, StorageClassFactory
from ..dimensions import DimensionGraph, SerializedDimensionGraph
from ..configSupport import LookupKey
from ..json import from_json_pydantic, to_json_pydantic

if TYPE_CHECKING:
    from ..dimensions import Dimension, DimensionUniverse
    from ...registry import Registry


def _safeMakeMappingProxyType(data: Optional[Mapping]) -> Mapping:
    """Return a read-only view of ``data``, substituting an empty dict
    when ``data`` is `None`."""
    if data is None:
        data = {}
    return MappingProxyType(data)


class SerializedDatasetType(BaseModel):
    """Simplified model of a `DatasetType` suitable for serialization."""

    name: StrictStr
    storageClass: Optional[StrictStr] = None
    dimensions: Optional[SerializedDimensionGraph] = None
    parentStorageClass: Optional[StrictStr] = None
    isCalibration: StrictBool = False
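
# A minimal usage sketch for the model above (hedged: the field values are
# illustrative, not drawn from any real registry):
#
#     s = SerializedDatasetType(name="calexp", storageClass="ExposureF")
#     s.isCalibration   # -> False (the default)
#     s.json()          # pydantic serialization to a JSON string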


class DatasetType:
    r"""A named category of Datasets.

    Defines how they are organized, related, and stored.

    A concrete, final class whose instances represent `DatasetType`\ s.
    `DatasetType` instances may be constructed without a `Registry`,
    but they must be registered
    via `Registry.registerDatasetType()` before corresponding Datasets
    may be added.
    `DatasetType` instances are immutable.

    Parameters
    ----------
    name : `str`
        A string name for the Dataset; must correspond to the same
        `DatasetType` across all Registries. Names must start with an
        upper or lowercase letter, and may contain only letters, numbers,
        and underscores. Component dataset types should contain a single
        period separating the base dataset type name from the component
        name (and may be recursive).
    dimensions : `DimensionGraph` or iterable of `Dimension`
        Dimensions used to label and relate instances of this `DatasetType`.
        If not a `DimensionGraph`, ``universe`` must be provided as well.
    storageClass : `StorageClass` or `str`
        Instance of a `StorageClass` or name of `StorageClass` that defines
        how this `DatasetType` is persisted.
    parentStorageClass : `StorageClass` or `str`, optional
        Instance of a `StorageClass` or name of `StorageClass` that defines
        how the composite parent is persisted. Must be `None` if this
        is not a component.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to normalize ``dimensions`` if it
        is not already a `DimensionGraph`.
    isCalibration : `bool`, optional
        If `True`, this dataset type may be included in
        `~CollectionType.CALIBRATION` collections.
    """
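
    # Construction sketch (hedged: ``dims`` stands for a `DimensionGraph`
    # obtained from a real `DimensionUniverse`; the names are illustrative):
    #
    #     dt = DatasetType("calexp", dimensions=dims, storageClass="ExposureF")
    #     dt.isComponent()   # -> False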

    __slots__ = ("_name", "_dimensions", "_storageClass", "_storageClassName",
                 "_parentStorageClass", "_parentStorageClassName",
                 "_isCalibration")

    _serializedType = SerializedDatasetType

    VALID_NAME_REGEX = re.compile("^[a-zA-Z][a-zA-Z0-9_]*(\\.[a-zA-Z][a-zA-Z0-9_]*)*$")
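    # The grammar above accepts names like "calexp" and "calexp.wcs.cd"
    # but rejects e.g. "1bad" or "bad-name" (an illustration, not an
    # exhaustive specification).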

    @staticmethod
    def nameWithComponent(datasetTypeName: str, componentName: str) -> str:
        """Form a valid DatasetTypeName from a parent and component.

        No validation is performed.

        Parameters
        ----------
        datasetTypeName : `str`
            Base type name.
        componentName : `str`
            Name of component.

        Returns
        -------
        compTypeName : `str`
            Name to use for component DatasetType.
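
        Examples
        --------
        A doctest-style illustration (pure string formatting; no registry
        is needed):

        >>> DatasetType.nameWithComponent("calexp", "wcs")
        'calexp.wcs'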

        """
        return "{}.{}".format(datasetTypeName, componentName)

    def __init__(self, name: str, dimensions: Union[DimensionGraph, Iterable[Dimension]],
                 storageClass: Union[StorageClass, str],
                 parentStorageClass: Optional[Union[StorageClass, str]] = None, *,
                 universe: Optional[DimensionUniverse] = None,
                 isCalibration: bool = False):
        if self.VALID_NAME_REGEX.match(name) is None:
            raise ValueError(f"DatasetType name '{name}' is invalid.")
        self._name = name
        if not isinstance(dimensions, DimensionGraph):
            if universe is None:
                raise ValueError("If dimensions is not a normalized DimensionGraph, "
                                 "a universe must be provided.")
            dimensions = universe.extract(dimensions)
        self._dimensions = dimensions
        if name in self._dimensions.universe.getGovernorDimensions().names:
            raise ValueError(f"Governor dimension name {name} cannot be used as a dataset type name.")
        if not isinstance(storageClass, (StorageClass, str)):
            raise ValueError("StorageClass argument must be StorageClass or str. "
                             f"Got {storageClass}")
        self._storageClass: Optional[StorageClass]
        if isinstance(storageClass, StorageClass):
            self._storageClass = storageClass
            self._storageClassName = storageClass.name
        else:
            self._storageClass = None
            self._storageClassName = storageClass

        self._parentStorageClass: Optional[StorageClass] = None
        self._parentStorageClassName: Optional[str] = None
        if parentStorageClass is not None:
            # Validate the parent argument itself, not storageClass again.
            if not isinstance(parentStorageClass, (StorageClass, str)):
                raise ValueError("Parent StorageClass argument must be StorageClass or str. "
                                 f"Got {parentStorageClass}")

            # Only allowed for a component dataset type
            _, componentName = self.splitDatasetTypeName(self._name)
            if componentName is None:
                raise ValueError("Can not specify a parent storage class if this is not a component"
                                 f" ({self._name})")
            if isinstance(parentStorageClass, StorageClass):
                self._parentStorageClass = parentStorageClass
                self._parentStorageClassName = parentStorageClass.name
            else:
                self._parentStorageClassName = parentStorageClass

        # Ensure that parent storage class is specified when we have
        # a component and is not specified when we don't
        _, componentName = self.splitDatasetTypeName(self._name)
        if parentStorageClass is None and componentName is not None:
            raise ValueError(f"Component dataset type '{self._name}' constructed without parent"
                             " storage class")
        if parentStorageClass is not None and componentName is None:
            raise ValueError(f"Parent storage class specified but dataset type '{self._name}'"
                             " is not a component")
        self._isCalibration = isCalibration

    def __repr__(self) -> str:
        extra = ""
        if self._parentStorageClassName:
            extra = f", parentStorageClass={self._parentStorageClassName}"
        if self._isCalibration:
            extra += ", isCalibration=True"
        return f"DatasetType({self.name!r}, {self.dimensions}, {self._storageClassName}{extra})"

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, type(self)):
            return False
        if self._name != other._name:
            return False
        if self._dimensions != other._dimensions:
            return False
        if self._storageClass is not None and other._storageClass is not None:
            if self._storageClass != other._storageClass:
                return False
        else:
            if self._storageClassName != other._storageClassName:
                return False
        if self._isCalibration != other._isCalibration:
            return False
        if self._parentStorageClass is not None and other._parentStorageClass is not None:
            return self._parentStorageClass == other._parentStorageClass
        else:
            return self._parentStorageClassName == other._parentStorageClassName

    def __hash__(self) -> int:
        """Hash DatasetType instance.

        This only uses the StorageClass name, which is consistent with the
        implementation of the StorageClass hash method.
        """
        return hash((self._name, self._dimensions, self._storageClassName,
                     self._parentStorageClassName))

    def __lt__(self, other: Any) -> bool:
        """Sort using the dataset type name."""
        if not isinstance(other, type(self)):
            return NotImplemented
        return self.name < other.name

    @property
    def name(self) -> str:
        """Return a string name for the Dataset.

        Must correspond to the same `DatasetType` across all Registries.
        """
        return self._name

    @property
    def dimensions(self) -> DimensionGraph:
        r"""Return the `Dimension`\ s for this dataset type.

        The dimensions label and relate instances of this
        `DatasetType` (`DimensionGraph`).
        """
        return self._dimensions

    @property
    def storageClass(self) -> StorageClass:
        """Return the `StorageClass` instance associated with this dataset type.

        The `StorageClass` defines how this `DatasetType`
        is persisted. Note that if the DatasetType was constructed with only
        the name of a StorageClass, then the Butler has to be initialized
        before using this property.
        """
        if self._storageClass is None:
            self._storageClass = StorageClassFactory().getStorageClass(self._storageClassName)
        return self._storageClass

    @property
    def parentStorageClass(self) -> Optional[StorageClass]:
        """Return the storage class of the composite containing this
        component.

        Note that if the DatasetType was constructed with only the name of a
        StorageClass, then the Butler has to be initialized before using this
        property. Can be `None` if this is not a component of a composite.
        Must be defined if this is a component.
        """
        if self._parentStorageClass is None and self._parentStorageClassName is None:
            return None
        if self._parentStorageClass is None and self._parentStorageClassName is not None:
            self._parentStorageClass = StorageClassFactory().getStorageClass(self._parentStorageClassName)
        return self._parentStorageClass

    def isCalibration(self) -> bool:
        """Return whether datasets of this type can be in calibration
        collections.

        Returns
        -------
        flag : `bool`
            `True` if datasets of this type may be included in calibration
            collections.
        """
        return self._isCalibration

    @staticmethod
    def splitDatasetTypeName(datasetTypeName: str) -> Tuple[str, Optional[str]]:
        """Return the root name and the component from a composite name.

        Parameters
        ----------
        datasetTypeName : `str`
            The name of the dataset type; can include a component using
            a "."-separator.

        Returns
        -------
        rootName : `str`
            Root name without any components.
        componentName : `str`
            The component if it has been specified, else `None`.

        Notes
        -----
        If the dataset type name is ``a.b.c`` this method will return a
        root name of ``a`` and a component name of ``b.c``.
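
        Examples
        --------
        A doctest-style illustration (pure string handling; no registry
        is needed):

        >>> DatasetType.splitDatasetTypeName("a.b.c")
        ('a', 'b.c')
        >>> DatasetType.splitDatasetTypeName("calexp")
        ('calexp', None)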

        """
        comp = None
        root = datasetTypeName
        if "." in root:
            # If there is doubt, the component is after the first "."
            root, comp = root.split(".", maxsplit=1)
        return root, comp

    def nameAndComponent(self) -> Tuple[str, Optional[str]]:
        """Return the root name of this dataset type and any component.

        Returns
        -------
        rootName : `str`
            Root name for this `DatasetType` without any components.
        componentName : `str`
            The component if it has been specified, else `None`.
        """
        return self.splitDatasetTypeName(self.name)

    def component(self) -> Optional[str]:
        """Return the component name (if defined).

        Returns
        -------
        comp : `str`
            Name of the component part of the DatasetType name. `None` if
            this `DatasetType` is not associated with a component.
        """
        _, comp = self.nameAndComponent()
        return comp

    def componentTypeName(self, component: str) -> str:
        """Derive a component dataset type name from a composite.

        Parameters
        ----------
        component : `str`
            Name of the component.

        Returns
        -------
        derived : `str`
            Compound name of this `DatasetType` and the component.

        Raises
        ------
        KeyError
            Raised if the requested component is not supported by this
            `DatasetType`.
        """
        if component in self.storageClass.allComponents():
            return self.nameWithComponent(self.name, component)
        raise KeyError("Requested component ({}) not understood by this DatasetType".format(component))

    def makeCompositeDatasetType(self) -> DatasetType:
        """Return a composite dataset type from the component.

        Returns
        -------
        composite : `DatasetType`
            The composite dataset type.

        Raises
        ------
        RuntimeError
            Raised if this dataset type is not a component dataset type.
        """
        if not self.isComponent():
            raise RuntimeError(f"DatasetType {self.name} must be a component to form the composite")
        composite_name, _ = self.nameAndComponent()
        if self.parentStorageClass is None:
            raise ValueError("Parent storage class is not set. "
                             f"Unable to create composite type from {self.name}")
        return DatasetType(composite_name, dimensions=self.dimensions,
                           storageClass=self.parentStorageClass)

    def makeComponentDatasetType(self, component: str) -> DatasetType:
        """Return a component dataset type from a composite.

        Assumes the same dimensions as the parent.

        Parameters
        ----------
        component : `str`
            Name of the component.

        Returns
        -------
        datasetType : `DatasetType`
            A new DatasetType instance.
        """
        # The component could be a read/write or read component
        return DatasetType(self.componentTypeName(component), dimensions=self.dimensions,
                           storageClass=self.storageClass.allComponents()[component],
                           parentStorageClass=self.storageClass)
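
    # Illustration (hedged; the names are hypothetical): for a composite
    # "calexp" whose storage class defines an "image" component,
    # ``makeComponentDatasetType("image")`` yields a DatasetType named
    # "calexp.image" whose parentStorageClass is the composite's own
    # storage class.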

    def makeAllComponentDatasetTypes(self) -> List[DatasetType]:
        """Return all component dataset types for this composite.

        Returns
        -------
        all : `list` of `DatasetType`
            All the component dataset types. If this is not a composite
            then an empty list is returned.
        """
        return [self.makeComponentDatasetType(componentName)
                for componentName in self.storageClass.allComponents()]

    def isComponent(self) -> bool:
        """Return whether this `DatasetType` refers to a component.

        Returns
        -------
        isComponent : `bool`
            `True` if this `DatasetType` is a component, `False` otherwise.
        """
        if self.component():
            return True
        return False

    def isComposite(self) -> bool:
        """Return whether this `DatasetType` is a composite.

        Returns
        -------
        isComposite : `bool`
            `True` if this `DatasetType` is a composite type, `False`
            otherwise.
        """
        return self.storageClass.isComposite()

    def _lookupNames(self) -> Tuple[LookupKey, ...]:
        """Return name keys to use for lookups in configurations.

        The names are returned in order of priority.

        Returns
        -------
        names : `tuple` of `LookupKey`
            Tuple of the `DatasetType` name and the `StorageClass` name.
            If the name includes a component, the name with the component
            is first, then the name without the component, and finally
            the storage class name and the storage class name of the
            composite.
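
            For example, a hypothetical component type ``calexp.wcs``
            would yield keys for ``calexp.wcs``, then ``calexp``, then
            its dimensions, and then the storage class names.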

        """
        rootName, componentName = self.nameAndComponent()
        lookups: Tuple[LookupKey, ...] = (LookupKey(name=self.name),)
        if componentName is not None:
            lookups = lookups + (LookupKey(name=rootName),)

        if self.dimensions:
            # Dimensions are a lower priority than dataset type name
            lookups = lookups + (LookupKey(dimensions=self.dimensions),)

        storageClasses = self.storageClass._lookupNames()
        if componentName is not None and self.parentStorageClass is not None:
            storageClasses += self.parentStorageClass._lookupNames()

        return lookups + storageClasses

    def to_simple(self, minimal: bool = False) -> SerializedDatasetType:
        """Convert this class to a simple python type.

        This makes it suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. Requires Registry to convert
            back to a full type.

        Returns
        -------
        simple : `SerializedDatasetType`
            The object converted to a class suitable for serialization.
        """
        as_dict: Dict[str, Any]
        if minimal:
            # Only needs the name.
            as_dict = {"name": self.name}
        else:
            # Convert to a dict form
            as_dict = {"name": self.name,
                       "storageClass": self._storageClassName,
                       "isCalibration": self._isCalibration,
                       "dimensions": self.dimensions.to_simple(),
                       }

        if self._parentStorageClassName is not None:
            as_dict["parentStorageClass"] = self._parentStorageClassName
        return SerializedDatasetType(**as_dict)

    @classmethod
    def from_simple(cls, simple: SerializedDatasetType,
                    universe: Optional[DimensionUniverse] = None,
                    registry: Optional[Registry] = None) -> DatasetType:
        """Construct a new object from the simplified form.

        This is usually data returned from the `to_simple` method.

        Parameters
        ----------
        simple : `SerializedDatasetType`
            The value returned by `to_simple()`.
        universe : `DimensionUniverse`
            The special graph of all known dimensions of which this graph
            will be a subset. Can be `None` if a registry is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry to use to convert the simple name of a DatasetType to
            a full `DatasetType`. Can be `None` if a full description of
            the type is provided along with a universe.

        Returns
        -------
        datasetType : `DatasetType`
            Newly-constructed object.
        """
        if simple.storageClass is None:
            # Treat this as minimalist representation
            if registry is None:
                raise ValueError(f"Unable to convert a DatasetType name '{simple}' to DatasetType"
                                 " without a Registry")
            return registry.getDatasetType(simple.name)

        if universe is None and registry is None:
            raise ValueError("One of universe or registry must be provided.")

        if universe is None and registry is not None:
            # registry should not be None by now but this check helps mypy
            universe = registry.dimensions

        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")

        if simple.dimensions is None:
            # mypy hint
            raise ValueError(f"Dimensions must be specified in {simple}")

        return cls(name=simple.name,
                   dimensions=DimensionGraph.from_simple(simple.dimensions, universe=universe),
                   storageClass=simple.storageClass,
                   isCalibration=simple.isCalibration,
                   parentStorageClass=simple.parentStorageClass,
                   universe=universe)

    to_json = to_json_pydantic
    from_json = classmethod(from_json_pydantic)
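
    # Serialization round-trip sketch (hedged: ``dt`` stands for any
    # constructed DatasetType and ``universe`` for its DimensionUniverse;
    # neither is defined here):
    #
    #     simple = dt.to_simple()
    #     restored = DatasetType.from_simple(simple, universe=universe)
    #     assert restored == dt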

    def __reduce__(self) -> Tuple[Callable, Tuple[Type[DatasetType],
                                                  Tuple[str, DimensionGraph, str, Optional[str]],
                                                  Dict[str, bool]]]:
        """Support pickling.

        StorageClass instances can not normally be pickled, so we pickle
        the StorageClass name instead of the instance.
        """
        return _unpickle_via_factory, (self.__class__, (self.name, self.dimensions, self._storageClassName,
                                                        self._parentStorageClassName),
                                       {"isCalibration": self._isCalibration})

    def __deepcopy__(self, memo: Any) -> DatasetType:
        """Support for the deep copy method.

        Normally ``deepcopy`` will use the pickle mechanism to make copies.
        We want to avoid that to support the (possibly degenerate) use case
        where a DatasetType is constructed with a StorageClass instance that
        is not registered with the StorageClassFactory (this happens in unit
        tests). Instead we re-implement the ``__deepcopy__`` method.
        """
        return DatasetType(name=deepcopy(self.name, memo),
                           dimensions=deepcopy(self.dimensions, memo),
                           storageClass=deepcopy(self._storageClass or self._storageClassName, memo),
                           parentStorageClass=deepcopy(self._parentStorageClass
                                                       or self._parentStorageClassName, memo),
                           isCalibration=deepcopy(self._isCalibration, memo))


def _unpickle_via_factory(factory: Callable, args: Any, kwargs: Any) -> DatasetType:
    """Unpickle something by calling a factory.

    Allows subclasses to unpickle using `__reduce__` with keyword
    arguments as well as positional arguments.
    """
    return factory(*args, **kwargs)
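
# Pickling round-trip sketch (hedged: ``dt`` stands for any constructed
# DatasetType; ``__reduce__`` above pickles storage class names rather than
# StorageClass instances, and equality then falls back to name comparison):
#
#     import pickle
#     dt2 = pickle.loads(pickle.dumps(dt))
#     assert dt2 == dt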