# Coverage for python/lsst/daf/butler/core/datasets/type.py: 22% (203 statements)


# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ["DatasetType", "SerializedDatasetType"]

from copy import deepcopy
import re

from types import MappingProxyType

from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    Dict,
    Iterable,
    List,
    Mapping,
    Optional,
    Tuple,
    Type,
    Union,
)

from pydantic import BaseModel, StrictStr, StrictBool

from ..storageClass import StorageClass, StorageClassFactory
from ..dimensions import DimensionGraph, SerializedDimensionGraph
from ..configSupport import LookupKey
from ..json import from_json_pydantic, to_json_pydantic

if TYPE_CHECKING:
    from ..dimensions import Dimension, DimensionUniverse
    from ...registry import Registry


def _safeMakeMappingProxyType(data: Optional[Mapping]) -> Mapping:
    """Return a read-only mapping view of ``data``, substituting an empty
    mapping for `None`."""
    if data is None:
        data = {}
    return MappingProxyType(data)


class SerializedDatasetType(BaseModel):
    """Simplified model of a `DatasetType` suitable for serialization."""

    name: StrictStr
    storageClass: Optional[StrictStr] = None
    dimensions: Optional[SerializedDimensionGraph] = None
    parentStorageClass: Optional[StrictStr] = None
    isCalibration: StrictBool = False

    @classmethod
    def direct(cls, *, name: str, storageClass: Optional[str] = None,
               dimensions: Optional[Dict] = None,
               parentStorageClass: Optional[str] = None, isCalibration: bool = False
               ) -> SerializedDatasetType:
        """Construct a `SerializedDatasetType` directly without validators.

        This differs from Pydantic's construct method in that the arguments are
        explicitly what the model requires, and it will recurse through
        members, constructing them from their corresponding `direct` methods.

        This method should only be called when the inputs are trusted.
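
        Examples
        --------
        A minimal sketch; the names below are illustrative placeholders and,
        since validators are skipped, the caller must guarantee they are
        well formed.

        >>> s = SerializedDatasetType.direct(name="example_type",
        ...                                  storageClass="StructuredDataDict")
        >>> s.name
        'example_type'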

        """
        node = SerializedDatasetType.__new__(cls)
        setter = object.__setattr__
        setter(node, 'name', name)
        setter(node, 'storageClass', storageClass)
        setter(node, 'dimensions',
               dimensions if dimensions is None else SerializedDimensionGraph.direct(**dimensions))
        setter(node, 'parentStorageClass', parentStorageClass)
        setter(node, 'isCalibration', isCalibration)
        setter(node, '__fields_set__', {'name', 'storageClass', 'dimensions', 'parentStorageClass',
                                        'isCalibration'})
        return node


class DatasetType:
    r"""A named category of Datasets.

    Defines how they are organized, related, and stored.

    A concrete, final class whose instances represent `DatasetType`\ s.
    `DatasetType` instances may be constructed without a `Registry`, but
    they must be registered via `Registry.registerDatasetType()` before
    corresponding Datasets may be added. `DatasetType` instances are
    immutable.

    Parameters
    ----------
    name : `str`
        A string name for the Dataset; must correspond to the same
        `DatasetType` across all Registries. Names must start with an
        upper or lowercase letter, and may contain only letters, numbers,
        and underscores. Component dataset types should contain a single
        period separating the base dataset type name from the component name
        (and may be recursive).
    dimensions : `DimensionGraph` or iterable of `Dimension`
        Dimensions used to label and relate instances of this `DatasetType`.
        If not a `DimensionGraph`, ``universe`` must be provided as well.
    storageClass : `StorageClass` or `str`
        Instance of a `StorageClass` or name of `StorageClass` that defines
        how this `DatasetType` is persisted.
    parentStorageClass : `StorageClass` or `str`, optional
        Instance of a `StorageClass` or name of `StorageClass` that defines
        how the composite parent is persisted. Must be `None` if this
        is not a component.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to normalize ``dimensions`` if it
        is not already a `DimensionGraph`.
    isCalibration : `bool`, optional
        If `True`, this dataset type may be included in
        `~CollectionType.CALIBRATION` collections.

    See Also
    --------
    :ref:`daf_butler_organizing_datasets`
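
    Examples
    --------
    A hedged construction sketch: the dimension and storage class names below
    are illustrative and must exist in the ``universe`` and storage class
    configuration actually in use.

    >>> datasetType = DatasetType("example_type",
    ...                           dimensions=("instrument", "visit"),
    ...                           storageClass="StructuredDataDict",
    ...                           universe=universe)  # doctest: +SKIP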

    """

    __slots__ = ("_name", "_dimensions", "_storageClass", "_storageClassName",
                 "_parentStorageClass", "_parentStorageClassName",
                 "_isCalibration")

    _serializedType = SerializedDatasetType

    VALID_NAME_REGEX = re.compile("^[a-zA-Z][a-zA-Z0-9_]*(\\.[a-zA-Z][a-zA-Z0-9_]*)*$")

    @staticmethod
    def nameWithComponent(datasetTypeName: str, componentName: str) -> str:
        """Form a valid DatasetTypeName from a parent and component.

        No validation is performed.

        Parameters
        ----------
        datasetTypeName : `str`
            Base type name.
        componentName : `str`
            Name of component.

        Returns
        -------
        compTypeName : `str`
            Name to use for component DatasetType.
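
        Examples
        --------
        >>> DatasetType.nameWithComponent("exposure", "wcs")
        'exposure.wcs'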

        """
        return "{}.{}".format(datasetTypeName, componentName)

    def __init__(self, name: str, dimensions: Union[DimensionGraph, Iterable[Dimension]],
                 storageClass: Union[StorageClass, str],
                 parentStorageClass: Optional[Union[StorageClass, str]] = None, *,
                 universe: Optional[DimensionUniverse] = None,
                 isCalibration: bool = False):
        if self.VALID_NAME_REGEX.match(name) is None:
            raise ValueError(f"DatasetType name '{name}' is invalid.")
        self._name = name
        if not isinstance(dimensions, DimensionGraph):
            if universe is None:
                raise ValueError("If dimensions is not a normalized DimensionGraph, "
                                 "a universe must be provided.")
            dimensions = universe.extract(dimensions)
        self._dimensions = dimensions
        if name in self._dimensions.universe.getGovernorDimensions().names:
            raise ValueError(f"Governor dimension name {name} cannot be used as a dataset type name.")
        if not isinstance(storageClass, (StorageClass, str)):
            raise ValueError("StorageClass argument must be StorageClass or str. "
                             f"Got {storageClass}")
        self._storageClass: Optional[StorageClass]
        if isinstance(storageClass, StorageClass):
            self._storageClass = storageClass
            self._storageClassName = storageClass.name
        else:
            self._storageClass = None
            self._storageClassName = storageClass

        self._parentStorageClass: Optional[StorageClass] = None
        self._parentStorageClassName: Optional[str] = None
        if parentStorageClass is not None:
            if not isinstance(parentStorageClass, (StorageClass, str)):
                raise ValueError("Parent StorageClass argument must be StorageClass or str. "
                                 f"Got {parentStorageClass}")

            # Only allowed for a component dataset type
            _, componentName = self.splitDatasetTypeName(self._name)
            if componentName is None:
                raise ValueError("Cannot specify a parent storage class if this is not a component"
                                 f" ({self._name})")
            if isinstance(parentStorageClass, StorageClass):
                self._parentStorageClass = parentStorageClass
                self._parentStorageClassName = parentStorageClass.name
            else:
                self._parentStorageClassName = parentStorageClass

        # Ensure that parent storage class is specified when we have
        # a component and is not specified when we don't
        _, componentName = self.splitDatasetTypeName(self._name)
        if parentStorageClass is None and componentName is not None:
            raise ValueError(f"Component dataset type '{self._name}' constructed without parent"
                             " storage class")
        if parentStorageClass is not None and componentName is None:
            raise ValueError(f"Parent storage class specified but dataset type '{self._name}'"
                             " is not a component")
        self._isCalibration = isCalibration

    def __repr__(self) -> str:
        extra = ""
        if self._parentStorageClassName:
            extra = f", parentStorageClass={self._parentStorageClassName}"
        if self._isCalibration:
            extra += ", isCalibration=True"
        return f"DatasetType({self.name!r}, {self.dimensions}, {self._storageClassName}{extra})"

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, type(self)):
            return False
        if self._name != other._name:
            return False
        if self._dimensions != other._dimensions:
            return False
        if self._storageClass is not None and other._storageClass is not None:
            if self._storageClass != other._storageClass:
                return False
        else:
            if self._storageClassName != other._storageClassName:
                return False
        if self._isCalibration != other._isCalibration:
            return False
        if self._parentStorageClass is not None and other._parentStorageClass is not None:
            return self._parentStorageClass == other._parentStorageClass
        else:
            return self._parentStorageClassName == other._parentStorageClassName

    def __hash__(self) -> int:
        """Hash DatasetType instance.

        This only uses the StorageClass name, which is consistent with the
        implementation of the StorageClass hash method.
        """
        return hash((self._name, self._dimensions, self._storageClassName,
                     self._parentStorageClassName))

    def __lt__(self, other: Any) -> bool:
        """Sort using the dataset type name."""
        if not isinstance(other, type(self)):
            return NotImplemented
        return self.name < other.name

    @property
    def name(self) -> str:
        """Return a string name for the Dataset.

        Must correspond to the same `DatasetType` across all Registries.
        """
        return self._name

    @property
    def dimensions(self) -> DimensionGraph:
        r"""Return the `Dimension`\ s for this dataset type.

        The dimensions label and relate instances of this
        `DatasetType` (`DimensionGraph`).
        """
        return self._dimensions

    @property
    def storageClass(self) -> StorageClass:
        """Return the `StorageClass` instance associated with this dataset
        type.

        The `StorageClass` defines how this `DatasetType` is persisted. Note
        that if the DatasetType was constructed with the name of a
        StorageClass, then a Butler has to be initialized before this
        property can be used.
        """
        if self._storageClass is None:
            self._storageClass = StorageClassFactory().getStorageClass(self._storageClassName)
        return self._storageClass

    @property
    def parentStorageClass(self) -> Optional[StorageClass]:
        """Return the storage class of the composite containing this
        component.

        Can be `None` if this is not a component of a composite; it must be
        defined if this is a component. Note that if the DatasetType was
        constructed with the name of a StorageClass, then a Butler has to be
        initialized before this property can be used.
        """
        if self._parentStorageClass is None and self._parentStorageClassName is None:
            return None
        if self._parentStorageClass is None and self._parentStorageClassName is not None:
            self._parentStorageClass = StorageClassFactory().getStorageClass(self._parentStorageClassName)
        return self._parentStorageClass

    def isCalibration(self) -> bool:
        """Return whether datasets of this type can be in calibration
        collections.

        Returns
        -------
        flag : `bool`
            `True` if datasets of this type may be included in calibration
            collections.
        """
        return self._isCalibration

    @staticmethod
    def splitDatasetTypeName(datasetTypeName: str) -> Tuple[str, Optional[str]]:
        """Return the root name and the component from a composite name.

        Parameters
        ----------
        datasetTypeName : `str`
            The name of the dataset type; it can include a component using
            a "."-separator.

        Returns
        -------
        rootName : `str`
            Root name without any components.
        componentName : `str`
            The component if it has been specified, else `None`.

        Notes
        -----
        If the dataset type name is ``a.b.c`` this method will return a
        root name of ``a`` and a component name of ``b.c``.
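
        >>> DatasetType.splitDatasetTypeName("a.b.c")
        ('a', 'b.c')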

        """
        comp = None
        root = datasetTypeName
        if "." in root:
            # If there is doubt, the component is after the first "."
            root, comp = root.split(".", maxsplit=1)
        return root, comp

    def nameAndComponent(self) -> Tuple[str, Optional[str]]:
        """Return the root name of this dataset type and any component.

        Returns
        -------
        rootName : `str`
            Root name for this `DatasetType` without any components.
        componentName : `str`
            The component if it has been specified, else `None`.
        """
        return self.splitDatasetTypeName(self.name)

    def component(self) -> Optional[str]:
        """Return the component name (if defined).

        Returns
        -------
        comp : `str`
            Name of component part of DatasetType name. `None` if this
            `DatasetType` is not associated with a component.
        """
        _, comp = self.nameAndComponent()
        return comp

    def componentTypeName(self, component: str) -> str:
        """Derive a component dataset type from a composite.

        Parameters
        ----------
        component : `str`
            Name of component.

        Returns
        -------
        derived : `str`
            Compound name of this `DatasetType` and the component.

        Raises
        ------
        KeyError
            Requested component is not supported by this `DatasetType`.
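
        Notes
        -----
        For example, if this dataset type is named ``exposure`` and its
        storage class defines a ``wcs`` component, then
        ``componentTypeName("wcs")`` returns ``"exposure.wcs"``.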

        """
        if component in self.storageClass.allComponents():
            return self.nameWithComponent(self.name, component)
        raise KeyError("Requested component ({}) not understood by this DatasetType".format(component))

    def makeCompositeDatasetType(self) -> DatasetType:
        """Return a composite dataset type from the component.

        Returns
        -------
        composite : `DatasetType`
            The composite dataset type.

        Raises
        ------
        RuntimeError
            Raised if this dataset type is not a component dataset type.
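
        Notes
        -----
        For example, calling this on a component dataset type named
        ``exposure.wcs`` returns a dataset type named ``exposure`` whose
        storage class is this type's ``parentStorageClass``.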

        """
        if not self.isComponent():
            raise RuntimeError(f"DatasetType {self.name} must be a component to form the composite")
        composite_name, _ = self.nameAndComponent()
        if self.parentStorageClass is None:
            raise ValueError("Parent storage class is not set. "
                             f"Unable to create composite type from {self.name}")
        return DatasetType(composite_name, dimensions=self.dimensions,
                           storageClass=self.parentStorageClass)

    def makeComponentDatasetType(self, component: str) -> DatasetType:
        """Return a component dataset type from a composite.

        Assumes the same dimensions as the parent.

        Parameters
        ----------
        component : `str`
            Name of component.

        Returns
        -------
        datasetType : `DatasetType`
            A new DatasetType instance.
        """
        # The component could be a read/write or read component
        return DatasetType(self.componentTypeName(component), dimensions=self.dimensions,
                           storageClass=self.storageClass.allComponents()[component],
                           parentStorageClass=self.storageClass)

    def makeAllComponentDatasetTypes(self) -> List[DatasetType]:
        """Return all component dataset types for this composite.

        Returns
        -------
        all : `list` of `DatasetType`
            All the component dataset types. If this is not a composite
            then returns an empty list.
        """
        return [self.makeComponentDatasetType(componentName)
                for componentName in self.storageClass.allComponents()]

    def isComponent(self) -> bool:
        """Return whether this `DatasetType` refers to a component.

        Returns
        -------
        isComponent : `bool`
            `True` if this `DatasetType` is a component, `False` otherwise.
        """
        if self.component():
            return True
        return False

    def isComposite(self) -> bool:
        """Return whether this `DatasetType` is a composite.

        Returns
        -------
        isComposite : `bool`
            `True` if this `DatasetType` is a composite type, `False`
            otherwise.
        """
        return self.storageClass.isComposite()

    def _lookupNames(self) -> Tuple[LookupKey, ...]:
        """Return name keys to use for lookups in configurations.

        The names are returned in order of priority.

        Returns
        -------
        names : `tuple` of `LookupKey`
            Tuple of the `DatasetType` name and the `StorageClass` name.
            If the name includes a component the name with the component
            is first, then the name without the component and finally
            the storage class name and the storage class name of the
            composite.
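
        Notes
        -----
        For example, for a component dataset type ``exposure.wcs`` the
        priority order is: ``exposure.wcs``, ``exposure``, the dimensions
        (if any), and then the storage class lookup names of the component
        followed by those of its parent storage class.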

        """
        rootName, componentName = self.nameAndComponent()
        lookups: Tuple[LookupKey, ...] = (LookupKey(name=self.name),)
        if componentName is not None:
            lookups = lookups + (LookupKey(name=rootName),)

        if self.dimensions:
            # Dimensions are a lower priority than dataset type name
            lookups = lookups + (LookupKey(dimensions=self.dimensions),)

        storageClasses = self.storageClass._lookupNames()
        if componentName is not None and self.parentStorageClass is not None:
            storageClasses += self.parentStorageClass._lookupNames()

        return lookups + storageClasses

    def to_simple(self, minimal: bool = False) -> SerializedDatasetType:
        """Convert this class to a simple python type.

        This makes it suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. Requires Registry to convert
            back to a full type.

        Returns
        -------
        simple : `SerializedDatasetType`
            The object converted to a class suitable for serialization.
        """
        as_dict: Dict[str, Any]
        if minimal:
            # Only needs the name.
            as_dict = {"name": self.name}
        else:
            # Convert to a dict form
            as_dict = {"name": self.name,
                       "storageClass": self._storageClassName,
                       "isCalibration": self._isCalibration,
                       "dimensions": self.dimensions.to_simple(),
                       }

        if self._parentStorageClassName is not None:
            as_dict["parentStorageClass"] = self._parentStorageClassName
        return SerializedDatasetType(**as_dict)

    @classmethod
    def from_simple(cls, simple: SerializedDatasetType,
                    universe: Optional[DimensionUniverse] = None,
                    registry: Optional[Registry] = None) -> DatasetType:
        """Construct a new object from the simplified form.

        This is usually data returned from the `to_simple` method.

        Parameters
        ----------
        simple : `SerializedDatasetType`
            The value returned by `to_simple()`.
        universe : `DimensionUniverse`
            The special graph of all known dimensions of which this graph will
            be a subset. Can be `None` if a registry is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry to use to convert simple name of a DatasetType to
            a full `DatasetType`. Can be `None` if a full description of
            the type is provided along with a universe.

        Returns
        -------
        datasetType : `DatasetType`
            Newly-constructed object.
        """
        if simple.storageClass is None:
            # Treat this as a minimalist representation.
            if registry is None:
                raise ValueError(f"Unable to convert a DatasetType name '{simple}' to DatasetType"
                                 " without a Registry")
            return registry.getDatasetType(simple.name)

        if universe is None and registry is None:
            raise ValueError("One of universe or registry must be provided.")

        if universe is None and registry is not None:
            # Registry should not be None by now, but the check helps mypy.
            universe = registry.dimensions

        if universe is None:
            # this is for mypy
            raise ValueError("Unable to determine a usable universe")

        if simple.dimensions is None:
            # mypy hint
            raise ValueError(f"Dimensions must be specified in {simple}")

        return cls(name=simple.name,
                   dimensions=DimensionGraph.from_simple(simple.dimensions, universe=universe),
                   storageClass=simple.storageClass,
                   isCalibration=simple.isCalibration,
                   parentStorageClass=simple.parentStorageClass,
                   universe=universe)

    to_json = to_json_pydantic
    from_json = classmethod(from_json_pydantic)
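
    # A hedged round-trip sketch (not part of the original module): assumes
    # ``datasetType`` is an existing DatasetType instance and ``universe``
    # comes from an initialized Butler or Registry.
    #
    #     simple = datasetType.to_simple()
    #     restored = DatasetType.from_simple(simple, universe=universe)
    #     assert restored == datasetType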

    def __reduce__(self) -> Tuple[Callable, Tuple[Type[DatasetType],
                                                  Tuple[str, DimensionGraph, str, Optional[str]],
                                                  Dict[str, bool]]]:
        """Support pickling.

        StorageClass instances cannot normally be pickled, so we pickle the
        StorageClass name instead of the instance.
        """
        return _unpickle_via_factory, (self.__class__, (self.name, self.dimensions, self._storageClassName,
                                                        self._parentStorageClassName),
                                       {"isCalibration": self._isCalibration})

    def __deepcopy__(self, memo: Any) -> DatasetType:
        """Support for the deep copy method.

        Normally ``deepcopy`` will use the pickle mechanism to make copies.
        We want to avoid that to support a (possibly degenerate) use case in
        which a DatasetType is constructed with a StorageClass instance that
        is not registered with the StorageClassFactory (this happens in unit
        tests). Instead we re-implement the ``__deepcopy__`` method.
        """
        return DatasetType(name=deepcopy(self.name, memo),
                           dimensions=deepcopy(self.dimensions, memo),
                           storageClass=deepcopy(self._storageClass or self._storageClassName, memo),
                           parentStorageClass=deepcopy(self._parentStorageClass
                                                       or self._parentStorageClassName, memo),
                           isCalibration=deepcopy(self._isCalibration, memo))


def _unpickle_via_factory(factory: Callable, args: Any, kwargs: Any) -> DatasetType:
    """Unpickle something by calling a factory.

    Allows subclasses to unpickle using `__reduce__` with keyword
    arguments as well as positional arguments.
    """
    return factory(*args, **kwargs)