# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ["DatasetType", "SerializedDatasetType"]

import re
from copy import deepcopy
from types import MappingProxyType
from typing import TYPE_CHECKING, Any, Callable, Dict, Iterable, List, Mapping, Optional, Tuple, Type, Union

from pydantic import BaseModel, StrictBool, StrictStr

from ..configSupport import LookupKey
from ..dimensions import DimensionGraph, SerializedDimensionGraph
from ..json import from_json_pydantic, to_json_pydantic
from ..storageClass import StorageClass, StorageClassFactory

if TYPE_CHECKING:
    from ...registry import Registry
    from ..dimensions import Dimension, DimensionUniverse


def _safeMakeMappingProxyType(data: Optional[Mapping]) -> Mapping:
    """Return a read-only view of the given mapping, substituting an empty
    mapping for `None`."""
    if data is None:
        data = {}
    return MappingProxyType(data)


class SerializedDatasetType(BaseModel):
    """Simplified model of a `DatasetType` suitable for serialization."""

    name: StrictStr
    storageClass: Optional[StrictStr] = None
    dimensions: Optional[SerializedDimensionGraph] = None
    parentStorageClass: Optional[StrictStr] = None
    isCalibration: StrictBool = False

    @classmethod
    def direct(
        cls,
        *,
        name: str,
        storageClass: Optional[str] = None,
        dimensions: Optional[Dict] = None,
        parentStorageClass: Optional[str] = None,
        isCalibration: bool = False,
    ) -> SerializedDatasetType:
        """Construct a `SerializedDatasetType` directly without validators.

        This differs from Pydantic's ``construct`` method in that the
        arguments are explicitly what the model requires, and it will recurse
        through members, constructing them from their corresponding `direct`
        methods.

        This method should only be called when the inputs are trusted.
        """
        node = SerializedDatasetType.__new__(cls)
        setter = object.__setattr__
        setter(node, "name", name)
        setter(node, "storageClass", storageClass)
        setter(
            node,
            "dimensions",
            dimensions if dimensions is None else SerializedDimensionGraph.direct(**dimensions),
        )
        setter(node, "parentStorageClass", parentStorageClass)
        setter(node, "isCalibration", isCalibration)
        setter(
            node,
            "__fields_set__",
            {"name", "storageClass", "dimensions", "parentStorageClass", "isCalibration"},
        )
        return node
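
    # A minimal usage sketch (illustrative; not part of the original source).
    # For trusted, already-validated inputs, ``direct`` skips Pydantic
    # validation but should yield a model equal to the validated constructor:
    #
    #     validated = SerializedDatasetType(name="raw", storageClass="Exposure")
    #     trusted = SerializedDatasetType.direct(name="raw", storageClass="Exposure")
    #     assert validated == trusted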


class DatasetType:
    r"""A named category of Datasets.

    Defines how they are organized, related, and stored.

    A concrete, final class whose instances represent `DatasetType`\ s.
    `DatasetType` instances may be constructed without a `Registry`, but they
    must be registered via `Registry.registerDatasetType()` before
    corresponding Datasets may be added. `DatasetType` instances are
    immutable.

    Parameters
    ----------
    name : `str`
        A string name for the Dataset; must correspond to the same
        `DatasetType` across all Registries. Names must start with an
        upper or lowercase letter, and may contain only letters, numbers,
        and underscores. Component dataset types should contain a single
        period separating the base dataset type name from the component name
        (and may be recursive).
    dimensions : `DimensionGraph` or iterable of `Dimension`
        Dimensions used to label and relate instances of this `DatasetType`.
        If not a `DimensionGraph`, ``universe`` must be provided as well.
    storageClass : `StorageClass` or `str`
        Instance of a `StorageClass` or name of `StorageClass` that defines
        how this `DatasetType` is persisted.
    parentStorageClass : `StorageClass` or `str`, optional
        Instance of a `StorageClass` or name of `StorageClass` that defines
        how the composite parent is persisted. Must be `None` if this
        is not a component.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to normalize ``dimensions`` if it
        is not already a `DimensionGraph`.
    isCalibration : `bool`, optional
        If `True`, this dataset type may be included in
        `~CollectionType.CALIBRATION` collections.

    See Also
    --------
    :ref:`daf_butler_organizing_datasets`
    """
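
    # Illustrative construction sketch (hypothetical names; assumes a
    # `DimensionUniverse` ``universe`` and a registered "Exposure" storage
    # class):
    #
    #     flat = DatasetType(
    #         "flat",
    #         dimensions=["instrument", "detector", "physical_filter"],
    #         storageClass="Exposure",
    #         universe=universe,
    #         isCalibration=True,
    #     )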


    __slots__ = (
        "_name",
        "_dimensions",
        "_storageClass",
        "_storageClassName",
        "_parentStorageClass",
        "_parentStorageClassName",
        "_isCalibration",
    )

    _serializedType = SerializedDatasetType

    VALID_NAME_REGEX = re.compile("^[a-zA-Z][a-zA-Z0-9_]*(\\.[a-zA-Z][a-zA-Z0-9_]*)*$")

    @staticmethod
    def nameWithComponent(datasetTypeName: str, componentName: str) -> str:
        """Form a valid DatasetTypeName from a parent and component.

        No validation is performed.

        Parameters
        ----------
        datasetTypeName : `str`
            Base type name.
        componentName : `str`
            Name of component.

        Returns
        -------
        compTypeName : `str`
            Name to use for component DatasetType.
        """
        return f"{datasetTypeName}.{componentName}"
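
    # For example (illustrative):
    #     DatasetType.nameWithComponent("calexp", "wcs")  # -> "calexp.wcs"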


    def __init__(
        self,
        name: str,
        dimensions: Union[DimensionGraph, Iterable[Dimension]],
        storageClass: Union[StorageClass, str],
        parentStorageClass: Optional[Union[StorageClass, str]] = None,
        *,
        universe: Optional[DimensionUniverse] = None,
        isCalibration: bool = False,
    ):
        if self.VALID_NAME_REGEX.match(name) is None:
            raise ValueError(f"DatasetType name '{name}' is invalid.")
        self._name = name
        if not isinstance(dimensions, DimensionGraph):
            if universe is None:
                raise ValueError(
                    "If dimensions is not a normalized DimensionGraph, a universe must be provided."
                )
            dimensions = universe.extract(dimensions)
        self._dimensions = dimensions
        if name in self._dimensions.universe.getGovernorDimensions().names:
            raise ValueError(f"Governor dimension name {name} cannot be used as a dataset type name.")
        if not isinstance(storageClass, (StorageClass, str)):
            raise ValueError(f"StorageClass argument must be StorageClass or str. Got {storageClass}")
        self._storageClass: Optional[StorageClass]
        if isinstance(storageClass, StorageClass):
            self._storageClass = storageClass
            self._storageClassName = storageClass.name
        else:
            self._storageClass = None
            self._storageClassName = storageClass

        self._parentStorageClass: Optional[StorageClass] = None
        self._parentStorageClassName: Optional[str] = None
        if parentStorageClass is not None:
            if not isinstance(parentStorageClass, (StorageClass, str)):
                raise ValueError(
                    f"Parent StorageClass argument must be StorageClass or str. Got {parentStorageClass}"
                )

            # Only allowed for a component dataset type.
            _, componentName = self.splitDatasetTypeName(self._name)
            if componentName is None:
                raise ValueError(
                    f"Cannot specify a parent storage class if this is not a component ({self._name})"
                )
            if isinstance(parentStorageClass, StorageClass):
                self._parentStorageClass = parentStorageClass
                self._parentStorageClassName = parentStorageClass.name
            else:
                self._parentStorageClassName = parentStorageClass

        # Ensure that the parent storage class is specified when we have
        # a component and is not specified when we don't.
        _, componentName = self.splitDatasetTypeName(self._name)
        if parentStorageClass is None and componentName is not None:
            raise ValueError(
                f"Component dataset type '{self._name}' constructed without parent storage class"
            )
        if parentStorageClass is not None and componentName is None:
            raise ValueError(f"Parent storage class specified but '{self._name}' is not a component")
        self._isCalibration = isCalibration

    def __repr__(self) -> str:
        extra = ""
        if self._parentStorageClassName:
            extra = f", parentStorageClass={self._parentStorageClassName}"
        if self._isCalibration:
            extra += ", isCalibration=True"
        return f"DatasetType({self.name!r}, {self.dimensions}, {self._storageClassName}{extra})"

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, type(self)):
            return False
        if self._name != other._name:
            return False
        if self._dimensions != other._dimensions:
            return False
        if self._storageClass is not None and other._storageClass is not None:
            if self._storageClass != other._storageClass:
                return False
        else:
            if self._storageClassName != other._storageClassName:
                return False
        if self._isCalibration != other._isCalibration:
            return False
        if self._parentStorageClass is not None and other._parentStorageClass is not None:
            return self._parentStorageClass == other._parentStorageClass
        else:
            return self._parentStorageClassName == other._parentStorageClassName

    def __hash__(self) -> int:
        """Hash DatasetType instance.

        This only uses the StorageClass name, which is consistent with the
        implementation of the StorageClass hash method.
        """
        return hash((self._name, self._dimensions, self._storageClassName, self._parentStorageClassName))


    def __lt__(self, other: Any) -> bool:
        """Sort using the dataset type name."""
        if not isinstance(other, type(self)):
            return NotImplemented
        return self.name < other.name

    @property
    def name(self) -> str:
        """Return a string name for the Dataset.

        Must correspond to the same `DatasetType` across all Registries.
        """
        return self._name

    @property
    def dimensions(self) -> DimensionGraph:
        r"""Return the `Dimension`\ s for this dataset type.

        The dimensions label and relate instances of this
        `DatasetType` (`DimensionGraph`).
        """
        return self._dimensions


    @property
    def storageClass(self) -> StorageClass:
        """Return `StorageClass` instance associated with this dataset type.

        The `StorageClass` defines how this `DatasetType` is persisted. Note
        that if the DatasetType was constructed with the name of a
        StorageClass, then Butler has to be initialized before using this
        property.
        """
        if self._storageClass is None:
            self._storageClass = StorageClassFactory().getStorageClass(self._storageClassName)
        return self._storageClass

    @property
    def parentStorageClass(self) -> Optional[StorageClass]:
        """Return the storage class of the composite containing this
        component.

        Note that if the DatasetType was constructed with the name of a
        StorageClass, then Butler has to be initialized before using this
        property. Can be `None` if this is not a component of a composite.
        Must be defined if this is a component.
        """
        if self._parentStorageClass is None and self._parentStorageClassName is None:
            return None
        if self._parentStorageClass is None and self._parentStorageClassName is not None:
            self._parentStorageClass = StorageClassFactory().getStorageClass(self._parentStorageClassName)
        return self._parentStorageClass


    def isCalibration(self) -> bool:
        """Return if datasets of this type can be in calibration collections.

        Returns
        -------
        flag : `bool`
            `True` if datasets of this type may be included in calibration
            collections.
        """
        return self._isCalibration

    @staticmethod
    def splitDatasetTypeName(datasetTypeName: str) -> Tuple[str, Optional[str]]:
        """Return the root name and the component from a composite name.

        Parameters
        ----------
        datasetTypeName : `str`
            The name of the dataset type; can include a component using
            a "."-separator.

        Returns
        -------
        rootName : `str`
            Root name without any components.
        componentName : `str`
            The component if it has been specified, else `None`.

        Notes
        -----
        If the dataset type name is ``a.b.c`` this method will return a
        root name of ``a`` and a component name of ``b.c``.
        """
        comp = None
        root = datasetTypeName
        if "." in root:
            # If there is doubt, the component is after the first ".".
            root, comp = root.split(".", maxsplit=1)
        return root, comp

    def nameAndComponent(self) -> Tuple[str, Optional[str]]:
        """Return the root name of this dataset type and any component.

        Returns
        -------
        rootName : `str`
            Root name for this `DatasetType` without any components.
        componentName : `str`
            The component if it has been specified, else `None`.
        """
        return self.splitDatasetTypeName(self.name)

    def component(self) -> Optional[str]:
        """Return the component name (if defined).

        Returns
        -------
        comp : `str`
            Name of component part of DatasetType name. `None` if this
            `DatasetType` is not associated with a component.
        """
        _, comp = self.nameAndComponent()
        return comp

    def componentTypeName(self, component: str) -> str:
        """Derive a component dataset type from a composite.

        Parameters
        ----------
        component : `str`
            Name of component.

        Returns
        -------
        derived : `str`
            Compound name of this `DatasetType` and the component.

        Raises
        ------
        KeyError
            Requested component is not supported by this `DatasetType`.
        """
        if component in self.storageClass.allComponents():
            return self.nameWithComponent(self.name, component)
        raise KeyError(f"Requested component ({component}) not understood by this DatasetType")

    def makeCompositeDatasetType(self) -> DatasetType:
        """Return a composite dataset type from the component.

        Returns
        -------
        composite : `DatasetType`
            The composite dataset type.

        Raises
        ------
        RuntimeError
            Raised if this dataset type is not a component dataset type.
        """
        if not self.isComponent():
            raise RuntimeError(f"DatasetType {self.name} must be a component to form the composite")
        composite_name, _ = self.nameAndComponent()
        if self.parentStorageClass is None:
            raise ValueError(
                f"Parent storage class is not set. Unable to create composite type from {self.name}"
            )
        return DatasetType(composite_name, dimensions=self.dimensions, storageClass=self.parentStorageClass)

    def makeComponentDatasetType(self, component: str) -> DatasetType:
        """Return a component dataset type from a composite.

        Assumes the same dimensions as the parent.

        Parameters
        ----------
        component : `str`
            Name of component.

        Returns
        -------
        datasetType : `DatasetType`
            A new DatasetType instance.
        """
        # The component could be a read/write or read component.
        return DatasetType(
            self.componentTypeName(component),
            dimensions=self.dimensions,
            storageClass=self.storageClass.allComponents()[component],
            parentStorageClass=self.storageClass,
        )
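
    # Illustrative sketch (hypothetical names): for a composite "calexp"
    # whose storage class defines a "wcs" component,
    #
    #     wcs_type = calexp_type.makeComponentDatasetType("wcs")
    #     wcs_type.name           # "calexp.wcs"
    #     wcs_type.isComponent()  # True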


    def makeAllComponentDatasetTypes(self) -> List[DatasetType]:
        """Return all component dataset types for this composite.

        Returns
        -------
        all : `list` of `DatasetType`
            All the component dataset types. If this is not a composite
            then returns an empty list.
        """
        return [
            self.makeComponentDatasetType(componentName)
            for componentName in self.storageClass.allComponents()
        ]

    def isComponent(self) -> bool:
        """Return whether this `DatasetType` refers to a component.

        Returns
        -------
        isComponent : `bool`
            `True` if this `DatasetType` is a component, `False` otherwise.
        """
        if self.component():
            return True
        return False

    def isComposite(self) -> bool:
        """Return whether this `DatasetType` is a composite.

        Returns
        -------
        isComposite : `bool`
            `True` if this `DatasetType` is a composite type, `False`
            otherwise.
        """
        return self.storageClass.isComposite()

    def _lookupNames(self) -> Tuple[LookupKey, ...]:
        """Return name keys to use for lookups in configurations.

        The names are returned in order of priority.

        Returns
        -------
        names : `tuple` of `LookupKey`
            Tuple of the `DatasetType` name and the `StorageClass` name.
            If the name includes a component the name with the component
            is first, then the name without the component and finally
            the storage class name and the storage class name of the
            composite.
        """
        rootName, componentName = self.nameAndComponent()
        lookups: Tuple[LookupKey, ...] = (LookupKey(name=self.name),)
        if componentName is not None:
            lookups = lookups + (LookupKey(name=rootName),)

        if self.dimensions:
            # Dimensions are a lower priority than dataset type name.
            lookups = lookups + (LookupKey(dimensions=self.dimensions),)

        storageClasses = self.storageClass._lookupNames()
        if componentName is not None and self.parentStorageClass is not None:
            storageClasses += self.parentStorageClass._lookupNames()

        return lookups + storageClasses

    def to_simple(self, minimal: bool = False) -> SerializedDatasetType:
        """Convert this class to a simple python type.

        This makes it suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. Requires Registry to convert
            back to a full type.

        Returns
        -------
        simple : `SerializedDatasetType`
            The object converted to a class suitable for serialization.
        """
        as_dict: Dict[str, Any]
        if minimal:
            # Only needs the name.
            as_dict = {"name": self.name}
        else:
            # Convert to a dict form.
            as_dict = {
                "name": self.name,
                "storageClass": self._storageClassName,
                "isCalibration": self._isCalibration,
                "dimensions": self.dimensions.to_simple(),
            }

        if self._parentStorageClassName is not None:
            as_dict["parentStorageClass"] = self._parentStorageClassName
        return SerializedDatasetType(**as_dict)
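
    # Illustrative round trip (assumes ``universe`` is the DimensionUniverse
    # used to construct the type and that its storage class is registered):
    #
    #     simple = dataset_type.to_simple()
    #     restored = DatasetType.from_simple(simple, universe=universe)
    #     assert restored == dataset_type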


    @classmethod
    def from_simple(
        cls,
        simple: SerializedDatasetType,
        universe: Optional[DimensionUniverse] = None,
        registry: Optional[Registry] = None,
    ) -> DatasetType:
        """Construct a new object from the simplified form.

        This is usually data returned from the `to_simple` method.

        Parameters
        ----------
        simple : `SerializedDatasetType`
            The value returned by `to_simple()`.
        universe : `DimensionUniverse`, optional
            The special graph of all known dimensions of which this graph will
            be a subset. Can be `None` if a registry is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry to use to convert the simple name of a DatasetType to
            a full `DatasetType`. Can be `None` if a full description of
            the type is provided along with a universe.

        Returns
        -------
        datasetType : `DatasetType`
            Newly-constructed object.
        """
        if simple.storageClass is None:
            # Treat this as the minimal representation.
            if registry is None:
                raise ValueError(
                    f"Unable to convert a DatasetType name '{simple.name}' to DatasetType "
                    "without a Registry"
                )
            return registry.getDatasetType(simple.name)

        if universe is None and registry is None:
            raise ValueError("One of universe or registry must be provided.")

        if universe is None and registry is not None:
            # registry should not be None by now but the check helps mypy.
            universe = registry.dimensions

        if universe is None:
            # This is for mypy.
            raise ValueError("Unable to determine a usable universe")

        if simple.dimensions is None:
            # mypy hint.
            raise ValueError(f"Dimensions must be specified in {simple}")

        return cls(
            name=simple.name,
            dimensions=DimensionGraph.from_simple(simple.dimensions, universe=universe),
            storageClass=simple.storageClass,
            isCalibration=simple.isCalibration,
            parentStorageClass=simple.parentStorageClass,
            universe=universe,
        )

    to_json = to_json_pydantic
    from_json = classmethod(from_json_pydantic)

    def __reduce__(
        self,
    ) -> Tuple[
        Callable, Tuple[Type[DatasetType], Tuple[str, DimensionGraph, str, Optional[str]], Dict[str, bool]]
    ]:
        """Support pickling.

        StorageClass instances cannot normally be pickled, so we pickle the
        StorageClass name instead of the instance.
        """
        return _unpickle_via_factory, (
            self.__class__,
            (self.name, self.dimensions, self._storageClassName, self._parentStorageClassName),
            {"isCalibration": self._isCalibration},
        )
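
    # Illustrative pickle round trip: ``__reduce__`` stores storage class
    # names rather than instances, so unpickling re-resolves them via
    # StorageClassFactory:
    #
    #     import pickle
    #     assert pickle.loads(pickle.dumps(dataset_type)) == dataset_type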


    def __deepcopy__(self, memo: Any) -> DatasetType:
        """Support for deep copy method.

        Normally ``deepcopy`` will use the pickle mechanism to make copies.
        We want to avoid that to support the (possibly degenerate) use case
        in which a DatasetType is constructed with a StorageClass instance
        that is not registered with the StorageClassFactory (this happens in
        unit tests). Instead we re-implement the ``__deepcopy__`` method.
        """
        return DatasetType(
            name=deepcopy(self.name, memo),
            dimensions=deepcopy(self.dimensions, memo),
            storageClass=deepcopy(self._storageClass or self._storageClassName, memo),
            parentStorageClass=deepcopy(self._parentStorageClass or self._parentStorageClassName, memo),
            isCalibration=deepcopy(self._isCalibration, memo),
        )


def _unpickle_via_factory(factory: Callable, args: Any, kwargs: Any) -> DatasetType:
    """Unpickle something by calling a factory.

    Allows subclasses to unpickle using `__reduce__` with keyword
    arguments as well as positional arguments.
    """
    return factory(*args, **kwargs)