# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ["DatasetType", "SerializedDatasetType"]

import re
from collections.abc import Callable, Iterable, Mapping
from copy import deepcopy
from types import MappingProxyType
from typing import TYPE_CHECKING, Any, ClassVar

from pydantic import BaseModel, StrictBool, StrictStr

from ._config_support import LookupKey
from ._storage_class import StorageClass, StorageClassFactory
from .dimensions import DimensionGraph, DimensionGroup, SerializedDimensionGraph
from .json import from_json_pydantic, to_json_pydantic
from .persistence_context import PersistenceContextVars

if TYPE_CHECKING:
    from .dimensions import Dimension, DimensionUniverse
    from .registry import Registry


def _safeMakeMappingProxyType(data: Mapping | None) -> Mapping:
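    """Return a read-only view of the given mapping, substituting an empty
    mapping when given `None`.
    """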

    if data is None:
        data = {}
    return MappingProxyType(data)


class SerializedDatasetType(BaseModel):
    """Simplified model of a `DatasetType` suitable for serialization."""

    name: StrictStr
    storageClass: StrictStr | None = None
    dimensions: SerializedDimensionGraph | list[StrictStr] | None = None
    parentStorageClass: StrictStr | None = None
    isCalibration: StrictBool = False

    @classmethod
    def direct(
        cls,
        *,
        name: str,
        storageClass: str | None = None,
        dimensions: list | dict | None = None,
        parentStorageClass: str | None = None,
        isCalibration: bool = False,
    ) -> SerializedDatasetType:
        """Construct a `SerializedDatasetType` directly without validators.

        This differs from Pydantic's ``model_construct`` method in that the
        arguments are explicitly what the model requires, and it will recurse
        through members, constructing them from their corresponding `direct`
        methods.

        This method should only be called when the inputs are trusted.

        Parameters
        ----------
        name : `str`
            The name of the dataset type.
        storageClass : `str` or `None`
            The name of the storage class.
        dimensions : `list` or `dict` or `None`
            The dimensions associated with this dataset type.
        parentStorageClass : `str` or `None`
            The parent storage class name if this is a component.
        isCalibration : `bool`
            Whether this dataset type represents calibrations.

        Returns
        -------
        `SerializedDatasetType`
            A Pydantic model representing a dataset type.
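
        Examples
        --------
        A minimal sketch with hypothetical dataset type, storage class, and
        dimension names; untrusted inputs should go through normal Pydantic
        validation instead::

            serialized = SerializedDatasetType.direct(
                name="calexp",
                storageClass="ExposureF",
                dimensions=["instrument", "visit", "detector"],
            )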

        """
        cache = PersistenceContextVars.serializedDatasetTypeMapping.get()
        key = (name, storageClass or "")
        if cache is not None and (type_ := cache.get(key, None)) is not None:
            return type_

        serialized_dimensions: list[str] | None
        match dimensions:
            case list():
                serialized_dimensions = dimensions
            case dict():
                serialized_dimensions = SerializedDimensionGraph.direct(**dimensions).names
            case None:
                serialized_dimensions = None

        node = cls.model_construct(
            name=name,
            storageClass=storageClass,
            dimensions=serialized_dimensions,
            parentStorageClass=parentStorageClass,
            isCalibration=isCalibration,
        )

        if cache is not None:
            cache[key] = node
        return node


class DatasetType:
    r"""A named category of Datasets.

    Defines how they are organized, related, and stored.

    A concrete, final class whose instances represent `DatasetType`\ s.
    `DatasetType` instances may be constructed without a `Registry`,
    but they must be registered
    via `Registry.registerDatasetType()` before corresponding Datasets
    may be added.
    `DatasetType` instances are immutable.

    Parameters
    ----------
    name : `str`
        A string name for the Dataset; must correspond to the same
        `DatasetType` across all Registries. Names must start with an
        upper or lowercase letter, and may contain only letters, numbers,
        and underscores. Component dataset types should contain a single
        period separating the base dataset type name from the component name
        (and may be recursive).
    dimensions : `DimensionGroup`, `DimensionGraph`, or \
            `~collections.abc.Iterable` [ `Dimension` or `str` ]
        Dimensions used to label and relate instances of this `DatasetType`.
        If not a `DimensionGraph` or `DimensionGroup`, ``universe`` must be
        provided as well.
    storageClass : `StorageClass` or `str`
        Instance of a `StorageClass` or name of `StorageClass` that defines
        how this `DatasetType` is persisted.
    parentStorageClass : `StorageClass` or `str`, optional
        Instance of a `StorageClass` or name of `StorageClass` that defines
        how the composite parent is persisted. Must be `None` if this
        is not a component.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to normalize ``dimensions`` if it
        is not already a `DimensionGraph` or `DimensionGroup`.
    isCalibration : `bool`, optional
        If `True`, this dataset type may be included in
        `~CollectionType.CALIBRATION` collections.

    Notes
    -----
    See also :ref:`daf_butler_organizing_datasets`.
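
    Examples
    --------
    A minimal sketch; assumes a `DimensionUniverse` is available as
    ``universe`` (e.g. from ``butler.dimensions``) and uses hypothetical
    dimension and storage class names::

        datasetType = DatasetType(
            "calexp",
            dimensions=["instrument", "visit", "detector"],
            storageClass="ExposureF",
            universe=universe,
        )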

    """

    __slots__ = (
        "_name",
        "_dimensions",
        "_storageClass",
        "_storageClassName",
        "_parentStorageClass",
        "_parentStorageClassName",
        "_isCalibration",
    )

    _serializedType: ClassVar[type[BaseModel]] = SerializedDatasetType

    VALID_NAME_REGEX = re.compile("^[a-zA-Z_][a-zA-Z0-9_]*(\\.[a-zA-Z_][a-zA-Z0-9_]*)*$")

    @staticmethod
    def nameWithComponent(datasetTypeName: str, componentName: str) -> str:
        """Form a valid DatasetTypeName from a parent and component.

        No validation is performed.

        Parameters
        ----------
        datasetTypeName : `str`
            Base type name.
        componentName : `str`
            Name of component.

        Returns
        -------
        compTypeName : `str`
            Name to use for component DatasetType.
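
        Examples
        --------
        The component name is simply appended with a ``.`` separator:

        >>> DatasetType.nameWithComponent("exposure", "wcs")
        'exposure.wcs'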

        """
        return f"{datasetTypeName}.{componentName}"

    def __init__(
        self,
        name: str,
        dimensions: DimensionGroup | DimensionGraph | Iterable[Dimension | str],
        storageClass: StorageClass | str,
        parentStorageClass: StorageClass | str | None = None,
        *,
        universe: DimensionUniverse | None = None,
        isCalibration: bool = False,
    ):
        if self.VALID_NAME_REGEX.match(name) is None:
            raise ValueError(f"DatasetType name '{name}' is invalid.")
        self._name = name
        universe = universe or getattr(dimensions, "universe", None)
        if universe is None:
            raise ValueError(
                "If dimensions is not a DimensionGroup or DimensionGraph, a universe must be provided."
            )
        self._dimensions = universe.conform(dimensions)
        if name in self._dimensions.universe.governor_dimensions:
            raise ValueError(f"Governor dimension name {name} cannot be used as a dataset type name.")
        if not isinstance(storageClass, StorageClass | str):
            raise ValueError(f"StorageClass argument must be StorageClass or str. Got {storageClass}")
        self._storageClass: StorageClass | None
        if isinstance(storageClass, StorageClass):
            self._storageClass = storageClass
            self._storageClassName = storageClass.name
        else:
            self._storageClass = None
            self._storageClassName = storageClass

        self._parentStorageClass: StorageClass | None = None
        self._parentStorageClassName: str | None = None
        if parentStorageClass is not None:
            if not isinstance(parentStorageClass, StorageClass | str):
                raise ValueError(
                    f"Parent StorageClass argument must be StorageClass or str. Got {parentStorageClass}"
                )

            # Only allowed for a component dataset type.
            _, componentName = self.splitDatasetTypeName(self._name)
            if componentName is None:
                raise ValueError(
                    f"Can not specify a parent storage class if this is not a component ({self._name})"
                )
            if isinstance(parentStorageClass, StorageClass):
                self._parentStorageClass = parentStorageClass
                self._parentStorageClassName = parentStorageClass.name
            else:
                self._parentStorageClassName = parentStorageClass

        # Ensure that a parent storage class is specified when we have
        # a component and is not specified when we don't.
        _, componentName = self.splitDatasetTypeName(self._name)
        if parentStorageClass is None and componentName is not None:
            raise ValueError(
                f"Component dataset type '{self._name}' constructed without parent storage class"
            )
        if parentStorageClass is not None and componentName is None:
            raise ValueError(f"Parent storage class specified but {self._name} is not a component")
        self._isCalibration = isCalibration

    def __repr__(self) -> str:
        extra = ""
        if self._parentStorageClassName:
            extra = f", parentStorageClass={self._parentStorageClassName}"
        if self._isCalibration:
            extra += ", isCalibration=True"
        return f"DatasetType({self.name!r}, {self._dimensions}, {self._storageClassName}{extra})"

    def _equal_ignoring_storage_class(self, other: Any) -> bool:
        """Check everything is equal except the storage class.

        Parameters
        ----------
        other : `Any`
            Object to check against this one.

        Returns
        -------
        mostly : `bool`
            Returns `True` if everything except the storage class is equal.
        """
        if not isinstance(other, type(self)):
            return False
        if self._name != other._name:
            return False
        if self._dimensions != other._dimensions:
            return False
        if self._isCalibration != other._isCalibration:
            return False
        if self._parentStorageClass is not None and other._parentStorageClass is not None:
            return self._parentStorageClass == other._parentStorageClass
        else:
            return self._parentStorageClassName == other._parentStorageClassName

    def __eq__(self, other: Any) -> bool:
        mostly_equal = self._equal_ignoring_storage_class(other)
        if not mostly_equal:
            return False

        # Be careful not to force a storage class to import the corresponding
        # python code.
        if self._storageClass is not None and other._storageClass is not None:
            if self._storageClass != other._storageClass:
                return False
        else:
            if self._storageClassName != other._storageClassName:
                return False
        return True

    def is_compatible_with(self, other: DatasetType) -> bool:
        """Determine if the given `DatasetType` is compatible with this one.

        Compatibility requires a matching name and dimensions and a storage
        class for this dataset type that can convert the python type
        associated with the other storage class to this python type.

        Parameters
        ----------
        other : `DatasetType`
            Dataset type to check.

        Returns
        -------
        is_compatible : `bool`
            Returns `True` if the other dataset type is either the same as
            this or the storage class associated with the other can be
            converted to this.
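
        Examples
        --------
        A sketch with hypothetical dataset types; ``user_type`` may use a
        different (but convertible) storage class than ``registry_type``::

            if not registry_type.is_compatible_with(user_type):
                raise TypeError("Incompatible storage classes.")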

        """
        mostly_equal = self._equal_ignoring_storage_class(other)
        if not mostly_equal:
            return False

        # If the storage class names match then they are compatible.
        if self._storageClassName == other._storageClassName:
            return True

        # Now required to check the full storage class.
        self_sc = self.storageClass
        other_sc = other.storageClass

        return self_sc.can_convert(other_sc)

    def __hash__(self) -> int:
        """Hash DatasetType instance.

        This only uses the StorageClass name, which is consistent with the
        implementation of the StorageClass hash method.
        """
        return hash((self._name, self._dimensions, self._storageClassName, self._parentStorageClassName))

    def __lt__(self, other: Any) -> bool:
        """Sort using the dataset type name."""
        if not isinstance(other, type(self)):
            return NotImplemented
        return self.name < other.name

    @property
    def name(self) -> str:
        """Return a string name for the Dataset.

        Must correspond to the same `DatasetType` across all Registries.
        """
        return self._name

    @property
    def dimensions(self) -> DimensionGraph:
        """Return the dimensions of this dataset type (`DimensionGraph`).

        The dimensions of a dataset type define the keys of its datasets'
        data IDs.
        """
        return self._dimensions._as_graph()

    @property
    def storageClass(self) -> StorageClass:
        """Return `StorageClass` instance associated with this dataset type.

        The `StorageClass` defines how this `DatasetType`
        is persisted. Note that if DatasetType was constructed with a name
        of a StorageClass then Butler has to be initialized before using
        this property.
        """
        if self._storageClass is None:
            self._storageClass = StorageClassFactory().getStorageClass(self._storageClassName)
        return self._storageClass

    @property
    def storageClass_name(self) -> str:
        """Return the storage class name.

        This will never force the storage class to be imported.
        """
        return self._storageClassName

    @property
    def parentStorageClass(self) -> StorageClass | None:
        """Return the storage class of the composite containing this
        component.

        Note that if DatasetType was constructed with a name of a
        StorageClass then Butler has to be initialized before using this
        property. Can be `None` if this is not a component of a composite.
        Must be defined if this is a component.
        """
        if self._parentStorageClass is None and self._parentStorageClassName is None:
            return None
        if self._parentStorageClass is None and self._parentStorageClassName is not None:
            self._parentStorageClass = StorageClassFactory().getStorageClass(self._parentStorageClassName)
        return self._parentStorageClass

    def isCalibration(self) -> bool:
        """Return if datasets of this type can be in calibration collections.

        Returns
        -------
        flag : `bool`
            `True` if datasets of this type may be included in calibration
            collections.
        """
        return self._isCalibration

    @staticmethod
    def splitDatasetTypeName(datasetTypeName: str) -> tuple[str, str | None]:
        """Return the root name and the component from a composite name.

        Parameters
        ----------
        datasetTypeName : `str`
            The name of the dataset type; can include a component using
            a "."-separator.

        Returns
        -------
        rootName : `str`
            Root name without any components.
        componentName : `str`
            The component if it has been specified, else `None`.

        Notes
        -----
        If the dataset type name is ``a.b.c`` this method will return a
        root name of ``a`` and a component name of ``b.c``.
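
        Examples
        --------
        The split happens at the first ``.``:

        >>> DatasetType.splitDatasetTypeName("a.b.c")
        ('a', 'b.c')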

        """
        comp = None
        root = datasetTypeName
        if "." in root:
            # If there is doubt, the component is after the first "."
            root, comp = root.split(".", maxsplit=1)
        return root, comp

    def nameAndComponent(self) -> tuple[str, str | None]:
        """Return the root name of this dataset type and any component.

        Returns
        -------
        rootName : `str`
            Root name for this `DatasetType` without any components.
        componentName : `str`
            The component if it has been specified, else `None`.
        """
        return self.splitDatasetTypeName(self.name)

    def component(self) -> str | None:
        """Return the component name (if defined).

        Returns
        -------
        comp : `str`
            Name of the component part of the DatasetType name. `None` if
            this `DatasetType` is not associated with a component.
        """
        _, comp = self.nameAndComponent()
        return comp

    def componentTypeName(self, component: str) -> str:
        """Derive a component dataset type name from a composite.

        Parameters
        ----------
        component : `str`
            Name of component.

        Returns
        -------
        derived : `str`
            Compound name of this `DatasetType` and the component.

        Raises
        ------
        KeyError
            Raised if the requested component is not supported by this
            `DatasetType`.

        """
        if component in self.storageClass.allComponents():
            return self.nameWithComponent(self.name, component)
        raise KeyError(f"Requested component ({component}) not understood by this DatasetType ({self})")

    def makeCompositeDatasetType(self) -> DatasetType:
        """Return a composite dataset type from the component.

        Returns
        -------
        composite : `DatasetType`
            The composite dataset type.

        Raises
        ------
        RuntimeError
            Raised if this dataset type is not a component dataset type.
        ValueError
            Raised if the parent storage class is not set.
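
        Examples
        --------
        A sketch: given a component dataset type such as ``calexp.wcs``
        (hypothetical name), recover the composite ``calexp`` type::

            composite_type = wcs_type.makeCompositeDatasetType()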

        """
        if not self.isComponent():
            raise RuntimeError(f"DatasetType {self.name} must be a component to form the composite")
        composite_name, _ = self.nameAndComponent()
        if self.parentStorageClass is None:
            raise ValueError(
                f"Parent storage class is not set. Unable to create composite type from {self.name}"
            )
        return DatasetType(
            composite_name,
            dimensions=self._dimensions,
            storageClass=self.parentStorageClass,
            isCalibration=self.isCalibration(),
        )

    def makeComponentDatasetType(self, component: str) -> DatasetType:
        """Return a component dataset type from a composite.

        Assumes the same dimensions as the parent.

        Parameters
        ----------
        component : `str`
            Name of component.

        Returns
        -------
        datasetType : `DatasetType`
            A new DatasetType instance.
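
        Examples
        --------
        A sketch, assuming the composite's storage class defines a ``wcs``
        component (hypothetical names)::

            wcs_type = calexp_type.makeComponentDatasetType("wcs")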

        """
        # The component could be a read/write or read component.
        return DatasetType(
            self.componentTypeName(component),
            dimensions=self._dimensions,
            storageClass=self.storageClass.allComponents()[component],
            parentStorageClass=self.storageClass,
            isCalibration=self.isCalibration(),
        )

    def makeAllComponentDatasetTypes(self) -> list[DatasetType]:
        """Return all component dataset types for this composite.

        Returns
        -------
        all : `list` of `DatasetType`
            All the component dataset types. If this is not a composite
            then an empty list is returned.
        """
        return [
            self.makeComponentDatasetType(componentName)
            for componentName in self.storageClass.allComponents()
        ]

    def overrideStorageClass(self, storageClass: str | StorageClass) -> DatasetType:
        """Create a new `DatasetType` from this one but with an updated
        `StorageClass`.

        Parameters
        ----------
        storageClass : `str` or `StorageClass`
            The new storage class.

        Returns
        -------
        modified : `DatasetType`
            A dataset type that is the same as the current one but with a
            different storage class. Will be ``self`` if the given storage
            class is the current one.

        Raises
        ------
        ValueError
            Raised if the new storage class is not compatible with the
            existing one.

        Notes
        -----
        If this is a component dataset type, the parent storage class will be
        retained.
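
        Examples
        --------
        A sketch with hypothetical storage class names; the two storage
        classes must be convertible in at least one direction::

            table_type = dataset_type.overrideStorageClass("ArrowAstropy")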

        """
        if storageClass == self._storageClassName or storageClass == self._storageClass:
            return self
        parent = self._parentStorageClass if self._parentStorageClass else self._parentStorageClassName
        new = DatasetType(
            self.name,
            dimensions=self._dimensions,
            storageClass=storageClass,
            parentStorageClass=parent,
            isCalibration=self.isCalibration(),
        )
        # Check validity.
        if new.is_compatible_with(self) or self.is_compatible_with(new):
            return new
        raise ValueError(
            f"The new storage class ({new.storageClass}) is not compatible with the "
            f"existing storage class ({self.storageClass})."
        )

    def isComponent(self) -> bool:
        """Return whether this `DatasetType` refers to a component.

        Returns
        -------
        isComponent : `bool`
            `True` if this `DatasetType` is a component, `False` otherwise.
        """
        if self.component():
            return True
        return False

    def isComposite(self) -> bool:
        """Return whether this `DatasetType` is a composite.

        Returns
        -------
        isComposite : `bool`
            `True` if this `DatasetType` is a composite type, `False`
            otherwise.
        """
        return self.storageClass.isComposite()

    def _lookupNames(self) -> tuple[LookupKey, ...]:
        """Return name keys to use for lookups in configurations.

        The names are returned in order of priority.

        Returns
        -------
        names : `tuple` of `LookupKey`
            Tuple of the `DatasetType` name and the `StorageClass` name.
            If the name includes a component, the name with the component
            comes first, then the name without the component, and finally
            the storage class name and the storage class name of the
            composite.
        """
        rootName, componentName = self.nameAndComponent()
        lookups: tuple[LookupKey, ...] = (LookupKey(name=self.name),)
        if componentName is not None:
            lookups = lookups + (LookupKey(name=rootName),)

        if self._dimensions:
            # Dimensions are a lower priority than dataset type name.
            lookups = lookups + (LookupKey(dimensions=self._dimensions),)

        storageClasses = self.storageClass._lookupNames()
        if componentName is not None and self.parentStorageClass is not None:
            storageClasses += self.parentStorageClass._lookupNames()

        return lookups + storageClasses

    def to_simple(self, minimal: bool = False) -> SerializedDatasetType:
        """Convert this class to a simple python type.

        This makes it suitable for serialization.

        Parameters
        ----------
        minimal : `bool`, optional
            Use minimal serialization. Requires Registry to convert
            back to a full type.

        Returns
        -------
        simple : `SerializedDatasetType`
            The object converted to a class suitable for serialization.
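
        Examples
        --------
        A round-trip sketch, assuming ``universe`` is the relevant
        `DimensionUniverse`::

            simple = datasetType.to_simple()
            restored = DatasetType.from_simple(simple, universe=universe)
            assert restored == datasetType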

        """
        as_dict: dict[str, Any]
        if minimal:
            # Only needs the name.
            as_dict = {"name": self.name}
        else:
            # Convert to a dict form.
            as_dict = {
                "name": self.name,
                "storageClass": self._storageClassName,
                "isCalibration": self._isCalibration,
                "dimensions": list(self._dimensions.names),
            }

            if self._parentStorageClassName is not None:
                as_dict["parentStorageClass"] = self._parentStorageClassName
        return SerializedDatasetType(**as_dict)

    @classmethod
    def from_simple(
        cls,
        simple: SerializedDatasetType,
        universe: DimensionUniverse | None = None,
        registry: Registry | None = None,
    ) -> DatasetType:
        """Construct a new object from the simplified form.

        This is usually data returned from the `to_simple` method.

        Parameters
        ----------
        simple : `SerializedDatasetType`
            The value returned by `to_simple()`.
        universe : `DimensionUniverse`, optional
            The special graph of all known dimensions of which this graph
            will be a subset. Can be `None` if a registry is provided.
        registry : `lsst.daf.butler.Registry`, optional
            Registry to use to convert the simple name of a DatasetType to
            a full `DatasetType`. Can be `None` if a full description of
            the type is provided along with a universe.

        Returns
        -------
        datasetType : `DatasetType`
            Newly-constructed object.

        """
        # Check to see if there is a cache and, if so, whether it holds a
        # cached dataset type.
        cache = PersistenceContextVars.loadedTypes.get()
        key = (simple.name, simple.storageClass or "")
        if cache is not None and (type_ := cache.get(key, None)) is not None:
            return type_

        if simple.storageClass is None:
            # Treat this as the minimal representation.
            if registry is None:
                raise ValueError(
                    f"Unable to convert a DatasetType name '{simple}' to DatasetType without a Registry"
                )
            return registry.getDatasetType(simple.name)

        if universe is None and registry is None:
            raise ValueError("One of universe or registry must be provided.")

        if universe is None and registry is not None:
            # registry should not be None by now, but the check helps mypy.
            universe = registry.dimensions

        if universe is None:
            # This is for mypy.
            raise ValueError("Unable to determine a usable universe")

        match simple.dimensions:
            case list():
                dimensions = universe.conform(simple.dimensions)
            case SerializedDimensionGraph():
                dimensions = universe.conform(simple.dimensions.names)
            case None:
                raise ValueError(f"Dimensions must be specified in {simple}")

        newType = cls(
            name=simple.name,
            dimensions=dimensions,
            storageClass=simple.storageClass,
            isCalibration=simple.isCalibration,
            parentStorageClass=simple.parentStorageClass,
            universe=universe,
        )
        if cache is not None:
            cache[key] = newType
        return newType

    to_json = to_json_pydantic
    from_json: ClassVar = classmethod(from_json_pydantic)

    def __reduce__(
        self,
    ) -> tuple[
        Callable, tuple[type[DatasetType], tuple[str, DimensionGroup, str, str | None], dict[str, bool]]
    ]:
        """Support pickling.

        StorageClass instances can not normally be pickled, so we pickle
        the StorageClass name instead of the instance.
        """
        return _unpickle_via_factory, (
            self.__class__,
            (self.name, self._dimensions, self._storageClassName, self._parentStorageClassName),
            {"isCalibration": self._isCalibration},
        )

    def __deepcopy__(self, memo: Any) -> DatasetType:
        """Support the deep copy method.

        Normally ``deepcopy`` will use the pickle mechanism to make copies.
        We want to avoid that to support the (possibly degenerate) use case
        where a DatasetType is constructed with a StorageClass instance that
        is not registered with the StorageClassFactory (this happens in unit
        tests). Instead we re-implement the ``__deepcopy__`` method.
        """
        return DatasetType(
            name=deepcopy(self.name, memo),
            dimensions=deepcopy(self._dimensions, memo),
            storageClass=deepcopy(self._storageClass or self._storageClassName, memo),
            parentStorageClass=deepcopy(self._parentStorageClass or self._parentStorageClassName, memo),
            isCalibration=deepcopy(self._isCalibration, memo),
        )


def _unpickle_via_factory(factory: Callable, args: Any, kwargs: Any) -> DatasetType:
    """Unpickle something by calling a factory.

    Allows subclasses to unpickle using `__reduce__` with keyword
    arguments as well as positional arguments.
    """
    return factory(*args, **kwargs)