Coverage for python/lsst/daf/butler/_dataset_type.py: 23%

245 statements  

coverage.py v7.3.2, created at 2023-12-05 11:07 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ["DatasetType", "SerializedDatasetType"] 

31 

32import re 

33from collections.abc import Callable, Iterable, Mapping 

34from copy import deepcopy 

35from types import MappingProxyType 

36from typing import TYPE_CHECKING, Any, ClassVar 

37 

38from lsst.daf.butler._compat import _BaseModelCompat 

39from pydantic import StrictBool, StrictStr 

40 

41from ._config_support import LookupKey 

42from ._storage_class import StorageClass, StorageClassFactory 

43from .dimensions import DimensionGraph, DimensionGroup, SerializedDimensionGraph 

44from .json import from_json_pydantic, to_json_pydantic 

45from .persistence_context import PersistenceContextVars 

46 

47if TYPE_CHECKING: 

48 from .dimensions import Dimension, DimensionUniverse 

49 from .registry import Registry 

50 

51 

52def _safeMakeMappingProxyType(data: Mapping | None) -> Mapping: 

53 if data is None: 

54 data = {} 

55 return MappingProxyType(data) 

56 

57 

58class SerializedDatasetType(_BaseModelCompat): 

59 """Simplified model of a `DatasetType` suitable for serialization.""" 

60 

61 name: StrictStr 

62 storageClass: StrictStr | None = None 

63 dimensions: SerializedDimensionGraph | list[StrictStr] | None = None 

64 parentStorageClass: StrictStr | None = None 

65 isCalibration: StrictBool = False 

66 

67 @classmethod 

68 def direct( 

69 cls, 

70 *, 

71 name: str, 

72 storageClass: str | None = None, 

73 dimensions: list | dict | None = None, 

74 parentStorageClass: str | None = None, 

75 isCalibration: bool = False, 

76 ) -> SerializedDatasetType: 

77 """Construct a `SerializedDatasetType` directly without validators. 

78 

79 This differs from Pydantic's model_construct method in that the 

80 arguments are explicitly what the model requires, and it will recurse 

81 through members, constructing them from their corresponding `direct` 

82 methods. 

83 

84 This method should only be called when the inputs are trusted. 

85 """ 

86 cache = PersistenceContextVars.serializedDatasetTypeMapping.get() 

87 key = (name, storageClass or "") 

88 if cache is not None and (type_ := cache.get(key, None)) is not None: 

89 return type_ 

90 

91 serialized_dimensions: list[str] | None 

92 match dimensions: 

93 case list(): 

94 serialized_dimensions = dimensions 

95 case dict(): 

96 serialized_dimensions = SerializedDimensionGraph.direct(**dimensions).names 

97 case None: 

98 serialized_dimensions = None 

99 

100 node = cls.model_construct( 

101 name=name, 

102 storageClass=storageClass, 

103 dimensions=serialized_dimensions, 

104 parentStorageClass=parentStorageClass, 

105 isCalibration=isCalibration, 

106 ) 

107 

108 if cache is not None: 

109 cache[key] = node 

110 return node 
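# A minimal sketch of trusted-input construction via ``direct`` (the dataset
# type, storage class, and dimension names below are illustrative only):
#
#     serialized = SerializedDatasetType.direct(
#         name="calexp",
#         storageClass="ExposureF",
#         dimensions=["instrument", "visit", "detector"],
#         isCalibration=False,
#     )
#
# Unlike normal construction, no Pydantic validation is run, so this should
# only be used for values already known to be well formed.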

111 

112 

113class DatasetType: 

114 r"""A named category of Datasets. 

115 

116 Defines how they are organized, related, and stored. 

117 

118 A concrete, final class whose instances represent `DatasetType`\ s. 

119 `DatasetType` instances may be constructed without a `Registry`, 

120 but they must be registered 

121 via `Registry.registerDatasetType()` before corresponding Datasets 

122 may be added. 

123 `DatasetType` instances are immutable. 

124 

125 Parameters 

126 ---------- 

127 name : `str` 

128 A string name for the Dataset; must correspond to the same 

129 `DatasetType` across all Registries. Names must start with an 

130 upper or lowercase letter, and may contain only letters, numbers, 

131 and underscores. Component dataset types should contain a single 

132 period separating the base dataset type name from the component name 

133 (and may be recursive). 

134 dimensions : `DimensionGroup`, `DimensionGraph`, or \ 

135 `~collections.abc.Iterable` [ `Dimension` or `str` ] 

136 Dimensions used to label and relate instances of this `DatasetType`. 

137 If not a `DimensionGraph` or `DimensionGroup`, ``universe`` must be 

138 provided as well. 

139 storageClass : `StorageClass` or `str` 

140 Instance of a `StorageClass` or name of `StorageClass` that defines 

141 how this `DatasetType` is persisted. 

142 parentStorageClass : `StorageClass` or `str`, optional 

143 Instance of a `StorageClass` or name of `StorageClass` that defines 

144 how the composite parent is persisted. Must be `None` if this 

145 is not a component. 

146 universe : `DimensionUniverse`, optional 

147 Set of all known dimensions, used to normalize ``dimensions`` if it 

148 is not already a `DimensionGroup` or `DimensionGraph`.

149 isCalibration : `bool`, optional 

150 If `True`, this dataset type may be included in 

151 `~CollectionType.CALIBRATION` collections. 

152 

153 See Also 

154 -------- 

155 :ref:`daf_butler_organizing_datasets` 

156 """ 

157 

158 __slots__ = ( 

159 "_name", 

160 "_dimensions", 

161 "_storageClass", 

162 "_storageClassName", 

163 "_parentStorageClass", 

164 "_parentStorageClassName", 

165 "_isCalibration", 

166 ) 

167 

168 _serializedType = SerializedDatasetType 

169 

170 VALID_NAME_REGEX = re.compile("^[a-zA-Z_][a-zA-Z0-9_]*(\\.[a-zA-Z_][a-zA-Z0-9_]*)*$") 

171 

172 @staticmethod 

173 def nameWithComponent(datasetTypeName: str, componentName: str) -> str: 

174 """Form a valid DatasetTypeName from a parent and component. 

175 

176 No validation is performed. 

177 

178 Parameters 

179 ---------- 

180 datasetTypeName : `str` 

181 Base type name. 

182 componentName : `str` 

183 Name of component. 

184 

185 Returns 

186 ------- 

187 compTypeName : `str` 

188 Name to use for component DatasetType. 

189 """ 

190 return f"{datasetTypeName}.{componentName}" 

191 

192 def __init__( 

193 self, 

194 name: str, 

195 dimensions: DimensionGroup | DimensionGraph | Iterable[Dimension | str], 

196 storageClass: StorageClass | str, 

197 parentStorageClass: StorageClass | str | None = None, 

198 *, 

199 universe: DimensionUniverse | None = None, 

200 isCalibration: bool = False, 

201 ): 

202 if self.VALID_NAME_REGEX.match(name) is None: 

203 raise ValueError(f"DatasetType name '{name}' is invalid.") 

204 self._name = name 

205 universe = universe or getattr(dimensions, "universe", None) 

206 if universe is None: 

207 raise ValueError( 

208 "If dimensions is not a DimensionGroup or DimensionGraph, a universe must be provided." 

209 ) 

210 self._dimensions = universe.conform(dimensions) 

211 if name in self._dimensions.universe.governor_dimensions: 

212 raise ValueError(f"Governor dimension name {name} cannot be used as a dataset type name.") 

213 if not isinstance(storageClass, StorageClass | str): 

214 raise ValueError(f"StorageClass argument must be StorageClass or str. Got {storageClass}") 

215 self._storageClass: StorageClass | None 

216 if isinstance(storageClass, StorageClass): 

217 self._storageClass = storageClass 

218 self._storageClassName = storageClass.name 

219 else: 

220 self._storageClass = None 

221 self._storageClassName = storageClass 

222 

223 self._parentStorageClass: StorageClass | None = None 

224 self._parentStorageClassName: str | None = None 

225 if parentStorageClass is not None: 

226 if not isinstance(parentStorageClass, StorageClass | str):

227 raise ValueError( 

228 f"Parent StorageClass argument must be StorageClass or str. Got {parentStorageClass}" 

229 ) 

230 

231 # Only allowed for a component dataset type 

232 _, componentName = self.splitDatasetTypeName(self._name) 

233 if componentName is None: 

234 raise ValueError( 

235 f"Can not specify a parent storage class if this is not a component ({self._name})" 

236 ) 

237 if isinstance(parentStorageClass, StorageClass): 

238 self._parentStorageClass = parentStorageClass 

239 self._parentStorageClassName = parentStorageClass.name 

240 else: 

241 self._parentStorageClassName = parentStorageClass 

242 

243 # Ensure that parent storage class is specified when we have 

244 # a component and is not specified when we don't 

245 _, componentName = self.splitDatasetTypeName(self._name) 

246 if parentStorageClass is None and componentName is not None: 

247 raise ValueError( 

248 f"Component dataset type '{self._name}' constructed without parent storage class" 

249 ) 

250 if parentStorageClass is not None and componentName is None: 

251 raise ValueError(f"Parent storage class specified by {self._name} is not a composite") 

252 self._isCalibration = isCalibration 
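# A minimal construction sketch (assumes ``universe`` is a `DimensionUniverse`,
# e.g. ``butler.dimensions``; the names used here are illustrative):
#
#     flat_type = DatasetType(
#         "flat",
#         dimensions=("instrument", "detector", "physical_filter"),
#         storageClass="ExposureF",
#         universe=universe,
#         isCalibration=True,
#     )
#
# Passing a `DimensionGroup` or `DimensionGraph` for ``dimensions`` makes the
# ``universe`` argument unnecessary, since the universe is taken from it.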

253 

254 def __repr__(self) -> str: 

255 extra = "" 

256 if self._parentStorageClassName: 

257 extra = f", parentStorageClass={self._parentStorageClassName}" 

258 if self._isCalibration: 

259 extra += ", isCalibration=True" 

260 return f"DatasetType({self.name!r}, {self._dimensions}, {self._storageClassName}{extra})" 

261 

262 def _equal_ignoring_storage_class(self, other: Any) -> bool: 

263 """Check everything is equal except the storage class. 

264 

265 Parameters 

266 ---------- 

267 other : Any 

268 Object to check against this one. 

269 

270 Returns 

271 ------- 

272 mostly : `bool` 

273 Returns `True` if everything except the storage class is equal. 

274 """ 

275 if not isinstance(other, type(self)): 

276 return False 

277 if self._name != other._name: 

278 return False 

279 if self._dimensions != other._dimensions: 

280 return False 

281 if self._isCalibration != other._isCalibration: 

282 return False 

283 if self._parentStorageClass is not None and other._parentStorageClass is not None: 

284 return self._parentStorageClass == other._parentStorageClass 

285 else: 

286 return self._parentStorageClassName == other._parentStorageClassName 

287 

288 def __eq__(self, other: Any) -> bool: 

289 mostly_equal = self._equal_ignoring_storage_class(other) 

290 if not mostly_equal: 

291 return False 

292 

293 # Be careful not to force a storage class to import the corresponding 

294 # python code. 

295 if self._storageClass is not None and other._storageClass is not None: 

296 if self._storageClass != other._storageClass: 

297 return False 

298 else: 

299 if self._storageClassName != other._storageClassName: 

300 return False 

301 return True 

302 

303 def is_compatible_with(self, other: DatasetType) -> bool: 

304 """Determine if the given `DatasetType` is compatible with this one. 

305 

306 Compatibility requires a matching name and dimensions, and a storage

307 class for this dataset type that can convert the Python type associated

308 with the other storage class to this Python type.

309 

310 Parameters 

311 ---------- 

312 other : `DatasetType` 

313 Dataset type to check. 

314 

315 Returns 

316 ------- 

317 is_compatible : `bool` 

318 Returns `True` if the other dataset type is either the same as this 

319 or the storage class associated with the other can be converted to 

320 this. 

321 """ 

322 mostly_equal = self._equal_ignoring_storage_class(other) 

323 if not mostly_equal: 

324 return False 

325 

326 # If the storage class names match then they are compatible. 

327 if self._storageClassName == other._storageClassName: 

328 return True 

329 

330 # Now required to check the full storage class. 

331 self_sc = self.storageClass 

332 other_sc = other.storageClass 

333 

334 return self_sc.can_convert(other_sc) 
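# Compatibility sketch: two dataset types that differ only in storage class
# are compatible when one storage class can convert to the other (the storage
# class names below are illustrative and must be known to the factory;
# ``universe`` is an assumed `DimensionUniverse`):
#
#     dims = ("instrument", "visit")
#     a = DatasetType("sourceTable", dims, "DataFrame", universe=universe)
#     b = DatasetType("sourceTable", dims, "ArrowTable", universe=universe)
#     a.is_compatible_with(b)  # True only if "ArrowTable" can convert to "DataFrame"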

335 

336 def __hash__(self) -> int: 

337 """Hash DatasetType instance. 

338 

339 This only uses the StorageClass name, which is consistent with the

340 implementation of the StorageClass hash method.

341 """ 

342 return hash((self._name, self._dimensions, self._storageClassName, self._parentStorageClassName)) 

343 

344 def __lt__(self, other: Any) -> bool: 

345 """Sort using the dataset type name.""" 

346 if not isinstance(other, type(self)): 

347 return NotImplemented 

348 return self.name < other.name 

349 

350 @property 

351 def name(self) -> str: 

352 """Return a string name for the Dataset. 

353 

354 Must correspond to the same `DatasetType` across all Registries. 

355 """ 

356 return self._name 

357 

358 @property 

359 def dimensions(self) -> DimensionGraph: 

360 """Return the dimensions of this dataset type (`DimensionGraph`). 

361 

362 The dimensions of a dataset type define the keys of its datasets' data IDs.

363 """ 

364 return self._dimensions._as_graph() 

365 

366 @property 

367 def storageClass(self) -> StorageClass: 

368 """Return `StorageClass` instance associated with this dataset type. 

369 

370 The `StorageClass` defines how this `DatasetType` 

371 is persisted. Note that if DatasetType was constructed with a name 

372 of a StorageClass then Butler has to be initialized before using 

373 this property. 

374 """ 

375 if self._storageClass is None: 

376 self._storageClass = StorageClassFactory().getStorageClass(self._storageClassName) 

377 return self._storageClass 

378 

379 @property 

380 def storageClass_name(self) -> str: 

381 """Return the storage class name. 

382 

383 This will never force the storage class to be imported. 

384 """ 

385 return self._storageClassName 

386 

387 @property 

388 def parentStorageClass(self) -> StorageClass | None: 

389 """Return the storage class of the composite containing this component. 

390 

391 Note that if DatasetType was constructed with a name of a 

392 StorageClass then Butler has to be initialized before using this 

393 property. Can be `None` if this is not a component of a composite. 

394 Must be defined if this is a component. 

395 """ 

396 if self._parentStorageClass is None and self._parentStorageClassName is None: 

397 return None 

398 if self._parentStorageClass is None and self._parentStorageClassName is not None: 

399 self._parentStorageClass = StorageClassFactory().getStorageClass(self._parentStorageClassName) 

400 return self._parentStorageClass 

401 

402 def isCalibration(self) -> bool: 

403 """Return if datasets of this type can be in calibration collections. 

404 

405 Returns 

406 ------- 

407 flag : `bool` 

408 `True` if datasets of this type may be included in calibration 

409 collections. 

410 """ 

411 return self._isCalibration 

412 

413 @staticmethod 

414 def splitDatasetTypeName(datasetTypeName: str) -> tuple[str, str | None]: 

415 """Return the root name and the component from a composite name. 

416 

417 Parameters 

418 ---------- 

419 datasetTypeName : `str` 

420 The name of the dataset type, can include a component using 

421 a "."-separator. 

422 

423 Returns 

424 ------- 

425 rootName : `str` 

426 Root name without any components. 

427 componentName : `str` 

428 The component if it has been specified, else `None`. 

429 

430 Notes 

431 ----- 

432 If the dataset type name is ``a.b.c`` this method will return a 

433 root name of ``a`` and a component name of ``b.c``. 

434 """ 

435 comp = None 

436 root = datasetTypeName 

437 if "." in root: 

438 # If there is doubt, the component is after the first "." 

439 root, comp = root.split(".", maxsplit=1) 

440 return root, comp 
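# For example:
#
#     DatasetType.splitDatasetTypeName("calexp")      # -> ("calexp", None)
#     DatasetType.splitDatasetTypeName("calexp.wcs")  # -> ("calexp", "wcs")
#     DatasetType.splitDatasetTypeName("a.b.c")       # -> ("a", "b.c")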

441 

442 def nameAndComponent(self) -> tuple[str, str | None]: 

443 """Return the root name of this dataset type and any component. 

444 

445 Returns 

446 ------- 

447 rootName : `str` 

448 Root name for this `DatasetType` without any components. 

449 componentName : `str` 

450 The component if it has been specified, else `None`. 

451 """ 

452 return self.splitDatasetTypeName(self.name) 

453 

454 def component(self) -> str | None: 

455 """Return the component name (if defined). 

456 

457 Returns 

458 ------- 

459 comp : `str` 

460 Name of component part of DatasetType name. `None` if this 

461 `DatasetType` is not associated with a component. 

462 """ 

463 _, comp = self.nameAndComponent() 

464 return comp 

465 

466 def componentTypeName(self, component: str) -> str: 

467 """Derive a component dataset type from a composite. 

468 

469 Parameters 

470 ---------- 

471 component : `str` 

472 Name of the component.

473 

474 Returns 

475 ------- 

476 derived : `str` 

477 Compound name of this `DatasetType` and the component. 

478 

479 Raises 

480 ------ 

481 KeyError 

482 Requested component is not supported by this `DatasetType`. 

483 """ 

484 if component in self.storageClass.allComponents(): 

485 return self.nameWithComponent(self.name, component) 

486 raise KeyError(f"Requested component ({component}) not understood by this DatasetType ({self})") 

487 

488 def makeCompositeDatasetType(self) -> DatasetType: 

489 """Return a composite dataset type from the component. 

490 

491 Returns 

492 ------- 

493 composite : `DatasetType` 

494 The composite dataset type. 

495 

496 Raises 

497 ------ 

498 RuntimeError 

499 Raised if this dataset type is not a component dataset type. 

500 """ 

501 if not self.isComponent(): 

502 raise RuntimeError(f"DatasetType {self.name} must be a component to form the composite") 

503 composite_name, _ = self.nameAndComponent() 

504 if self.parentStorageClass is None: 

505 raise ValueError( 

506 f"Parent storage class is not set. Unable to create composite type from {self.name}" 

507 ) 

508 return DatasetType( 

509 composite_name, 

510 dimensions=self._dimensions, 

511 storageClass=self.parentStorageClass, 

512 isCalibration=self.isCalibration(), 

513 ) 

514 

515 def makeComponentDatasetType(self, component: str) -> DatasetType: 

516 """Return a component dataset type from a composite. 

517 

518 Assumes the same dimensions as the parent. 

519 

520 Parameters 

521 ---------- 

522 component : `str` 

523 Name of the component.

524 

525 Returns 

526 ------- 

527 datasetType : `DatasetType` 

528 A new DatasetType instance. 

529 """ 

530 # The component could be a read/write or read component 

531 return DatasetType( 

532 self.componentTypeName(component), 

533 dimensions=self._dimensions, 

534 storageClass=self.storageClass.allComponents()[component], 

535 parentStorageClass=self.storageClass, 

536 isCalibration=self.isCalibration(), 

537 ) 
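# Component derivation sketch (assumes the composite's storage class defines a
# "wcs" component; the names and ``universe`` are illustrative assumptions):
#
#     dims = ("instrument", "visit", "detector")
#     calexp_type = DatasetType("calexp", dims, "ExposureF", universe=universe)
#     wcs_type = calexp_type.makeComponentDatasetType("wcs")
#     wcs_type.name                  # "calexp.wcs"
#     wcs_type.parentStorageClass    # the composite's storage class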

538 

539 def makeAllComponentDatasetTypes(self) -> list[DatasetType]: 

540 """Return all component dataset types for this composite. 

541 

542 Returns 

543 ------- 

544 all : `list` of `DatasetType` 

545 All the component dataset types. If this is not a composite 

546 then returns an empty list. 

547 """ 

548 return [ 

549 self.makeComponentDatasetType(componentName) 

550 for componentName in self.storageClass.allComponents() 

551 ] 

552 

553 def overrideStorageClass(self, storageClass: str | StorageClass) -> DatasetType: 

554 """Create a new `DatasetType` from this one but with an updated 

555 `StorageClass`. 

556 

557 Parameters 

558 ---------- 

559 storageClass : `str` or `StorageClass` 

560 The new storage class. 

561 

562 Returns 

563 ------- 

564 modified : `DatasetType` 

565 A dataset type that is the same as the current one but with a 

566 different storage class. Will be ``self`` if the given storage 

567 class is the current one. 

568 

569 Notes 

570 ----- 

571 If this is a component dataset type, the parent storage class will be 

572 retained. 

573 """ 

574 if storageClass == self._storageClassName or storageClass == self._storageClass: 

575 return self 

576 parent = self._parentStorageClass if self._parentStorageClass else self._parentStorageClassName 

577 new = DatasetType( 

578 self.name, 

579 dimensions=self._dimensions, 

580 storageClass=storageClass, 

581 parentStorageClass=parent, 

582 isCalibration=self.isCalibration(), 

583 ) 

584 # Check validity. 

585 if new.is_compatible_with(self) or self.is_compatible_with(new): 

586 return new 

587 raise ValueError( 

588 f"The new storage class ({new.storageClass}) is not compatible with the " 

589 f"existing storage class ({self.storageClass})." 

590 ) 
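# Storage-class override sketch: the result keeps the name, dimensions, parent
# storage class, and calibration flag; a ValueError is raised if neither
# storage class can convert to the other (names and ``universe`` illustrative):
#
#     dims = ("instrument", "visit")
#     table_type = DatasetType("sourceTable", dims, "ArrowTable", universe=universe)
#     df_type = table_type.overrideStorageClass("DataFrame")
#     df_type.name == table_type.name   # True; only the storage class differs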

591 

592 def isComponent(self) -> bool: 

593 """Return whether this `DatasetType` refers to a component. 

594 

595 Returns 

596 ------- 

597 isComponent : `bool` 

598 `True` if this `DatasetType` is a component, `False` otherwise. 

599 """ 

600 if self.component(): 

601 return True 

602 return False 

603 

604 def isComposite(self) -> bool: 

605 """Return whether this `DatasetType` is a composite. 

606 

607 Returns 

608 ------- 

609 isComposite : `bool` 

610 `True` if this `DatasetType` is a composite type, `False` 

611 otherwise. 

612 """ 

613 return self.storageClass.isComposite() 

614 

615 def _lookupNames(self) -> tuple[LookupKey, ...]: 

616 """Return name keys to use for lookups in configurations. 

617 

618 The names are returned in order of priority. 

619 

620 Returns 

621 ------- 

622 names : `tuple` of `LookupKey` 

623 Tuple of the `DatasetType` name and the `StorageClass` name. 

624 If the name includes a component the name with the component 

625 is first, then the name without the component and finally 

626 the storage class name and the storage class name of the 

627 composite. 

628 """ 

629 rootName, componentName = self.nameAndComponent() 

630 lookups: tuple[LookupKey, ...] = (LookupKey(name=self.name),) 

631 if componentName is not None: 

632 lookups = lookups + (LookupKey(name=rootName),) 

633 

634 if self._dimensions: 

635 # Dimensions are a lower priority than dataset type name 

636 lookups = lookups + (LookupKey(dimensions=self._dimensions),) 

637 

638 storageClasses = self.storageClass._lookupNames() 

639 if componentName is not None and self.parentStorageClass is not None: 

640 storageClasses += self.parentStorageClass._lookupNames() 

641 

642 return lookups + storageClasses 

643 

644 def to_simple(self, minimal: bool = False) -> SerializedDatasetType: 

645 """Convert this class to a simple python type. 

646 

647 This makes it suitable for serialization. 

648 

649 Parameters 

650 ---------- 

651 minimal : `bool`, optional 

652 Use minimal serialization. Requires Registry to convert 

653 back to a full type. 

654 

655 Returns 

656 ------- 

657 simple : `SerializedDatasetType` 

658 The object converted to a class suitable for serialization. 

659 """ 

660 as_dict: dict[str, Any] 

661 if minimal: 

662 # Only needs the name. 

663 as_dict = {"name": self.name} 

664 else: 

665 # Convert to a dict form 

666 as_dict = { 

667 "name": self.name, 

668 "storageClass": self._storageClassName, 

669 "isCalibration": self._isCalibration, 

670 "dimensions": list(self._dimensions.names), 

671 } 

672 

673 if self._parentStorageClassName is not None: 

674 as_dict["parentStorageClass"] = self._parentStorageClassName 

675 return SerializedDatasetType(**as_dict) 

676 

677 @classmethod 

678 def from_simple( 

679 cls, 

680 simple: SerializedDatasetType, 

681 universe: DimensionUniverse | None = None, 

682 registry: Registry | None = None, 

683 ) -> DatasetType: 

684 """Construct a new object from the simplified form. 

685 

686 This is usually data returned from the `to_simple` method. 

687 

688 Parameters 

689 ---------- 

690 simple : `SerializedDatasetType` 

691 The value returned by `to_simple()`. 

692 universe : `DimensionUniverse` 

693 The universe of all known dimensions, of which this dataset type's

694 dimensions will be a subset. Can be `None` if a registry is provided.

695 registry : `lsst.daf.butler.Registry`, optional 

696 Registry to use to convert simple name of a DatasetType to 

697 a full `DatasetType`. Can be `None` if a full description of 

698 the type is provided along with a universe. 

699 

700 Returns 

701 ------- 

702 datasetType : `DatasetType` 

703 Newly-constructed object. 

704 """ 

705 # check to see if there is a cache, and if there is, if there is a 

706 # cached dataset type 

707 cache = PersistenceContextVars.loadedTypes.get() 

708 key = (simple.name, simple.storageClass or "") 

709 if cache is not None and (type_ := cache.get(key, None)) is not None: 

710 return type_ 

711 

712 if simple.storageClass is None: 

713 # Treat this as minimalist representation 

714 if registry is None: 

715 raise ValueError( 

716 f"Unable to convert a DatasetType name '{simple}' to DatasetType without a Registry" 

717 ) 

718 return registry.getDatasetType(simple.name) 

719 

720 if universe is None and registry is None: 

721 raise ValueError("One of universe or registry must be provided.") 

722 

723 if universe is None and registry is not None: 

724 # registry is known to be non-None here, but the check helps mypy

725 universe = registry.dimensions 

726 

727 if universe is None: 

728 # this is for mypy 

729 raise ValueError("Unable to determine a usable universe") 

730 

731 match simple.dimensions: 

732 case list(): 

733 dimensions = universe.conform(simple.dimensions) 

734 case SerializedDimensionGraph(): 

735 dimensions = universe.conform(simple.dimensions.names) 

736 case None: 

737 raise ValueError(f"Dimensions must be specified in {simple}") 

738 

739 newType = cls( 

740 name=simple.name, 

741 dimensions=dimensions, 

742 storageClass=simple.storageClass, 

743 isCalibration=simple.isCalibration, 

744 parentStorageClass=simple.parentStorageClass, 

745 universe=universe, 

746 ) 

747 if cache is not None: 

748 cache[key] = newType 

749 return newType 
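# Serialization round-trip sketch (assumes ``universe`` is the
# `DimensionUniverse` the dataset type was defined with):
#
#     simple = dataset_type.to_simple()
#     restored = DatasetType.from_simple(simple, universe=universe)
#     restored == dataset_type          # True for a full (non-minimal) form
#
# With ``to_simple(minimal=True)`` only the name is stored, and ``from_simple``
# then needs a ``registry`` to look the full definition back up.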

750 

751 to_json = to_json_pydantic 

752 from_json: ClassVar = classmethod(from_json_pydantic) 

753 

754 def __reduce__( 

755 self, 

756 ) -> tuple[ 

757 Callable, tuple[type[DatasetType], tuple[str, DimensionGroup, str, str | None], dict[str, bool]] 

758 ]: 

759 """Support pickling. 

760 

761 StorageClass instances cannot normally be pickled, so we pickle the

762 StorageClass name instead of the instance.

763 """ 

764 return _unpickle_via_factory, ( 

765 self.__class__, 

766 (self.name, self._dimensions, self._storageClassName, self._parentStorageClassName), 

767 {"isCalibration": self._isCalibration}, 

768 ) 

769 

770 def __deepcopy__(self, memo: Any) -> DatasetType: 

771 """Support for deep copy method. 

772 

773 Normally ``deepcopy`` uses the pickle mechanism to make copies. We

774 want to avoid that in order to support the (possibly degenerate) use

775 case where a DatasetType is constructed with a StorageClass instance

776 that is not registered with StorageClassFactory (this happens in unit

777 tests), so we re-implement the ``__deepcopy__`` method instead.

778 """ 

779 return DatasetType( 

780 name=deepcopy(self.name, memo), 

781 dimensions=deepcopy(self._dimensions, memo), 

782 storageClass=deepcopy(self._storageClass or self._storageClassName, memo), 

783 parentStorageClass=deepcopy(self._parentStorageClass or self._parentStorageClassName, memo), 

784 isCalibration=deepcopy(self._isCalibration, memo), 

785 ) 
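# Copy and pickle sketch: both round-trips preserve equality because only the
# storage class name (not the instance) is carried through ``__reduce__``:
#
#     import copy
#     import pickle
#
#     assert copy.deepcopy(dataset_type) == dataset_type
#     assert pickle.loads(pickle.dumps(dataset_type)) == dataset_type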

786 

787 

788def _unpickle_via_factory(factory: Callable, args: Any, kwargs: Any) -> DatasetType: 

789 """Unpickle something by calling a factory. 

790 

791 Allows subclasses to unpickle using `__reduce__` with keyword 

792 arguments as well as positional arguments. 

793 """ 

794 return factory(*args, **kwargs)