Coverage for python/lsst/daf/butler/core/datasets/type.py: 22%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

192 statements  

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ["DatasetType", "SerializedDatasetType"] 

25 

26from copy import deepcopy 

27import re 

28 

29from types import MappingProxyType 

30 

31from typing import ( 

32 TYPE_CHECKING, 

33 Any, 

34 Callable, 

35 Dict, 

36 Iterable, 

37 List, 

38 Mapping, 

39 Optional, 

40 Tuple, 

41 Type, 

42 Union, 

43) 

44 

45from pydantic import BaseModel, StrictStr, StrictBool 

46 

47from ..storageClass import StorageClass, StorageClassFactory 

48from ..dimensions import DimensionGraph, SerializedDimensionGraph 

49from ..configSupport import LookupKey 

50from ..json import from_json_pydantic, to_json_pydantic 

51 

52if TYPE_CHECKING: 52 ↛ 53line 52 didn't jump to line 53, because the condition on line 52 was never true

53 from ..dimensions import Dimension, DimensionUniverse 

54 from ...registry import Registry 

55 

56 

57def _safeMakeMappingProxyType(data: Optional[Mapping]) -> Mapping: 

58 if data is None: 

59 data = {} 

60 return MappingProxyType(data) 

61 

62 

63class SerializedDatasetType(BaseModel): 

64 """Simplified model of a `DatasetType` suitable for serialization.""" 

65 

66 name: StrictStr 

67 storageClass: Optional[StrictStr] = None 

68 dimensions: Optional[SerializedDimensionGraph] = None 

69 parentStorageClass: Optional[StrictStr] = None 

70 isCalibration: StrictBool = False 

71 

72 

73class DatasetType: 

74 r"""A named category of Datasets. 

75 

76 Defines how they are organized, related, and stored. 

77 

78 A concrete, final class whose instances represent `DatasetType`\ s. 

79 `DatasetType` instances may be constructed without a `Registry`, 

80 but they must be registered 

81 via `Registry.registerDatasetType()` before corresponding Datasets 

82 may be added. 

83 `DatasetType` instances are immutable. 

84 

85 Parameters 

86 ---------- 

87 name : `str` 

88 A string name for the Dataset; must correspond to the same 

89 `DatasetType` across all Registries. Names must start with an 

90 upper or lowercase letter, and may contain only letters, numbers, 

91 and underscores. Component dataset types should contain a single 

92 period separating the base dataset type name from the component name 

93 (and may be recursive). 

94 dimensions : `DimensionGraph` or iterable of `Dimension` 

95 Dimensions used to label and relate instances of this `DatasetType`. 

96 If not a `DimensionGraph`, ``universe`` must be provided as well. 

97 storageClass : `StorageClass` or `str` 

98 Instance of a `StorageClass` or name of `StorageClass` that defines 

99 how this `DatasetType` is persisted. 

100 parentStorageClass : `StorageClass` or `str`, optional 

101 Instance of a `StorageClass` or name of `StorageClass` that defines 

102 how the composite parent is persisted. Must be `None` if this 

103 is not a component. 

104 universe : `DimensionUniverse`, optional 

105 Set of all known dimensions, used to normalize ``dimensions`` if it 

106 is not already a `DimensionGraph`. 

107 isCalibration : `bool`, optional 

108 If `True`, this dataset type may be included in 

109 `~CollectionType.CALIBRATION` collections. 

110 

111 See Also 

112 -------- 

113 :ref:`daf_butler_organizing_datasets` 

114 """ 

115 

116 __slots__ = ("_name", "_dimensions", "_storageClass", "_storageClassName", 

117 "_parentStorageClass", "_parentStorageClassName", 

118 "_isCalibration") 

119 

120 _serializedType = SerializedDatasetType 

121 

122 VALID_NAME_REGEX = re.compile("^[a-zA-Z][a-zA-Z0-9_]*(\\.[a-zA-Z][a-zA-Z0-9_]*)*$") 

123 

124 @staticmethod 

125 def nameWithComponent(datasetTypeName: str, componentName: str) -> str: 

126 """Form a valid DatasetTypeName from a parent and component. 

127 

128 No validation is performed. 

129 

130 Parameters 

131 ---------- 

132 datasetTypeName : `str` 

133 Base type name. 

134 componentName : `str` 

135 Name of component. 

136 

137 Returns 

138 ------- 

139 compTypeName : `str` 

140 Name to use for component DatasetType. 

141 """ 

142 return "{}.{}".format(datasetTypeName, componentName) 

143 

144 def __init__(self, name: str, dimensions: Union[DimensionGraph, Iterable[Dimension]], 

145 storageClass: Union[StorageClass, str], 

146 parentStorageClass: Optional[Union[StorageClass, str]] = None, *, 

147 universe: Optional[DimensionUniverse] = None, 

148 isCalibration: bool = False): 

149 if self.VALID_NAME_REGEX.match(name) is None: 

150 raise ValueError(f"DatasetType name '{name}' is invalid.") 

151 self._name = name 

152 if not isinstance(dimensions, DimensionGraph): 

153 if universe is None: 

154 raise ValueError("If dimensions is not a normalized DimensionGraph, " 

155 "a universe must be provided.") 

156 dimensions = universe.extract(dimensions) 

157 self._dimensions = dimensions 

158 if name in self._dimensions.universe.getGovernorDimensions().names: 

159 raise ValueError(f"Governor dimension name {name} cannot be used as a dataset type name.") 

160 if not isinstance(storageClass, (StorageClass, str)): 

161 raise ValueError("StorageClass argument must be StorageClass or str. " 

162 f"Got {storageClass}") 

163 self._storageClass: Optional[StorageClass] 

164 if isinstance(storageClass, StorageClass): 

165 self._storageClass = storageClass 

166 self._storageClassName = storageClass.name 

167 else: 

168 self._storageClass = None 

169 self._storageClassName = storageClass 

170 

171 self._parentStorageClass: Optional[StorageClass] = None 

172 self._parentStorageClassName: Optional[str] = None 

173 if parentStorageClass is not None: 

174 if not isinstance(storageClass, (StorageClass, str)): 

175 raise ValueError("Parent StorageClass argument must be StorageClass or str. " 

176 f"Got {parentStorageClass}") 

177 

178 # Only allowed for a component dataset type 

179 _, componentName = self.splitDatasetTypeName(self._name) 

180 if componentName is None: 

181 raise ValueError("Can not specify a parent storage class if this is not a component" 

182 f" ({self._name})") 

183 if isinstance(parentStorageClass, StorageClass): 

184 self._parentStorageClass = parentStorageClass 

185 self._parentStorageClassName = parentStorageClass.name 

186 else: 

187 self._parentStorageClassName = parentStorageClass 

188 

189 # Ensure that parent storage class is specified when we have 

190 # a component and is not specified when we don't 

191 _, componentName = self.splitDatasetTypeName(self._name) 

192 if parentStorageClass is None and componentName is not None: 

193 raise ValueError(f"Component dataset type '{self._name}' constructed without parent" 

194 " storage class") 

195 if parentStorageClass is not None and componentName is None: 

196 raise ValueError(f"Parent storage class specified by {self._name} is not a composite") 

197 self._isCalibration = isCalibration 

198 

199 def __repr__(self) -> str: 

200 extra = "" 

201 if self._parentStorageClassName: 

202 extra = f", parentStorageClass={self._parentStorageClassName}" 

203 if self._isCalibration: 

204 extra += ", isCalibration=True" 

205 return f"DatasetType({self.name!r}, {self.dimensions}, {self._storageClassName}{extra})" 

206 

207 def __eq__(self, other: Any) -> bool: 

208 if not isinstance(other, type(self)): 

209 return False 

210 if self._name != other._name: 

211 return False 

212 if self._dimensions != other._dimensions: 

213 return False 

214 if self._storageClass is not None and other._storageClass is not None: 

215 if self._storageClass != other._storageClass: 

216 return False 

217 else: 

218 if self._storageClassName != other._storageClassName: 

219 return False 

220 if self._isCalibration != other._isCalibration: 

221 return False 

222 if self._parentStorageClass is not None and other._parentStorageClass is not None: 

223 return self._parentStorageClass == other._parentStorageClass 

224 else: 

225 return self._parentStorageClassName == other._parentStorageClassName 

226 

227 def __hash__(self) -> int: 

228 """Hash DatasetType instance. 

229 

230 This only uses StorageClass name which is it consistent with the 

231 implementation of StorageClass hash method. 

232 """ 

233 return hash((self._name, self._dimensions, self._storageClassName, 

234 self._parentStorageClassName)) 

235 

236 def __lt__(self, other: Any) -> bool: 

237 """Sort using the dataset type name.""" 

238 if not isinstance(other, type(self)): 

239 return NotImplemented 

240 return self.name < other.name 

241 

242 @property 

243 def name(self) -> str: 

244 """Return a string name for the Dataset. 

245 

246 Must correspond to the same `DatasetType` across all Registries. 

247 """ 

248 return self._name 

249 

250 @property 

251 def dimensions(self) -> DimensionGraph: 

252 r"""Return the `Dimension`\ s fir this dataset type. 

253 

254 The dimensions label and relate instances of this 

255 `DatasetType` (`DimensionGraph`). 

256 """ 

257 return self._dimensions 

258 

259 @property 

260 def storageClass(self) -> StorageClass: 

261 """Return `StorageClass` instance associated with this dataset type. 

262 

263 The `StorageClass` defines how this `DatasetType` 

264 is persisted. Note that if DatasetType was constructed with a name 

265 of a StorageClass then Butler has to be initialized before using 

266 this property. 

267 """ 

268 if self._storageClass is None: 

269 self._storageClass = StorageClassFactory().getStorageClass(self._storageClassName) 

270 return self._storageClass 

271 

272 @property 

273 def parentStorageClass(self) -> Optional[StorageClass]: 

274 """Return the storage class of the composite containing this component. 

275 

276 Note that if DatasetType was constructed with a name of a 

277 StorageClass then Butler has to be initialized before using this 

278 property. Can be `None` if this is not a component of a composite. 

279 Must be defined if this is a component. 

280 """ 

281 if self._parentStorageClass is None and self._parentStorageClassName is None: 

282 return None 

283 if self._parentStorageClass is None and self._parentStorageClassName is not None: 

284 self._parentStorageClass = StorageClassFactory().getStorageClass(self._parentStorageClassName) 

285 return self._parentStorageClass 

286 

287 def isCalibration(self) -> bool: 

288 """Return if datasets of this type can be in calibration collections. 

289 

290 Returns 

291 ------- 

292 flag : `bool` 

293 `True` if datasets of this type may be included in calibration 

294 collections. 

295 """ 

296 return self._isCalibration 

297 

298 @staticmethod 

299 def splitDatasetTypeName(datasetTypeName: str) -> Tuple[str, Optional[str]]: 

300 """Return the root name and the component from a composite name. 

301 

302 Parameters 

303 ---------- 

304 datasetTypeName : `str` 

305 The name of the dataset type, can include a component using 

306 a "."-separator. 

307 

308 Returns 

309 ------- 

310 rootName : `str` 

311 Root name without any components. 

312 componentName : `str` 

313 The component if it has been specified, else `None`. 

314 

315 Notes 

316 ----- 

317 If the dataset type name is ``a.b.c`` this method will return a 

318 root name of ``a`` and a component name of ``b.c``. 

319 """ 

320 comp = None 

321 root = datasetTypeName 

322 if "." in root: 

323 # If there is doubt, the component is after the first "." 

324 root, comp = root.split(".", maxsplit=1) 

325 return root, comp 

326 

327 def nameAndComponent(self) -> Tuple[str, Optional[str]]: 

328 """Return the root name of this dataset type and any component. 

329 

330 Returns 

331 ------- 

332 rootName : `str` 

333 Root name for this `DatasetType` without any components. 

334 componentName : `str` 

335 The component if it has been specified, else `None`. 

336 """ 

337 return self.splitDatasetTypeName(self.name) 

338 

339 def component(self) -> Optional[str]: 

340 """Return the component name (if defined). 

341 

342 Returns 

343 ------- 

344 comp : `str` 

345 Name of component part of DatasetType name. `None` if this 

346 `DatasetType` is not associated with a component. 

347 """ 

348 _, comp = self.nameAndComponent() 

349 return comp 

350 

351 def componentTypeName(self, component: str) -> str: 

352 """Derive a component dataset type from a composite. 

353 

354 Parameters 

355 ---------- 

356 component : `str` 

357 Name of component 

358 

359 Returns 

360 ------- 

361 derived : `str` 

362 Compound name of this `DatasetType` and the component. 

363 

364 Raises 

365 ------ 

366 KeyError 

367 Requested component is not supported by this `DatasetType`. 

368 """ 

369 if component in self.storageClass.allComponents(): 

370 return self.nameWithComponent(self.name, component) 

371 raise KeyError("Requested component ({}) not understood by this DatasetType".format(component)) 

372 

373 def makeCompositeDatasetType(self) -> DatasetType: 

374 """Return a composite dataset type from the component. 

375 

376 Returns 

377 ------- 

378 composite : `DatasetType` 

379 The composite dataset type. 

380 

381 Raises 

382 ------ 

383 RuntimeError 

384 Raised if this dataset type is not a component dataset type. 

385 """ 

386 if not self.isComponent(): 

387 raise RuntimeError(f"DatasetType {self.name} must be a component to form the composite") 

388 composite_name, _ = self.nameAndComponent() 

389 if self.parentStorageClass is None: 

390 raise ValueError("Parent storage class is not set. " 

391 f"Unable to create composite type from {self.name}") 

392 return DatasetType(composite_name, dimensions=self.dimensions, 

393 storageClass=self.parentStorageClass) 

394 

395 def makeComponentDatasetType(self, component: str) -> DatasetType: 

396 """Return a component dataset type from a composite. 

397 

398 Assumes the same dimensions as the parent. 

399 

400 Parameters 

401 ---------- 

402 component : `str` 

403 Name of component 

404 

405 Returns 

406 ------- 

407 datasetType : `DatasetType` 

408 A new DatasetType instance. 

409 """ 

410 # The component could be a read/write or read component 

411 return DatasetType(self.componentTypeName(component), dimensions=self.dimensions, 

412 storageClass=self.storageClass.allComponents()[component], 

413 parentStorageClass=self.storageClass) 

414 

415 def makeAllComponentDatasetTypes(self) -> List[DatasetType]: 

416 """Return all component dataset types for this composite. 

417 

418 Returns 

419 ------- 

420 all : `list` of `DatasetType` 

421 All the component dataset types. If this is not a composite 

422 then returns an empty list. 

423 """ 

424 return [self.makeComponentDatasetType(componentName) 

425 for componentName in self.storageClass.allComponents()] 

426 

427 def isComponent(self) -> bool: 

428 """Return whether this `DatasetType` refers to a component. 

429 

430 Returns 

431 ------- 

432 isComponent : `bool` 

433 `True` if this `DatasetType` is a component, `False` otherwise. 

434 """ 

435 if self.component(): 

436 return True 

437 return False 

438 

439 def isComposite(self) -> bool: 

440 """Return whether this `DatasetType` is a composite. 

441 

442 Returns 

443 ------- 

444 isComposite : `bool` 

445 `True` if this `DatasetType` is a composite type, `False` 

446 otherwise. 

447 """ 

448 return self.storageClass.isComposite() 

449 

450 def _lookupNames(self) -> Tuple[LookupKey, ...]: 

451 """Return name keys to use for lookups in configurations. 

452 

453 The names are returned in order of priority. 

454 

455 Returns 

456 ------- 

457 names : `tuple` of `LookupKey` 

458 Tuple of the `DatasetType` name and the `StorageClass` name. 

459 If the name includes a component the name with the component 

460 is first, then the name without the component and finally 

461 the storage class name and the storage class name of the 

462 composite. 

463 """ 

464 rootName, componentName = self.nameAndComponent() 

465 lookups: Tuple[LookupKey, ...] = (LookupKey(name=self.name),) 

466 if componentName is not None: 

467 lookups = lookups + (LookupKey(name=rootName),) 

468 

469 if self.dimensions: 

470 # Dimensions are a lower priority than dataset type name 

471 lookups = lookups + (LookupKey(dimensions=self.dimensions),) 

472 

473 storageClasses = self.storageClass._lookupNames() 

474 if componentName is not None and self.parentStorageClass is not None: 

475 storageClasses += self.parentStorageClass._lookupNames() 

476 

477 return lookups + storageClasses 

478 

479 def to_simple(self, minimal: bool = False) -> SerializedDatasetType: 

480 """Convert this class to a simple python type. 

481 

482 This makes it suitable for serialization. 

483 

484 Parameters 

485 ---------- 

486 minimal : `bool`, optional 

487 Use minimal serialization. Requires Registry to convert 

488 back to a full type. 

489 

490 Returns 

491 ------- 

492 simple : `SerializedDatasetType` 

493 The object converted to a class suitable for serialization. 

494 """ 

495 as_dict: Dict[str, Any] 

496 if minimal: 

497 # Only needs the name. 

498 as_dict = {"name": self.name} 

499 else: 

500 # Convert to a dict form 

501 as_dict = {"name": self.name, 

502 "storageClass": self._storageClassName, 

503 "isCalibration": self._isCalibration, 

504 "dimensions": self.dimensions.to_simple(), 

505 } 

506 

507 if self._parentStorageClassName is not None: 

508 as_dict["parentStorageClass"] = self._parentStorageClassName 

509 return SerializedDatasetType(**as_dict) 

510 

511 @classmethod 

512 def from_simple(cls, simple: SerializedDatasetType, 

513 universe: Optional[DimensionUniverse] = None, 

514 registry: Optional[Registry] = None) -> DatasetType: 

515 """Construct a new object from the simplified form. 

516 

517 This is usually data returned from the `to_simple` method. 

518 

519 Parameters 

520 ---------- 

521 simple : `SerializedDatasetType` 

522 The value returned by `to_simple()`. 

523 universe : `DimensionUniverse` 

524 The special graph of all known dimensions of which this graph will 

525 be a subset. Can be `None` if a registry is provided. 

526 registry : `lsst.daf.butler.Registry`, optional 

527 Registry to use to convert simple name of a DatasetType to 

528 a full `DatasetType`. Can be `None` if a full description of 

529 the type is provided along with a universe. 

530 

531 Returns 

532 ------- 

533 datasetType : `DatasetType` 

534 Newly-constructed object. 

535 """ 

536 if simple.storageClass is None: 

537 # Treat this as minimalist representation 

538 if registry is None: 

539 raise ValueError(f"Unable to convert a DatasetType name '{simple}' to DatasetType" 

540 " without a Registry") 

541 return registry.getDatasetType(simple.name) 

542 

543 if universe is None and registry is None: 

544 raise ValueError("One of universe or registry must be provided.") 

545 

546 if universe is None and registry is not None: 

547 # registry should not be none by now but test helps mypy 

548 universe = registry.dimensions 

549 

550 if universe is None: 

551 # this is for mypy 

552 raise ValueError("Unable to determine a usable universe") 

553 

554 if simple.dimensions is None: 

555 # mypy hint 

556 raise ValueError(f"Dimensions must be specified in {simple}") 

557 

558 return cls(name=simple.name, 

559 dimensions=DimensionGraph.from_simple(simple.dimensions, universe=universe), 

560 storageClass=simple.storageClass, 

561 isCalibration=simple.isCalibration, 

562 parentStorageClass=simple.parentStorageClass, 

563 universe=universe) 

564 

565 to_json = to_json_pydantic 

566 from_json = classmethod(from_json_pydantic) 

567 

568 def __reduce__(self) -> Tuple[Callable, Tuple[Type[DatasetType], 

569 Tuple[str, DimensionGraph, str, Optional[str]], 

570 Dict[str, bool]]]: 

571 """Support pickling. 

572 

573 StorageClass instances can not normally be pickled, so we pickle 

574 StorageClass name instead of instance. 

575 """ 

576 return _unpickle_via_factory, (self.__class__, (self.name, self.dimensions, self._storageClassName, 

577 self._parentStorageClassName), 

578 {"isCalibration": self._isCalibration}) 

579 

580 def __deepcopy__(self, memo: Any) -> DatasetType: 

581 """Support for deep copy method. 

582 

583 Normally ``deepcopy`` will use pickle mechanism to make copies. 

584 We want to avoid that to support (possibly degenerate) use case when 

585 DatasetType is constructed with StorageClass instance which is not 

586 registered with StorageClassFactory (this happens in unit tests). 

587 Instead we re-implement ``__deepcopy__`` method. 

588 """ 

589 return DatasetType(name=deepcopy(self.name, memo), 

590 dimensions=deepcopy(self.dimensions, memo), 

591 storageClass=deepcopy(self._storageClass or self._storageClassName, memo), 

592 parentStorageClass=deepcopy(self._parentStorageClass 

593 or self._parentStorageClassName, memo), 

594 isCalibration=deepcopy(self._isCalibration, memo)) 

595 

596 

597def _unpickle_via_factory(factory: Callable, args: Any, kwargs: Any) -> DatasetType: 

598 """Unpickle something by calling a factory. 

599 

600 Allows subclasses to unpickle using `__reduce__` with keyword 

601 arguments as well as positional arguments. 

602 """ 

603 return factory(*args, **kwargs)