# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ["DatasetType"]

from copy import deepcopy
import re
from types import MappingProxyType
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    Dict,
    Iterable,
    List,
    Mapping,
    Optional,
    Tuple,
    Type,
    Union,
)

from ..storageClass import StorageClass, StorageClassFactory
from ..dimensions import DimensionGraph
from ..configSupport import LookupKey

if TYPE_CHECKING:
    from ..dimensions import Dimension, DimensionUniverse


def _safeMakeMappingProxyType(data: Optional[Mapping]) -> Mapping:
    """Return a read-only view of the supplied mapping, substituting an
    empty mapping for `None`.
    """
    if data is None:
        data = {}
    return MappingProxyType(data)
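
# A minimal illustration of the helper above (values are arbitrary):
#
#     view = _safeMakeMappingProxyType({"a": 1})
#     view["a"]      # -> 1
#     view["b"] = 2  # raises TypeError: the proxy is read-only
#     _safeMakeMappingProxyType(None)  # -> empty read-only mapping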


class DatasetType:
    r"""A named category of Datasets that defines how they are organized,
    related, and stored.

    A concrete, final class whose instances represent `DatasetType`\ s.
    `DatasetType` instances may be constructed without a `Registry`, but
    they must be registered via `Registry.registerDatasetType()` before
    corresponding Datasets may be added. `DatasetType` instances are
    immutable.

    Parameters
    ----------
    name : `str`
        A string name for the Dataset; must correspond to the same
        `DatasetType` across all Registries. Names must start with an
        upper or lowercase letter, and may contain only letters, numbers,
        and underscores. Component dataset types should contain a single
        period separating the base dataset type name from the component
        name (and may be recursive).
    dimensions : `DimensionGraph` or iterable of `Dimension`
        Dimensions used to label and relate instances of this `DatasetType`.
        If not a `DimensionGraph`, ``universe`` must be provided as well.
    storageClass : `StorageClass` or `str`
        Instance of a `StorageClass` or name of `StorageClass` that defines
        how this `DatasetType` is persisted.
    parentStorageClass : `StorageClass` or `str`, optional
        Instance of a `StorageClass` or name of `StorageClass` that defines
        how the composite parent is persisted. Must be `None` if this is
        not a component. Mandatory if it is a component, but it can be the
        special temporary placeholder
        (`DatasetType.PlaceholderParentStorageClass`) to allow construction
        with an intent to finalize later.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to normalize ``dimensions`` if it
        is not already a `DimensionGraph`.
    isCalibration : `bool`, optional
        If `True`, this dataset type may be included in
        `~CollectionType.CALIBRATION` collections.
    """

    __slots__ = ("_name", "_dimensions", "_storageClass", "_storageClassName",
                 "_parentStorageClass", "_parentStorageClassName",
                 "_isCalibration")

    VALID_NAME_REGEX = re.compile("^[a-zA-Z][a-zA-Z0-9_]*(\\.[a-zA-Z][a-zA-Z0-9_]*)*$")
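
    # For illustration, names that the regex above accepts and rejects
    # (all example names are arbitrary):
    #
    #     "calexp", "deepCoadd_calexp", "calexp.wcs", "a.b.c"  -> valid
    #     "2calexp", "calexp-wcs", ".wcs", "calexp."           -> invalid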

    PlaceholderParentStorageClass = StorageClass("PlaceHolder")
    """Placeholder StorageClass that can be used temporarily for a
    component.

    This can be useful in pipeline construction where we are creating
    dataset types without a registry.
    """

    @staticmethod
    def nameWithComponent(datasetTypeName: str, componentName: str) -> str:
        """Form a valid DatasetTypeName from a parent and component.

        No validation is performed.

        Parameters
        ----------
        datasetTypeName : `str`
            Base type name.
        componentName : `str`
            Name of the component.

        Returns
        -------
        compTypeName : `str`
            Name to use for the component DatasetType.
        """
        return f"{datasetTypeName}.{componentName}"
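
    # For example (illustrative names):
    #
    #     DatasetType.nameWithComponent("calexp", "wcs")  # -> "calexp.wcs"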

    def __init__(self, name: str, dimensions: Union[DimensionGraph, Iterable[Dimension]],
                 storageClass: Union[StorageClass, str],
                 parentStorageClass: Optional[Union[StorageClass, str]] = None, *,
                 universe: Optional[DimensionUniverse] = None,
                 isCalibration: bool = False):
        if self.VALID_NAME_REGEX.match(name) is None:
            raise ValueError(f"DatasetType name '{name}' is invalid.")
        self._name = name
        if not isinstance(dimensions, DimensionGraph):
            if universe is None:
                raise ValueError("If dimensions is not a normalized DimensionGraph, "
                                 "a universe must be provided.")
            dimensions = universe.extract(dimensions)
        self._dimensions = dimensions
        if name in self._dimensions.universe.getGovernorDimensions().names:
            raise ValueError(f"Governor dimension name {name} cannot be used as a dataset type name.")
        if not isinstance(storageClass, (StorageClass, str)):
            raise ValueError("StorageClass argument must be StorageClass or str. "
                             f"Got {storageClass}")
        self._storageClass: Optional[StorageClass]
        if isinstance(storageClass, StorageClass):
            self._storageClass = storageClass
            self._storageClassName = storageClass.name
        else:
            self._storageClass = None
            self._storageClassName = storageClass

        self._parentStorageClass: Optional[StorageClass] = None
        self._parentStorageClassName: Optional[str] = None
        # A parent storage class is only relevant for component dataset
        # types, so determine the component name once up front.
        _, componentName = self.splitDatasetTypeName(self._name)
        if parentStorageClass is not None:
            if not isinstance(parentStorageClass, (StorageClass, str)):
                raise ValueError("Parent StorageClass argument must be StorageClass or str. "
                                 f"Got {parentStorageClass}")

            # Only allowed for a component dataset type
            if componentName is None:
                raise ValueError("Cannot specify a parent storage class if this is not a component"
                                 f" ({self._name})")
            if isinstance(parentStorageClass, StorageClass):
                self._parentStorageClass = parentStorageClass
                self._parentStorageClassName = parentStorageClass.name
            else:
                self._parentStorageClassName = parentStorageClass

        # Ensure that a parent storage class is specified when we have
        # a component and is not specified when we don't.
        if parentStorageClass is None and componentName is not None:
            raise ValueError(f"Component dataset type '{self._name}' constructed without parent"
                             " storage class")
        if parentStorageClass is not None and componentName is None:
            raise ValueError(f"Parent storage class specified but dataset type '{self._name}'"
                             " is not a component")
        self._isCalibration = isCalibration
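
    # A hedged construction sketch: "pvi", the dimension names, and
    # "ExposureF" are illustrative, and ``universe`` is assumed to come
    # from a configured registry (e.g. ``registry.dimensions``):
    #
    #     datasetType = DatasetType("pvi", {"instrument", "visit", "detector"},
    #                               "ExposureF", universe=universe)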

    def __repr__(self) -> str:
        extra = ""
        if self._parentStorageClassName:
            extra = f", parentStorageClass={self._parentStorageClassName}"
        if self._isCalibration:
            extra += ", isCalibration=True"
        return f"DatasetType({self.name!r}, {self.dimensions}, {self._storageClassName}{extra})"

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, type(self)):
            return False
        if self._name != other._name:
            return False
        if self._dimensions != other._dimensions:
            return False
        if self._storageClass is not None and other._storageClass is not None:
            if self._storageClass != other._storageClass:
                return False
        else:
            if self._storageClassName != other._storageClassName:
                return False
        if self._isCalibration != other._isCalibration:
            return False
        if self._parentStorageClass is not None and other._parentStorageClass is not None:
            return self._parentStorageClass == other._parentStorageClass
        else:
            return self._parentStorageClassName == other._parentStorageClassName

    def __hash__(self) -> int:
        """Hash DatasetType instance.

        This only uses the StorageClass name, which is consistent with
        the implementation of the StorageClass hash method.
        """
        return hash((self._name, self._dimensions, self._storageClassName,
                     self._parentStorageClassName))

    def __lt__(self, other: Any) -> bool:
        """Sort using the dataset type name."""
        if not isinstance(other, type(self)):
            return NotImplemented
        return self.name < other.name

    @property
    def name(self) -> str:
        """A string name for the Dataset; must correspond to the same
        `DatasetType` across all Registries.
        """
        return self._name

    @property
    def dimensions(self) -> DimensionGraph:
        r"""The `Dimension`\ s that label and relate instances of this
        `DatasetType` (`DimensionGraph`).
        """
        return self._dimensions

    @property
    def storageClass(self) -> StorageClass:
        """`StorageClass` instance that defines how this `DatasetType`
        is persisted. Note that if the DatasetType was constructed with the
        name of a StorageClass, then the Butler has to be initialized before
        using this property.
        """
        if self._storageClass is None:
            self._storageClass = StorageClassFactory().getStorageClass(self._storageClassName)
        return self._storageClass

    @property
    def parentStorageClass(self) -> Optional[StorageClass]:
        """`StorageClass` instance that defines how the composite associated
        with this `DatasetType` is persisted.

        Note that if the DatasetType was constructed with the name of a
        StorageClass, then the Butler has to be initialized before using this
        property. Can be `None` if this is not a component of a composite.
        Must be defined if this is a component.
        """
        if self._parentStorageClass is None and self._parentStorageClassName is None:
            return None
        if self._parentStorageClass is None and self._parentStorageClassName is not None:
            self._parentStorageClass = StorageClassFactory().getStorageClass(self._parentStorageClassName)
        return self._parentStorageClass

    def isCalibration(self) -> bool:
        """Return whether datasets of this type may be included in calibration
        collections.

        Returns
        -------
        flag : `bool`
            `True` if datasets of this type may be included in calibration
            collections.
        """
        return self._isCalibration

    def finalizeParentStorageClass(self, newParent: StorageClass) -> None:
        """Replace the current placeholder parent storage class with
        the real parent.

        Parameters
        ----------
        newParent : `StorageClass`
            The new parent to be associated with this composite dataset
            type. This replaces the temporary placeholder parent that
            was specified during construction.

        Raises
        ------
        ValueError
            Raised if this dataset type is not a component of a composite.
            Raised if a StorageClass is not given.
            Raised if the parent currently associated with the dataset
            type is not a placeholder.
        """
        if not self.isComponent():
            raise ValueError("Cannot set a parent storage class if this is not a component"
                             f" ({self.name})")
        if self._parentStorageClass != self.PlaceholderParentStorageClass:
            raise ValueError(f"This DatasetType has a parent of {self._parentStorageClassName} and"
                             " is not a placeholder.")
        if not isinstance(newParent, StorageClass):
            raise ValueError(f"Supplied parent must be a StorageClass. Got {newParent!r}")
        self._parentStorageClass = newParent
        self._parentStorageClassName = newParent.name
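
    # A sketch of the placeholder workflow ("calexp.wcs", "Wcs", and the
    # variables are illustrative; a real universe and registered storage
    # classes are assumed):
    #
    #     dt = DatasetType("calexp.wcs", dims, "Wcs", universe=universe,
    #                      parentStorageClass=DatasetType.PlaceholderParentStorageClass)
    #     ...  # later, once the parent storage class is known
    #     dt.finalizeParentStorageClass(parentStorageClass)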

    @staticmethod
    def splitDatasetTypeName(datasetTypeName: str) -> Tuple[str, Optional[str]]:
        """Given a dataset type name, return the root name and the component
        name.

        Parameters
        ----------
        datasetTypeName : `str`
            The name of the dataset type; it can include a component using
            a "."-separator.

        Returns
        -------
        rootName : `str`
            Root name without any components.
        componentName : `str`
            The component if it has been specified, else `None`.

        Notes
        -----
        If the dataset type name is ``a.b.c`` this method will return a
        root name of ``a`` and a component name of ``b.c``.
        """
        comp = None
        root = datasetTypeName
        if "." in root:
            # If there is doubt, the component is after the first "."
            root, comp = root.split(".", maxsplit=1)
        return root, comp
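
    # For example (illustrative names):
    #
    #     DatasetType.splitDatasetTypeName("calexp")      # -> ("calexp", None)
    #     DatasetType.splitDatasetTypeName("calexp.psf")  # -> ("calexp", "psf")
    #     DatasetType.splitDatasetTypeName("a.b.c")       # -> ("a", "b.c")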

    def nameAndComponent(self) -> Tuple[str, Optional[str]]:
        """Return the root name of this dataset type and the component
        name (if defined).

        Returns
        -------
        rootName : `str`
            Root name for this `DatasetType` without any components.
        componentName : `str`
            The component if it has been specified, else `None`.
        """
        return self.splitDatasetTypeName(self.name)

    def component(self) -> Optional[str]:
        """Component name (if defined).

        Returns
        -------
        comp : `str`
            Name of the component part of the DatasetType name. `None` if
            this `DatasetType` is not associated with a component.
        """
        _, comp = self.nameAndComponent()
        return comp

    def componentTypeName(self, component: str) -> str:
        """Given a component name, derive the datasetTypeName of that
        component.

        Parameters
        ----------
        component : `str`
            Name of the component.

        Returns
        -------
        derived : `str`
            Compound name of this `DatasetType` and the component.

        Raises
        ------
        KeyError
            Requested component is not supported by this `DatasetType`.
        """
        if component in self.storageClass.allComponents():
            return self.nameWithComponent(self.name, component)
        raise KeyError(f"Requested component ({component}) not understood by this DatasetType")
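
    # For example, given an illustrative composite ``calexpType`` whose
    # storage class defines a "psf" component:
    #
    #     calexpType.componentTypeName("psf")    # -> "calexp.psf"
    #     calexpType.componentTypeName("bogus")  # raises KeyError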

    def makeComponentDatasetType(self, component: str) -> DatasetType:
        """Return a DatasetType suitable for the given component, assuming the
        same dimensions as the parent.

        Parameters
        ----------
        component : `str`
            Name of the component.

        Returns
        -------
        datasetType : `DatasetType`
            A new DatasetType instance.
        """
        # The component could be a read/write or read component
        return DatasetType(self.componentTypeName(component), dimensions=self.dimensions,
                           storageClass=self.storageClass.allComponents()[component],
                           parentStorageClass=self.storageClass)
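
    # A hedged usage sketch (``calexpType`` and the "psf" component are
    # illustrative):
    #
    #     psfType = calexpType.makeComponentDatasetType("psf")
    #     psfType.name                # -> "calexp.psf"
    #     psfType.parentStorageClass  # -> the parent's storage class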

    def makeAllComponentDatasetTypes(self) -> List[DatasetType]:
        """Return all the component dataset types associated with this
        dataset type.

        Returns
        -------
        all : `list` of `DatasetType`
            All the component dataset types. If this is not a composite
            then returns an empty list.
        """
        return [self.makeComponentDatasetType(componentName)
                for componentName in self.storageClass.allComponents()]

    def isComponent(self) -> bool:
        """Boolean indicating whether this `DatasetType` refers to a
        component of a composite.

        Returns
        -------
        isComponent : `bool`
            `True` if this `DatasetType` is a component, `False` otherwise.
        """
        return self.component() is not None

    def isComposite(self) -> bool:
        """Boolean indicating whether this `DatasetType` is a composite type.

        Returns
        -------
        isComposite : `bool`
            `True` if this `DatasetType` is a composite type, `False`
            otherwise.
        """
        return self.storageClass.isComposite()

    def _lookupNames(self) -> Tuple[LookupKey, ...]:
        """Name keys to use when looking up this datasetType in a
        configuration.

        The names are returned in order of priority.

        Returns
        -------
        names : `tuple` of `LookupKey`
            Tuple of the `DatasetType` name and the `StorageClass` name.
            If the name includes a component the name with the component
            is first, then the name without the component and finally
            the storage class name.
        """
        rootName, componentName = self.nameAndComponent()
        lookups: Tuple[LookupKey, ...] = (LookupKey(name=self.name),)
        if componentName is not None:
            lookups = lookups + (LookupKey(name=rootName),)

        if self.dimensions:
            # Dimensions are a lower priority than dataset type name
            lookups = lookups + (LookupKey(dimensions=self.dimensions),)

        return lookups + self.storageClass._lookupNames()
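
    # For a component type named "calexp.psf" (illustrative), the priority
    # order would be:
    #
    #     LookupKey("calexp.psf"), LookupKey("calexp"),
    #     LookupKey(dimensions=...), then the storage class lookup names.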

    def __reduce__(self) -> Tuple[Callable, Tuple[Type[DatasetType],
                                                  Tuple[str, DimensionGraph, str, Optional[str]],
                                                  Dict[str, bool]]]:
        """Support pickling.

        StorageClass instances cannot normally be pickled, so we pickle
        the StorageClass name instead of the instance.
        """
        return _unpickle_via_factory, (self.__class__, (self.name, self.dimensions, self._storageClassName,
                                                        self._parentStorageClassName),
                                       {"isCalibration": self._isCalibration})
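
    # Consequently a pickle round trip preserves the names and should
    # compare equal, rebuilding the StorageClass lazily on first access
    # (``datasetType`` is a hypothetical existing instance):
    #
    #     restored = pickle.loads(pickle.dumps(datasetType))
    #     restored == datasetType  # -> True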

    def __deepcopy__(self, memo: Any) -> DatasetType:
        """Support for the deep copy method.

        Normally ``deepcopy`` will use the pickle mechanism to make copies.
        We want to avoid that, to support the (possibly degenerate) use case
        where a DatasetType is constructed with a StorageClass instance that
        is not registered with the StorageClassFactory (this happens in unit
        tests). Instead we re-implement the ``__deepcopy__`` method.
        """
        return DatasetType(name=deepcopy(self.name, memo),
                           dimensions=deepcopy(self.dimensions, memo),
                           storageClass=deepcopy(self._storageClass or self._storageClassName, memo),
                           parentStorageClass=deepcopy(self._parentStorageClass
                                                       or self._parentStorageClassName, memo),
                           isCalibration=deepcopy(self._isCalibration, memo))


def _unpickle_via_factory(factory: Callable, args: Any, kwargs: Any) -> DatasetType:
    """Unpickle something by calling a factory.

    Allows subclasses to unpickle using `__reduce__` with keyword
    arguments as well as positional arguments.
    """
    return factory(*args, **kwargs)