# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ["DatasetType"]

import re
from copy import deepcopy
from types import MappingProxyType
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    Dict,
    Iterable,
    List,
    Mapping,
    Optional,
    Tuple,
    Type,
    Union,
)

from ..storageClass import StorageClass, StorageClassFactory
from ..dimensions import DimensionGraph
from ..configSupport import LookupKey

if TYPE_CHECKING:
    from ..dimensions import Dimension, DimensionUniverse


def _safeMakeMappingProxyType(data: Optional[Mapping]) -> Mapping:
    """Wrap ``data`` in a read-only mapping view, substituting an empty
    mapping for `None`.
    """
    if data is None:
        data = {}
    return MappingProxyType(data)


class DatasetType:
    r"""A named category of Datasets that defines how they are organized,
    related, and stored.

    A concrete, final class whose instances represent `DatasetType`\ s.
    `DatasetType` instances may be constructed without a `Registry`,
    but they must be registered
    via `Registry.registerDatasetType()` before corresponding Datasets
    may be added.
    `DatasetType` instances are immutable.

    Parameters
    ----------
    name : `str`
        A string name for the Dataset; must correspond to the same
        `DatasetType` across all Registries.  Names must start with an
        upper or lowercase letter, and may contain only letters, numbers,
        and underscores.  Component dataset types should contain a single
        period separating the base dataset type name from the component
        name (and may be recursive).
    dimensions : `DimensionGraph` or iterable of `Dimension`
        Dimensions used to label and relate instances of this `DatasetType`.
        If not a `DimensionGraph`, ``universe`` must be provided as well.
    storageClass : `StorageClass` or `str`
        Instance of a `StorageClass` or name of `StorageClass` that defines
        how this `DatasetType` is persisted.
    parentStorageClass : `StorageClass` or `str`, optional
        Instance of a `StorageClass` or name of `StorageClass` that defines
        how the composite parent is persisted.  Must be `None` if this
        is not a component.  Mandatory if it is a component, but can be the
        special temporary placeholder
        (`DatasetType.PlaceholderParentStorageClass`) to allow
        construction with an intent to finalize later.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to normalize ``dimensions`` if it
        is not already a `DimensionGraph`.
    isCalibration : `bool`, optional
        If `True`, this dataset type may be included in
        `~CollectionType.CALIBRATION` collections.
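
    Notes
    -----
    A minimal construction sketch (the dataset type, dimension, and storage
    class names below are illustrative, and ``universe`` is assumed to be a
    `DimensionUniverse` obtained elsewhere)::

        datasetType = DatasetType(
            "calexp",
            dimensions=universe.extract(["instrument", "visit", "detector"]),
            storageClass="ExposureF",
        )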

    """

    __slots__ = ("_name", "_dimensions", "_storageClass", "_storageClassName",
                 "_parentStorageClass", "_parentStorageClassName",
                 "_isCalibration")

    VALID_NAME_REGEX = re.compile("^[a-zA-Z][a-zA-Z0-9_]*(\\.[a-zA-Z][a-zA-Z0-9_]*)*$")
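    # Illustrative matches (not exhaustive): names like "calexp" and
    # "calexp.wcs" satisfy the pattern, while "1stExposure" (leading digit)
    # and "bad-name" (hyphen) are rejected.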

    PlaceholderParentStorageClass = StorageClass("PlaceHolder")
    """Placeholder StorageClass that can be used temporarily for a
    component.

    This can be useful in pipeline construction where we are creating
    dataset types without a registry.
    """

    @staticmethod
    def nameWithComponent(datasetTypeName: str, componentName: str) -> str:
        """Form a valid DatasetTypeName from a parent and component.

        No validation is performed.

        Parameters
        ----------
        datasetTypeName : `str`
            Base type name.
        componentName : `str`
            Name of component.

        Returns
        -------
        compTypeName : `str`
            Name to use for component DatasetType.
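
        Examples
        --------
        A doctest-style sketch:

        >>> DatasetType.nameWithComponent("calexp", "wcs")
        'calexp.wcs'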

        """
        return f"{datasetTypeName}.{componentName}"

    def __init__(self, name: str, dimensions: Union[DimensionGraph, Iterable[Dimension]],
                 storageClass: Union[StorageClass, str],
                 parentStorageClass: Optional[Union[StorageClass, str]] = None, *,
                 universe: Optional[DimensionUniverse] = None,
                 isCalibration: bool = False):
        if self.VALID_NAME_REGEX.match(name) is None:
            raise ValueError(f"DatasetType name '{name}' is invalid.")
        self._name = name
        if not isinstance(dimensions, DimensionGraph):
            if universe is None:
                raise ValueError("If dimensions is not a normalized DimensionGraph, "
                                 "a universe must be provided.")
            dimensions = universe.extract(dimensions)
        self._dimensions = dimensions
        if not isinstance(storageClass, (StorageClass, str)):
            raise ValueError("StorageClass argument must be StorageClass or str. "
                             f"Got {storageClass}")
        self._storageClass: Optional[StorageClass]
        if isinstance(storageClass, StorageClass):
            self._storageClass = storageClass
            self._storageClassName = storageClass.name
        else:
            self._storageClass = None
            self._storageClassName = storageClass

        self._parentStorageClass: Optional[StorageClass] = None
        self._parentStorageClassName: Optional[str] = None
        # A parent storage class is only relevant for a component dataset
        # type, so determine the component name once up front.
        _, componentName = self.splitDatasetTypeName(self._name)
        if parentStorageClass is not None:
            if not isinstance(parentStorageClass, (StorageClass, str)):
                raise ValueError("Parent StorageClass argument must be StorageClass or str. "
                                 f"Got {parentStorageClass}")

            # Only allowed for a component dataset type
            if componentName is None:
                raise ValueError("Can not specify a parent storage class if this is not a component"
                                 f" ({self._name})")
            if isinstance(parentStorageClass, StorageClass):
                self._parentStorageClass = parentStorageClass
                self._parentStorageClassName = parentStorageClass.name
            else:
                self._parentStorageClassName = parentStorageClass

        # Ensure that a parent storage class is specified when we have
        # a component and is not specified when we don't.
        if parentStorageClass is None and componentName is not None:
            raise ValueError(f"Component dataset type '{self._name}' constructed without parent"
                             " storage class")
        if parentStorageClass is not None and componentName is None:
            raise ValueError(f"Parent storage class given but dataset type {self._name}"
                             " is not a component")
        self._isCalibration = isCalibration

    def __repr__(self) -> str:
        extra = ""
        if self._parentStorageClassName:
            extra = f", parentStorageClass={self._parentStorageClassName}"
        if self._isCalibration:
            extra += ", isCalibration=True"
        return f"DatasetType({self.name!r}, {self.dimensions}, {self._storageClassName}{extra})"

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, type(self)):
            return False
        if self._name != other._name:
            return False
        if self._dimensions != other._dimensions:
            return False
        # Compare StorageClass instances when both sides have resolved
        # them; otherwise fall back to comparing the names.
        if self._storageClass is not None and other._storageClass is not None:
            if self._storageClass != other._storageClass:
                return False
        else:
            if self._storageClassName != other._storageClassName:
                return False
        if self._isCalibration != other._isCalibration:
            return False
        if self._parentStorageClass is not None and other._parentStorageClass is not None:
            return self._parentStorageClass == other._parentStorageClass
        else:
            return self._parentStorageClassName == other._parentStorageClassName

    def __hash__(self) -> int:
        """Hash DatasetType instance.

        This only uses the StorageClass name, which is consistent with
        the implementation of the StorageClass hash method.
        """
        return hash((self._name, self._dimensions, self._storageClassName,
                     self._parentStorageClassName))

    def __lt__(self, other: Any) -> bool:
        """Sort using the dataset type name."""
        if not isinstance(other, type(self)):
            return NotImplemented
        return self.name < other.name

    @property
    def name(self) -> str:
        """A string name for the Dataset; must correspond to the same
        `DatasetType` across all Registries.
        """
        return self._name

    @property
    def dimensions(self) -> DimensionGraph:
        r"""The `Dimension`\ s that label and relate instances of this
        `DatasetType` (`DimensionGraph`).
        """
        return self._dimensions

    @property
    def storageClass(self) -> StorageClass:
        """`StorageClass` instance that defines how this `DatasetType`
        is persisted.  Note that if the DatasetType was constructed with
        the name of a StorageClass, a Butler has to be initialized before
        this property can be used.
        """
        if self._storageClass is None:
            self._storageClass = StorageClassFactory().getStorageClass(self._storageClassName)
        return self._storageClass

    @property
    def parentStorageClass(self) -> Optional[StorageClass]:
        """`StorageClass` instance that defines how the composite associated
        with this `DatasetType` is persisted.

        Note that if the DatasetType was constructed with the name of a
        StorageClass, a Butler has to be initialized before this property
        can be used.  Can be `None` if this is not a component of a
        composite.  Must be defined if this is a component.
        """
        if self._parentStorageClass is None and self._parentStorageClassName is None:
            return None
        if self._parentStorageClass is None and self._parentStorageClassName is not None:
            self._parentStorageClass = StorageClassFactory().getStorageClass(self._parentStorageClassName)
        return self._parentStorageClass

    def isCalibration(self) -> bool:
        """Return whether datasets of this type may be included in
        calibration collections.

        Returns
        -------
        flag : `bool`
            `True` if datasets of this type may be included in calibration
            collections.
        """
        return self._isCalibration

    def finalizeParentStorageClass(self, newParent: StorageClass) -> None:
        """Replace the current placeholder parent storage class with
        the real parent.

        Parameters
        ----------
        newParent : `StorageClass`
            The new parent to be associated with this composite dataset
            type.  This replaces the temporary placeholder parent that
            was specified during construction.

        Raises
        ------
        ValueError
            Raised if this dataset type is not a component of a composite.
            Raised if a StorageClass is not given.
            Raised if the parent currently associated with the dataset
            type is not a placeholder.
        """
        if not self.isComponent():
            raise ValueError("Can not set a parent storage class if this is not a component"
                             f" ({self.name})")
        if self._parentStorageClass != self.PlaceholderParentStorageClass:
            raise ValueError(f"This DatasetType has a parent of {self._parentStorageClassName} and"
                             " is not a placeholder.")
        if not isinstance(newParent, StorageClass):
            raise ValueError(f"Supplied parent must be a StorageClass. Got {newParent!r}")
        self._parentStorageClass = newParent
        self._parentStorageClassName = newParent.name
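
    # A sketch of the placeholder flow (names are illustrative; ``dims``,
    # ``wcsClass``, and ``exposureClass`` stand in for objects obtained
    # elsewhere):
    #
    #     comp = DatasetType("calexp.wcs", dims, wcsClass,
    #                        parentStorageClass=DatasetType.PlaceholderParentStorageClass)
    #     ...
    #     comp.finalizeParentStorageClass(exposureClass)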

    @staticmethod
    def splitDatasetTypeName(datasetTypeName: str) -> Tuple[str, Optional[str]]:
        """Given a dataset type name, return the root name and the component
        name.

        Parameters
        ----------
        datasetTypeName : `str`
            The name of the dataset type; can include a component using
            a "."-separator.

        Returns
        -------
        rootName : `str`
            Root name without any components.
        componentName : `str`
            The component if it has been specified, else `None`.

        Notes
        -----
        If the dataset type name is ``a.b.c`` this method will return a
        root name of ``a`` and a component name of ``b.c``.
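
        Examples
        --------
        A doctest-style sketch:

        >>> DatasetType.splitDatasetTypeName("calexp")
        ('calexp', None)
        >>> DatasetType.splitDatasetTypeName("calexp.wcs")
        ('calexp', 'wcs')
        >>> DatasetType.splitDatasetTypeName("a.b.c")
        ('a', 'b.c')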

        """
        comp = None
        root = datasetTypeName
        if "." in root:
            # If there is doubt, the component is after the first "."
            root, comp = root.split(".", maxsplit=1)
        return root, comp

    def nameAndComponent(self) -> Tuple[str, Optional[str]]:
        """Return the root name of this dataset type and the component
        name (if defined).

        Returns
        -------
        rootName : `str`
            Root name for this `DatasetType` without any components.
        componentName : `str`
            The component if it has been specified, else `None`.
        """
        return self.splitDatasetTypeName(self.name)

    def component(self) -> Optional[str]:
        """Component name (if defined).

        Returns
        -------
        comp : `str`
            Name of the component part of the DatasetType name.  `None` if
            this `DatasetType` is not associated with a component.
        """
        _, comp = self.nameAndComponent()
        return comp

    def componentTypeName(self, component: str) -> str:
        """Given a component name, derive the datasetTypeName of that
        component.

        Parameters
        ----------
        component : `str`
            Name of the component.

        Returns
        -------
        derived : `str`
            Compound name of this `DatasetType` and the component.

        Raises
        ------
        KeyError
            Raised if the requested component is not supported by this
            `DatasetType`.
        """
        if component in self.storageClass.allComponents():
            return self.nameWithComponent(self.name, component)
        raise KeyError(f"Requested component ({component}) not understood by this DatasetType")

    def makeComponentDatasetType(self, component: str) -> DatasetType:
        """Return a DatasetType suitable for the given component, assuming
        the same dimensions as the parent.

        Parameters
        ----------
        component : `str`
            Name of the component.

        Returns
        -------
        datasetType : `DatasetType`
            A new DatasetType instance.
        """
        # The component could be a read/write or read component
        return DatasetType(self.componentTypeName(component), dimensions=self.dimensions,
                           storageClass=self.storageClass.allComponents()[component],
                           parentStorageClass=self.storageClass)
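
    # For example (illustrative): if this dataset type is backed by a
    # composite storage class that defines a "wcs" component, then
    # self.makeComponentDatasetType("wcs") returns a DatasetType named
    # "<name>.wcs" whose parentStorageClass is this type's storage class.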

    def makeAllComponentDatasetTypes(self) -> List[DatasetType]:
        """Return all the component dataset types associated with this
        dataset type.

        Returns
        -------
        all : `list` of `DatasetType`
            All the component dataset types.  If this is not a composite
            then an empty list is returned.
        """
        return [self.makeComponentDatasetType(componentName)
                for componentName in self.storageClass.allComponents()]

    def isComponent(self) -> bool:
        """Boolean indicating whether this `DatasetType` refers to a
        component of a composite.

        Returns
        -------
        isComponent : `bool`
            `True` if this `DatasetType` is a component, `False` otherwise.
        """
        return self.component() is not None

    def isComposite(self) -> bool:
        """Boolean indicating whether this `DatasetType` is a composite type.

        Returns
        -------
        isComposite : `bool`
            `True` if this `DatasetType` is a composite type, `False`
            otherwise.
        """
        return self.storageClass.isComposite()

    def _lookupNames(self) -> Tuple[LookupKey, ...]:
        """Name keys to use when looking up this DatasetType in a
        configuration.

        The names are returned in order of priority.

        Returns
        -------
        names : `tuple` of `LookupKey`
            Tuple of the `DatasetType` name and the `StorageClass` name.
            If the name includes a component the name with the component
            is first, then the name without the component, and finally
            the storage class name.
        """
        rootName, componentName = self.nameAndComponent()
        lookups: Tuple[LookupKey, ...] = (LookupKey(name=self.name),)
        if componentName is not None:
            lookups = lookups + (LookupKey(name=rootName),)

        if self.dimensions:
            # Dimensions are a lower priority than dataset type name
            lookups = lookups + (LookupKey(dimensions=self.dimensions),)

        return lookups + self.storageClass._lookupNames()
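
    # Illustrative priority order for a hypothetical component dataset
    # type named "calexp.wcs": LookupKey(name="calexp.wcs"), then
    # LookupKey(name="calexp"), then a dimension-based key, then the
    # storage class's own lookup names.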

    def __reduce__(self) -> Tuple[Callable, Tuple[Type[DatasetType],
                                                  Tuple[str, DimensionGraph, str, Optional[str]],
                                                  Dict[str, bool]]]:
        """Support pickling.

        StorageClass instances can not normally be pickled, so we pickle
        the StorageClass name instead of the instance.
        """
        return _unpickle_via_factory, (self.__class__, (self.name, self.dimensions,
                                                        self._storageClassName,
                                                        self._parentStorageClassName),
                                       {"isCalibration": self._isCalibration})
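
    # Round-trip sketch (``dt`` is a hypothetical DatasetType instance):
    #
    #     clone = pickle.loads(pickle.dumps(dt))
    #     assert clone == dt  # equality falls back to storage-class names
    #
    # The StorageClass itself is resolved lazily via StorageClassFactory
    # when the unpickled instance first needs it.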

    def __deepcopy__(self, memo: Any) -> DatasetType:
        """Support for the deep copy method.

        Normally ``deepcopy`` will use the pickle mechanism to make copies.
        We want to avoid that to support the (possibly degenerate) use case
        where a DatasetType is constructed with a StorageClass instance that
        is not registered with the StorageClassFactory (this happens in unit
        tests).  Instead we re-implement the ``__deepcopy__`` method.
        """
        return DatasetType(name=deepcopy(self.name, memo),
                           dimensions=deepcopy(self.dimensions, memo),
                           storageClass=deepcopy(self._storageClass or self._storageClassName, memo),
                           parentStorageClass=deepcopy(self._parentStorageClass
                                                       or self._parentStorageClassName, memo),
                           isCalibration=deepcopy(self._isCalibration, memo))
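
    # Sketch: ``copy.deepcopy(dt)`` works even when ``dt`` was built with
    # a StorageClass instance that is not registered with the factory,
    # because the copy is constructed directly from that instance rather
    # than via pickle.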


def _unpickle_via_factory(factory: Callable, args: Any, kwargs: Any) -> DatasetType:
    """Unpickle something by calling a factory.

    Allows subclasses to unpickle using `__reduce__` with keyword
    arguments as well as positional arguments.
    """
    return factory(*args, **kwargs)