(Extraction artifact: coverage-report keyboard-shortcut help text; not part of the source file.)

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ["DatasetType"] 

25 

26from copy import deepcopy 

27import re 

28 

29from types import MappingProxyType 

30 

31from typing import ( 

32 TYPE_CHECKING, 

33 Any, 

34 Iterable, 

35 List, 

36 Mapping, 

37 Optional, 

38 Tuple, 

39 Type, 

40 Union, 

41) 

42 

43 

44from ..storageClass import StorageClass, StorageClassFactory 

45from ..dimensions import DimensionGraph 

46from ..configSupport import LookupKey 

47 

if TYPE_CHECKING:
    # Imported only for static type checking; avoids a runtime import cycle
    # with the dimensions subpackage.
    from ..dimensions import Dimension, DimensionUniverse

50 

51 

52def _safeMakeMappingProxyType(data: Optional[Mapping]) -> Mapping: 

53 if data is None: 

54 data = {} 

55 return MappingProxyType(data) 

56 

57 

class DatasetType:
    r"""A named category of Datasets that defines how they are organized,
    related, and stored.

    A concrete, final class whose instances represent `DatasetType`\ s.
    `DatasetType` instances may be constructed without a `Registry`,
    but they must be registered
    via `Registry.registerDatasetType()` before corresponding Datasets
    may be added.
    `DatasetType` instances are immutable.

    Parameters
    ----------
    name : `str`
        A string name for the Dataset; must correspond to the same
        `DatasetType` across all Registries. Names must start with an
        upper or lowercase letter, and may contain only letters, numbers,
        and underscores. Component dataset types should contain a single
        period separating the base dataset type name from the component name
        (and may be recursive).
    dimensions : `DimensionGraph` or iterable of `Dimension`
        Dimensions used to label and relate instances of this `DatasetType`.
        If not a `DimensionGraph`, ``universe`` must be provided as well.
    storageClass : `StorageClass` or `str`
        Instance of a `StorageClass` or name of `StorageClass` that defines
        how this `DatasetType` is persisted.
    parentStorageClass : `StorageClass` or `str`, optional
        Instance of a `StorageClass` or name of `StorageClass` that defines
        how the composite parent is persisted. Must be `None` if this
        is not a component. Mandatory if it is a component but can be the
        special temporary placeholder
        (`DatasetType.PlaceholderParentStorageClass`) to allow
        construction with an intent to finalize later.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to normalize ``dimensions`` if it
        is not already a `DimensionGraph`.
    isCalibration : `bool`, optional
        If `True`, this dataset type may be included in
        `~CollectionType.CALIBRATION` collections.
    """

    # __slots__ keeps instances compact and prevents accidental attribute
    # creation, consistent with the documented immutability of the class.
    __slots__ = ("_name", "_dimensions", "_storageClass", "_storageClassName",
                 "_parentStorageClass", "_parentStorageClassName",
                 "_isCalibration")

    # A valid name is one or more "."-separated segments, each starting with
    # a letter and containing only letters, digits, and underscores; the
    # segments after the first name components.
    VALID_NAME_REGEX = re.compile("^[a-zA-Z][a-zA-Z0-9_]*(\\.[a-zA-Z][a-zA-Z0-9_]*)*$")

    PlaceholderParentStorageClass = StorageClass("PlaceHolder")
    """Placeholder StorageClass that can be used temporarily for a
    component.

    This can be useful in pipeline construction where we are creating
    dataset types without a registry.
    """

112 

113 @staticmethod 

114 def nameWithComponent(datasetTypeName: str, componentName: str) -> str: 

115 """Form a valid DatasetTypeName from a parent and component. 

116 

117 No validation is performed. 

118 

119 Parameters 

120 ---------- 

121 datasetTypeName : `str` 

122 Base type name. 

123 componentName : `str` 

124 Name of component. 

125 

126 Returns 

127 ------- 

128 compTypeName : `str` 

129 Name to use for component DatasetType. 

130 """ 

131 return "{}.{}".format(datasetTypeName, componentName) 

132 

133 def __init__(self, name: str, dimensions: Union[DimensionGraph, Iterable[Dimension]], 

134 storageClass: Union[StorageClass, str], 

135 parentStorageClass: Optional[Union[StorageClass, str]] = None, *, 

136 universe: Optional[DimensionUniverse] = None, 

137 isCalibration: bool = False): 

138 if self.VALID_NAME_REGEX.match(name) is None: 

139 raise ValueError(f"DatasetType name '{name}' is invalid.") 

140 self._name = name 

141 if not isinstance(dimensions, DimensionGraph): 

142 if universe is None: 

143 raise ValueError("If dimensions is not a normalized DimensionGraph, " 

144 "a universe must be provided.") 

145 dimensions = universe.extract(dimensions) 

146 self._dimensions = dimensions 

147 if not isinstance(storageClass, (StorageClass, str)): 

148 raise ValueError("StorageClass argument must be StorageClass or str. " 

149 f"Got {storageClass}") 

150 self._storageClass: Optional[StorageClass] 

151 if isinstance(storageClass, StorageClass): 

152 self._storageClass = storageClass 

153 self._storageClassName = storageClass.name 

154 else: 

155 self._storageClass = None 

156 self._storageClassName = storageClass 

157 

158 self._parentStorageClass: Optional[StorageClass] = None 

159 self._parentStorageClassName: Optional[str] = None 

160 if parentStorageClass is not None: 

161 if not isinstance(storageClass, (StorageClass, str)): 

162 raise ValueError("Parent StorageClass argument must be StorageClass or str. " 

163 f"Got {parentStorageClass}") 

164 

165 # Only allowed for a component dataset type 

166 _, componentName = self.splitDatasetTypeName(self._name) 

167 if componentName is None: 

168 raise ValueError("Can not specify a parent storage class if this is not a component" 

169 f" ({self._name})") 

170 if isinstance(parentStorageClass, StorageClass): 

171 self._parentStorageClass = parentStorageClass 

172 self._parentStorageClassName = parentStorageClass.name 

173 else: 

174 self._parentStorageClassName = parentStorageClass 

175 

176 # Ensure that parent storage class is specified when we have 

177 # a component and is not specified when we don't 

178 _, componentName = self.splitDatasetTypeName(self._name) 

179 if parentStorageClass is None and componentName is not None: 

180 raise ValueError(f"Component dataset type '{self._name}' constructed without parent" 

181 " storage class") 

182 if parentStorageClass is not None and componentName is None: 

183 raise ValueError(f"Parent storage class specified by {self._name} is not a composite") 

184 self._isCalibration = isCalibration 

185 

186 def __repr__(self) -> str: 

187 extra = "" 

188 if self._parentStorageClassName: 

189 extra = f", parentStorageClass={self._parentStorageClassName}" 

190 if self._isCalibration: 

191 extra += ", isCalibration=True" 

192 return f"DatasetType({self.name!r}, {self.dimensions}, {self._storageClassName}{extra})" 

193 

194 def __eq__(self, other: Any) -> bool: 

195 if not isinstance(other, type(self)): 

196 return False 

197 if self._name != other._name: 

198 return False 

199 if self._dimensions != other._dimensions: 

200 return False 

201 if self._storageClass is not None and other._storageClass is not None: 

202 if self._storageClass != other._storageClass: 

203 return False 

204 else: 

205 if self._storageClassName != other._storageClassName: 

206 return False 

207 if self._isCalibration != other._isCalibration: 

208 return False 

209 if self._parentStorageClass is not None and other._parentStorageClass is not None: 

210 return self._parentStorageClass == other._parentStorageClass 

211 else: 

212 return self._parentStorageClassName == other._parentStorageClassName 

213 

214 def __hash__(self) -> int: 

215 """Hash DatasetType instance. 

216 

217 This only uses StorageClass name which is it consistent with the 

218 implementation of StorageClass hash method. 

219 """ 

220 return hash((self._name, self._dimensions, self._storageClassName, 

221 self._parentStorageClassName)) 

222 

223 @property 

224 def name(self) -> str: 

225 """A string name for the Dataset; must correspond to the same 

226 `DatasetType` across all Registries. 

227 """ 

228 return self._name 

229 

230 @property 

231 def dimensions(self) -> DimensionGraph: 

232 r"""The `Dimension`\ s that label and relate instances of this 

233 `DatasetType` (`DimensionGraph`). 

234 """ 

235 return self._dimensions 

236 

237 @property 

238 def storageClass(self) -> StorageClass: 

239 """`StorageClass` instance that defines how this `DatasetType` 

240 is persisted. Note that if DatasetType was constructed with a name 

241 of a StorageClass then Butler has to be initialized before using 

242 this property. 

243 """ 

244 if self._storageClass is None: 

245 self._storageClass = StorageClassFactory().getStorageClass(self._storageClassName) 

246 return self._storageClass 

247 

248 @property 

249 def parentStorageClass(self) -> Optional[StorageClass]: 

250 """`StorageClass` instance that defines how the composite associated 

251 with this `DatasetType` is persisted. 

252 

253 Note that if DatasetType was constructed with a name of a 

254 StorageClass then Butler has to be initialized before using this 

255 property. Can be `None` if this is not a component of a composite. 

256 Must be defined if this is a component. 

257 """ 

258 if self._parentStorageClass is None and self._parentStorageClassName is None: 

259 return None 

260 if self._parentStorageClass is None and self._parentStorageClassName is not None: 

261 self._parentStorageClass = StorageClassFactory().getStorageClass(self._parentStorageClassName) 

262 return self._parentStorageClass 

263 

264 def isCalibration(self) -> bool: 

265 """Return whether datasets of this type may be included in calibration 

266 collections. 

267 

268 Returns 

269 ------- 

270 flag : `bool` 

271 `True` if datasets of this type may be included in calibration 

272 collections. 

273 """ 

274 return self._isCalibration 

275 

276 def finalizeParentStorageClass(self, newParent: StorageClass) -> None: 

277 """Replace the current placeholder parent storage class with 

278 the real parent. 

279 

280 Parameters 

281 ---------- 

282 newParent : `StorageClass` 

283 The new parent to be associated with this composite dataset 

284 type. This replaces the temporary placeholder parent that 

285 was specified during construction. 

286 

287 Raises 

288 ------ 

289 ValueError 

290 Raised if this dataset type is not a component of a composite. 

291 Raised if a StorageClass is not given. 

292 Raised if the parent currently associated with the dataset 

293 type is not a placeholder. 

294 """ 

295 if not self.isComponent(): 

296 raise ValueError("Can not set a parent storage class if this is not a component" 

297 f" ({self.name})") 

298 if self._parentStorageClass != self.PlaceholderParentStorageClass: 

299 raise ValueError(f"This DatasetType has a parent of {self._parentStorageClassName} and" 

300 " is not a placeholder.") 

301 if not isinstance(newParent, StorageClass): 

302 raise ValueError(f"Supplied parent must be a StorageClass. Got {newParent!r}") 

303 self._parentStorageClass = newParent 

304 self._parentStorageClassName = newParent.name 

305 

306 @staticmethod 

307 def splitDatasetTypeName(datasetTypeName: str) -> Tuple[str, Optional[str]]: 

308 """Given a dataset type name, return the root name and the component 

309 name. 

310 

311 Parameters 

312 ---------- 

313 datasetTypeName : `str` 

314 The name of the dataset type, can include a component using 

315 a "."-separator. 

316 

317 Returns 

318 ------- 

319 rootName : `str` 

320 Root name without any components. 

321 componentName : `str` 

322 The component if it has been specified, else `None`. 

323 

324 Notes 

325 ----- 

326 If the dataset type name is ``a.b.c`` this method will return a 

327 root name of ``a`` and a component name of ``b.c``. 

328 """ 

329 comp = None 

330 root = datasetTypeName 

331 if "." in root: 

332 # If there is doubt, the component is after the first "." 

333 root, comp = root.split(".", maxsplit=1) 

334 return root, comp 

335 

336 def nameAndComponent(self) -> Tuple[str, Optional[str]]: 

337 """Return the root name of this dataset type and the component 

338 name (if defined). 

339 

340 Returns 

341 ------- 

342 rootName : `str` 

343 Root name for this `DatasetType` without any components. 

344 componentName : `str` 

345 The component if it has been specified, else `None`. 

346 """ 

347 return self.splitDatasetTypeName(self.name) 

348 

349 def component(self) -> Optional[str]: 

350 """Component name (if defined) 

351 

352 Returns 

353 ------- 

354 comp : `str` 

355 Name of component part of DatasetType name. `None` if this 

356 `DatasetType` is not associated with a component. 

357 """ 

358 _, comp = self.nameAndComponent() 

359 return comp 

360 

361 def componentTypeName(self, component: str) -> str: 

362 """Given a component name, derive the datasetTypeName of that component 

363 

364 Parameters 

365 ---------- 

366 component : `str` 

367 Name of component 

368 

369 Returns 

370 ------- 

371 derived : `str` 

372 Compound name of this `DatasetType` and the component. 

373 

374 Raises 

375 ------ 

376 KeyError 

377 Requested component is not supported by this `DatasetType`. 

378 """ 

379 if component in self.storageClass.allComponents(): 

380 return self.nameWithComponent(self.name, component) 

381 raise KeyError("Requested component ({}) not understood by this DatasetType".format(component)) 

382 

383 def makeComponentDatasetType(self, component: str) -> DatasetType: 

384 """Return a DatasetType suitable for the given component, assuming the 

385 same dimensions as the parent. 

386 

387 Parameters 

388 ---------- 

389 component : `str` 

390 Name of component 

391 

392 Returns 

393 ------- 

394 datasetType : `DatasetType` 

395 A new DatasetType instance. 

396 """ 

397 # The component could be a read/write or read component 

398 return DatasetType(self.componentTypeName(component), dimensions=self.dimensions, 

399 storageClass=self.storageClass.allComponents()[component], 

400 parentStorageClass=self.storageClass) 

401 

402 def makeAllComponentDatasetTypes(self) -> List[DatasetType]: 

403 """Return all the component dataset types assocaited with this 

404 dataset type. 

405 

406 Returns 

407 ------- 

408 all : `list` of `DatasetType` 

409 All the component dataset types. If this is not a composite 

410 then returns an empty list. 

411 """ 

412 return [self.makeComponentDatasetType(componentName) 

413 for componentName in self.storageClass.allComponents()] 

414 

415 def isComponent(self) -> bool: 

416 """Boolean indicating whether this `DatasetType` refers to a 

417 component of a composite. 

418 

419 Returns 

420 ------- 

421 isComponent : `bool` 

422 `True` if this `DatasetType` is a component, `False` otherwise. 

423 """ 

424 if self.component(): 

425 return True 

426 return False 

427 

428 def isComposite(self) -> bool: 

429 """Boolean indicating whether this `DatasetType` is a composite type. 

430 

431 Returns 

432 ------- 

433 isComposite : `bool` 

434 `True` if this `DatasetType` is a composite type, `False` 

435 otherwise. 

436 """ 

437 return self.storageClass.isComposite() 

438 

439 def _lookupNames(self) -> Tuple[LookupKey, ...]: 

440 """Name keys to use when looking up this datasetType in a 

441 configuration. 

442 

443 The names are returned in order of priority. 

444 

445 Returns 

446 ------- 

447 names : `tuple` of `LookupKey` 

448 Tuple of the `DatasetType` name and the `StorageClass` name. 

449 If the name includes a component the name with the component 

450 is first, then the name without the component and finally 

451 the storage class name. 

452 """ 

453 rootName, componentName = self.nameAndComponent() 

454 lookups: Tuple[LookupKey, ...] = (LookupKey(name=self.name),) 

455 if componentName is not None: 

456 lookups = lookups + (LookupKey(name=rootName),) 

457 

458 if self.dimensions: 

459 # Dimensions are a lower priority than dataset type name 

460 lookups = lookups + (LookupKey(dimensions=self.dimensions),) 

461 

462 return lookups + self.storageClass._lookupNames() 

463 

464 def __reduce__(self) -> Tuple[Type[DatasetType], Tuple[str, DimensionGraph, str, Optional[str]]]: 

465 """Support pickling. 

466 

467 StorageClass instances can not normally be pickled, so we pickle 

468 StorageClass name instead of instance. 

469 """ 

470 return (DatasetType, (self.name, self.dimensions, self._storageClassName, 

471 self._parentStorageClassName)) 

472 

473 def __deepcopy__(self, memo: Any) -> DatasetType: 

474 """Support for deep copy method. 

475 

476 Normally ``deepcopy`` will use pickle mechanism to make copies. 

477 We want to avoid that to support (possibly degenerate) use case when 

478 DatasetType is constructed with StorageClass instance which is not 

479 registered with StorageClassFactory (this happens in unit tests). 

480 Instead we re-implement ``__deepcopy__`` method. 

481 """ 

482 return DatasetType(name=deepcopy(self.name, memo), 

483 dimensions=deepcopy(self.dimensions, memo), 

484 storageClass=deepcopy(self._storageClass or self._storageClassName, memo), 

485 parentStorageClass=deepcopy(self._parentStorageClass 

486 or self._parentStorageClassName, memo))