Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ["DatasetType"] 

25 

26from copy import deepcopy 

27import re 

28 

29from types import MappingProxyType 

30 

31from typing import ( 

32 TYPE_CHECKING, 

33 Any, 

34 Iterable, 

35 List, 

36 Mapping, 

37 Optional, 

38 Tuple, 

39 Type, 

40 Union, 

41) 

42 

43 

44from ..storageClass import StorageClass, StorageClassFactory 

45from ..dimensions import DimensionGraph 

46from ..configSupport import LookupKey 

47 

48if TYPE_CHECKING: 48 ↛ 49line 48 didn't jump to line 49, because the condition on line 48 was never true

49 from ..dimensions import Dimension, DimensionUniverse 

50 

51 

52def _safeMakeMappingProxyType(data: Optional[Mapping]) -> Mapping: 

53 if data is None: 

54 data = {} 

55 return MappingProxyType(data) 

56 

57 

class DatasetType:
    r"""A named category of Datasets that defines how they are organized,
    related, and stored.

    A concrete, final class whose instances represent `DatasetType`\ s.
    `DatasetType` instances may be constructed without a `Registry`, but
    they must be registered via `Registry.registerDatasetType()` before
    corresponding Datasets may be added.  `DatasetType` instances are
    immutable.

    Parameters
    ----------
    name : `str`
        A string name for the Dataset; must correspond to the same
        `DatasetType` across all Registries.  Names must start with an
        upper or lowercase letter, and may contain only letters, numbers,
        and underscores.  Component dataset types should contain a single
        period separating the base dataset type name from the component
        name (and may be recursive).
    dimensions : `DimensionGraph` or iterable of `Dimension`
        Dimensions used to label and relate instances of this
        `DatasetType`.  If not a `DimensionGraph`, ``universe`` must be
        provided as well.
    storageClass : `StorageClass` or `str`
        Instance of a `StorageClass` or name of `StorageClass` that
        defines how this `DatasetType` is persisted.
    parentStorageClass : `StorageClass` or `str`, optional
        Instance of a `StorageClass` or name of `StorageClass` that
        defines how the composite parent is persisted.  Must be `None` if
        this is not a component.  Mandatory if it is a component, but may
        be the special temporary placeholder
        (`DatasetType.PlaceholderParentStorageClass`) to allow
        construction with an intent to finalize later.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to normalize ``dimensions`` if
        it is not already a `DimensionGraph`.
    """

    __slots__ = ("_name", "_dimensions", "_storageClass", "_storageClassName",
                 "_parentStorageClass", "_parentStorageClassName")

    # A leading letter followed by word characters, optionally repeated as
    # "."-separated component names of the same form.
    VALID_NAME_REGEX = re.compile(r"^[a-zA-Z][a-zA-Z0-9_]*(\.[a-zA-Z][a-zA-Z0-9_]*)*$")

    PlaceholderParentStorageClass = StorageClass("PlaceHolder")
    """Placeholder StorageClass that can be used temporarily for a
    component.

    This can be useful in pipeline construction where we are creating
    dataset types without a registry.
    """

108 

109 @staticmethod 

110 def nameWithComponent(datasetTypeName: str, componentName: str) -> str: 

111 """Form a valid DatasetTypeName from a parent and component. 

112 

113 No validation is performed. 

114 

115 Parameters 

116 ---------- 

117 datasetTypeName : `str` 

118 Base type name. 

119 componentName : `str` 

120 Name of component. 

121 

122 Returns 

123 ------- 

124 compTypeName : `str` 

125 Name to use for component DatasetType. 

126 """ 

127 return "{}.{}".format(datasetTypeName, componentName) 

128 

129 def __init__(self, name: str, dimensions: Union[DimensionGraph, Iterable[Dimension]], 

130 storageClass: Union[StorageClass, str], 

131 parentStorageClass: Optional[Union[StorageClass, str]] = None, *, 

132 universe: Optional[DimensionUniverse] = None): 

133 if self.VALID_NAME_REGEX.match(name) is None: 

134 raise ValueError(f"DatasetType name '{name}' is invalid.") 

135 self._name = name 

136 if not isinstance(dimensions, DimensionGraph): 

137 if universe is None: 

138 raise ValueError("If dimensions is not a normalized DimensionGraph, " 

139 "a universe must be provided.") 

140 dimensions = universe.extract(dimensions) 

141 self._dimensions = dimensions 

142 if not isinstance(storageClass, (StorageClass, str)): 

143 raise ValueError("StorageClass argument must be StorageClass or str. " 

144 f"Got {storageClass}") 

145 self._storageClass: Optional[StorageClass] 

146 if isinstance(storageClass, StorageClass): 

147 self._storageClass = storageClass 

148 self._storageClassName = storageClass.name 

149 else: 

150 self._storageClass = None 

151 self._storageClassName = storageClass 

152 

153 self._parentStorageClass: Optional[StorageClass] = None 

154 self._parentStorageClassName: Optional[str] = None 

155 if parentStorageClass is not None: 

156 if not isinstance(storageClass, (StorageClass, str)): 

157 raise ValueError("Parent StorageClass argument must be StorageClass or str. " 

158 f"Got {parentStorageClass}") 

159 

160 # Only allowed for a component dataset type 

161 _, componentName = self.splitDatasetTypeName(self._name) 

162 if componentName is None: 

163 raise ValueError("Can not specify a parent storage class if this is not a component" 

164 f" ({self._name})") 

165 if isinstance(parentStorageClass, StorageClass): 

166 self._parentStorageClass = parentStorageClass 

167 self._parentStorageClassName = parentStorageClass.name 

168 else: 

169 self._parentStorageClassName = parentStorageClass 

170 

171 # Ensure that parent storage class is specified when we have 

172 # a component and is not specified when we don't 

173 _, componentName = self.splitDatasetTypeName(self._name) 

174 if parentStorageClass is None and componentName is not None: 

175 raise ValueError(f"Component dataset type '{self._name}' constructed without parent" 

176 " storage class") 

177 if parentStorageClass is not None and componentName is None: 

178 raise ValueError(f"Parent storage class specified by {self._name} is not a composite") 

179 

180 def __repr__(self) -> str: 

181 parent = "" 

182 if self._parentStorageClassName: 

183 parent = f", parentStorageClass={self._parentStorageClassName}" 

184 return f"DatasetType({self.name}, {self.dimensions}, {self._storageClassName}{parent})" 

185 

186 def __eq__(self, other: Any) -> bool: 

187 if not isinstance(other, type(self)): 

188 return False 

189 if self._name != other._name: 

190 return False 

191 if self._dimensions != other._dimensions: 

192 return False 

193 if self._storageClass is not None and other._storageClass is not None: 

194 if self._storageClass != other._storageClass: 

195 return False 

196 else: 

197 if self._storageClassName != other._storageClassName: 

198 return False 

199 if self._parentStorageClass is not None and other._parentStorageClass is not None: 

200 return self._parentStorageClass == other._parentStorageClass 

201 else: 

202 return self._parentStorageClassName == other._parentStorageClassName 

203 

204 def __hash__(self) -> int: 

205 """Hash DatasetType instance. 

206 

207 This only uses StorageClass name which is it consistent with the 

208 implementation of StorageClass hash method. 

209 """ 

210 return hash((self._name, self._dimensions, self._storageClassName, 

211 self._parentStorageClassName)) 

212 

213 @property 

214 def name(self) -> str: 

215 """A string name for the Dataset; must correspond to the same 

216 `DatasetType` across all Registries. 

217 """ 

218 return self._name 

219 

220 @property 

221 def dimensions(self) -> DimensionGraph: 

222 r"""The `Dimension`\ s that label and relate instances of this 

223 `DatasetType` (`DimensionGraph`). 

224 """ 

225 return self._dimensions 

226 

227 @property 

228 def storageClass(self) -> StorageClass: 

229 """`StorageClass` instance that defines how this `DatasetType` 

230 is persisted. Note that if DatasetType was constructed with a name 

231 of a StorageClass then Butler has to be initialized before using 

232 this property. 

233 """ 

234 if self._storageClass is None: 

235 self._storageClass = StorageClassFactory().getStorageClass(self._storageClassName) 

236 return self._storageClass 

237 

238 @property 

239 def parentStorageClass(self) -> Optional[StorageClass]: 

240 """`StorageClass` instance that defines how the composite associated 

241 with this `DatasetType` is persisted. 

242 

243 Note that if DatasetType was constructed with a name of a 

244 StorageClass then Butler has to be initialized before using this 

245 property. Can be `None` if this is not a component of a composite. 

246 Must be defined if this is a component. 

247 """ 

248 if self._parentStorageClass is None and self._parentStorageClassName is None: 

249 return None 

250 if self._parentStorageClass is None and self._parentStorageClassName is not None: 

251 self._parentStorageClass = StorageClassFactory().getStorageClass(self._parentStorageClassName) 

252 return self._parentStorageClass 

253 

254 def finalizeParentStorageClass(self, newParent: StorageClass) -> None: 

255 """Replace the current placeholder parent storage class with 

256 the real parent. 

257 

258 Parameters 

259 ---------- 

260 newParent : `StorageClass` 

261 The new parent to be associated with this composite dataset 

262 type. This replaces the temporary placeholder parent that 

263 was specified during construction. 

264 

265 Raises 

266 ------ 

267 ValueError 

268 Raised if this dataset type is not a component of a composite. 

269 Raised if a StorageClass is not given. 

270 Raised if the parent currently associated with the dataset 

271 type is not a placeholder. 

272 """ 

273 if not self.isComponent(): 

274 raise ValueError("Can not set a parent storage class if this is not a component" 

275 f" ({self.name})") 

276 if self._parentStorageClass != self.PlaceholderParentStorageClass: 

277 raise ValueError(f"This DatasetType has a parent of {self._parentStorageClassName} and" 

278 " is not a placeholder.") 

279 if not isinstance(newParent, StorageClass): 

280 raise ValueError(f"Supplied parent must be a StorageClass. Got {newParent!r}") 

281 self._parentStorageClass = newParent 

282 self._parentStorageClassName = newParent.name 

283 

284 @staticmethod 

285 def splitDatasetTypeName(datasetTypeName: str) -> Tuple[str, Optional[str]]: 

286 """Given a dataset type name, return the root name and the component 

287 name. 

288 

289 Parameters 

290 ---------- 

291 datasetTypeName : `str` 

292 The name of the dataset type, can include a component using 

293 a "."-separator. 

294 

295 Returns 

296 ------- 

297 rootName : `str` 

298 Root name without any components. 

299 componentName : `str` 

300 The component if it has been specified, else `None`. 

301 

302 Notes 

303 ----- 

304 If the dataset type name is ``a.b.c`` this method will return a 

305 root name of ``a`` and a component name of ``b.c``. 

306 """ 

307 comp = None 

308 root = datasetTypeName 

309 if "." in root: 

310 # If there is doubt, the component is after the first "." 

311 root, comp = root.split(".", maxsplit=1) 

312 return root, comp 

313 

314 def nameAndComponent(self) -> Tuple[str, Optional[str]]: 

315 """Return the root name of this dataset type and the component 

316 name (if defined). 

317 

318 Returns 

319 ------- 

320 rootName : `str` 

321 Root name for this `DatasetType` without any components. 

322 componentName : `str` 

323 The component if it has been specified, else `None`. 

324 """ 

325 return self.splitDatasetTypeName(self.name) 

326 

327 def component(self) -> Optional[str]: 

328 """Component name (if defined) 

329 

330 Returns 

331 ------- 

332 comp : `str` 

333 Name of component part of DatasetType name. `None` if this 

334 `DatasetType` is not associated with a component. 

335 """ 

336 _, comp = self.nameAndComponent() 

337 return comp 

338 

339 def componentTypeName(self, component: str) -> str: 

340 """Given a component name, derive the datasetTypeName of that component 

341 

342 Parameters 

343 ---------- 

344 component : `str` 

345 Name of component 

346 

347 Returns 

348 ------- 

349 derived : `str` 

350 Compound name of this `DatasetType` and the component. 

351 

352 Raises 

353 ------ 

354 KeyError 

355 Requested component is not supported by this `DatasetType`. 

356 """ 

357 if component in self.storageClass.allComponents(): 

358 return self.nameWithComponent(self.name, component) 

359 raise KeyError("Requested component ({}) not understood by this DatasetType".format(component)) 

360 

361 def makeComponentDatasetType(self, component: str) -> DatasetType: 

362 """Return a DatasetType suitable for the given component, assuming the 

363 same dimensions as the parent. 

364 

365 Parameters 

366 ---------- 

367 component : `str` 

368 Name of component 

369 

370 Returns 

371 ------- 

372 datasetType : `DatasetType` 

373 A new DatasetType instance. 

374 """ 

375 # The component could be a read/write or read component 

376 return DatasetType(self.componentTypeName(component), dimensions=self.dimensions, 

377 storageClass=self.storageClass.allComponents()[component], 

378 parentStorageClass=self.storageClass) 

379 

380 def makeAllComponentDatasetTypes(self) -> List[DatasetType]: 

381 """Return all the component dataset types assocaited with this 

382 dataset type. 

383 

384 Returns 

385 ------- 

386 all : `list` of `DatasetType` 

387 All the component dataset types. If this is not a composite 

388 then returns an empty list. 

389 """ 

390 return [self.makeComponentDatasetType(componentName) 

391 for componentName in self.storageClass.allComponents()] 

392 

393 def isComponent(self) -> bool: 

394 """Boolean indicating whether this `DatasetType` refers to a 

395 component of a composite. 

396 

397 Returns 

398 ------- 

399 isComponent : `bool` 

400 `True` if this `DatasetType` is a component, `False` otherwise. 

401 """ 

402 if self.component(): 

403 return True 

404 return False 

405 

406 def isComposite(self) -> bool: 

407 """Boolean indicating whether this `DatasetType` is a composite type. 

408 

409 Returns 

410 ------- 

411 isComposite : `bool` 

412 `True` if this `DatasetType` is a composite type, `False` 

413 otherwise. 

414 """ 

415 return self.storageClass.isComposite() 

416 

417 def _lookupNames(self) -> Tuple[LookupKey, ...]: 

418 """Name keys to use when looking up this datasetType in a 

419 configuration. 

420 

421 The names are returned in order of priority. 

422 

423 Returns 

424 ------- 

425 names : `tuple` of `LookupKey` 

426 Tuple of the `DatasetType` name and the `StorageClass` name. 

427 If the name includes a component the name with the component 

428 is first, then the name without the component and finally 

429 the storage class name. 

430 """ 

431 rootName, componentName = self.nameAndComponent() 

432 lookups: Tuple[LookupKey, ...] = (LookupKey(name=self.name),) 

433 if componentName is not None: 

434 lookups = lookups + (LookupKey(name=rootName),) 

435 

436 if self.dimensions: 

437 # Dimensions are a lower priority than dataset type name 

438 lookups = lookups + (LookupKey(dimensions=self.dimensions),) 

439 

440 return lookups + self.storageClass._lookupNames() 

441 

442 def __reduce__(self) -> Tuple[Type[DatasetType], Tuple[str, DimensionGraph, str, Optional[str]]]: 

443 """Support pickling. 

444 

445 StorageClass instances can not normally be pickled, so we pickle 

446 StorageClass name instead of instance. 

447 """ 

448 return (DatasetType, (self.name, self.dimensions, self._storageClassName, 

449 self._parentStorageClassName)) 

450 

451 def __deepcopy__(self, memo: Any) -> DatasetType: 

452 """Support for deep copy method. 

453 

454 Normally ``deepcopy`` will use pickle mechanism to make copies. 

455 We want to avoid that to support (possibly degenerate) use case when 

456 DatasetType is constructed with StorageClass instance which is not 

457 registered with StorageClassFactory (this happens in unit tests). 

458 Instead we re-implement ``__deepcopy__`` method. 

459 """ 

460 return DatasetType(name=deepcopy(self.name, memo), 

461 dimensions=deepcopy(self.dimensions, memo), 

462 storageClass=deepcopy(self._storageClass or self._storageClassName, memo), 

463 parentStorageClass=deepcopy(self._parentStorageClass 

464 or self._parentStorageClassName, memo))