Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ["DatasetType"] 

25 

26from copy import deepcopy 

27import re 

28 

29from types import MappingProxyType 

30 

31from typing import ( 

32 TYPE_CHECKING, 

33 Any, 

34 Iterable, 

35 Mapping, 

36 Optional, 

37 Tuple, 

38 Type, 

39 Union, 

40) 

41 

42 

43from ..storageClass import StorageClass, StorageClassFactory 

44from ..dimensions import DimensionGraph 

45from ..configSupport import LookupKey 

46 

if TYPE_CHECKING:
    # Imported only for static type annotations; deferring these imports to
    # type-checking time avoids a runtime import cycle with ..dimensions.
    from ..dimensions import Dimension, DimensionUniverse

49 

50 

51def _safeMakeMappingProxyType(data: Optional[Mapping]) -> Mapping: 

52 if data is None: 

53 data = {} 

54 return MappingProxyType(data) 

55 

56 

class DatasetType:
    r"""A named category of Datasets that defines how they are organized,
    related, and stored.

    A concrete, final class whose instances represent `DatasetType`\ s.
    `DatasetType` instances may be constructed without a `Registry`,
    but they must be registered
    via `Registry.registerDatasetType()` before corresponding Datasets
    may be added.
    `DatasetType` instances are immutable.

    Parameters
    ----------
    name : `str`
        A string name for the Dataset; must correspond to the same
        `DatasetType` across all Registries.  Names must start with an
        upper or lowercase letter, and may contain only letters, numbers,
        and underscores.  Component dataset types should contain a single
        period separating the base dataset type name from the component name
        (and may be recursive).
    dimensions : `DimensionGraph` or iterable of `Dimension`
        Dimensions used to label and relate instances of this `DatasetType`.
        If not a `DimensionGraph`, ``universe`` must be provided as well.
    storageClass : `StorageClass` or `str`
        Instance of a `StorageClass` or name of `StorageClass` that defines
        how this `DatasetType` is persisted.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to normalize ``dimensions`` if it
        is not already a `DimensionGraph`.

    Raises
    ------
    ValueError
        Raised if ``name`` does not match `VALID_NAME_REGEX`, or if
        ``dimensions`` is not a `DimensionGraph` and no ``universe`` was
        provided.
    """

    __slots__ = ("_name", "_dimensions", "_storageClass", "_storageClassName")

    # One or more identifier-like tokens separated by "."; a raw string
    # avoids double-escaping the literal period.
    VALID_NAME_REGEX = re.compile(r"^[a-zA-Z][a-zA-Z0-9_]*(\.[a-zA-Z][a-zA-Z0-9_]*)*$")

    @staticmethod
    def nameWithComponent(datasetTypeName: str, componentName: str) -> str:
        """Form a valid DatasetTypeName from a parent and component.

        No validation is performed.

        Parameters
        ----------
        datasetTypeName : `str`
            Base type name.
        componentName : `str`
            Name of component.

        Returns
        -------
        compTypeName : `str`
            Name to use for component DatasetType.
        """
        return "{}.{}".format(datasetTypeName, componentName)

    def __init__(self, name: str, dimensions: Union[DimensionGraph, Iterable[Dimension]],
                 storageClass: Union[StorageClass, str],
                 *, universe: Optional[DimensionUniverse] = None):
        # Note: ``universe`` was previously annotated ``DimensionUniverse =
        # None`` (implicit Optional, rejected by modern type checkers).
        if self.VALID_NAME_REGEX.match(name) is None:
            raise ValueError(f"DatasetType name '{name}' is invalid.")
        self._name = name
        if not isinstance(dimensions, DimensionGraph):
            if universe is None:
                raise ValueError("If dimensions is not a normalized DimensionGraph, "
                                 "a universe must be provided.")
            dimensions = universe.extract(dimensions)
        self._dimensions = dimensions
        assert isinstance(storageClass, (StorageClass, str))
        # When only a StorageClass *name* is given, resolution to an instance
        # is deferred to first access of the ``storageClass`` property.
        self._storageClass: Optional[StorageClass]
        if isinstance(storageClass, StorageClass):
            self._storageClass = storageClass
            self._storageClassName = storageClass.name
        else:
            self._storageClass = None
            self._storageClassName = storageClass

    def __repr__(self) -> str:
        return "DatasetType({}, {}, {})".format(self.name, self.dimensions, self._storageClassName)

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, type(self)):
            return False
        if self._name != other._name:
            return False
        if self._dimensions != other._dimensions:
            return False
        # Compare full StorageClass instances only when both sides have one
        # resolved; otherwise fall back to the always-available names.
        if self._storageClass is not None and other._storageClass is not None:
            return self._storageClass == other._storageClass
        else:
            return self._storageClassName == other._storageClassName

    def __hash__(self) -> int:
        """Hash DatasetType instance.

        This only uses the StorageClass name, which is consistent with the
        implementation of the StorageClass hash method.
        """
        return hash((self._name, self._dimensions, self._storageClassName))

    @property
    def name(self) -> str:
        """A string name for the Dataset; must correspond to the same
        `DatasetType` across all Registries.
        """
        return self._name

    @property
    def dimensions(self) -> DimensionGraph:
        r"""The `Dimension`\ s that label and relate instances of this
        `DatasetType` (`DimensionGraph`).
        """
        return self._dimensions

    @property
    def storageClass(self) -> StorageClass:
        """`StorageClass` instance that defines how this `DatasetType`
        is persisted.  Note that if DatasetType was constructed with a name
        of a StorageClass then Butler has to be initialized before using
        this property.
        """
        if self._storageClass is None:
            # Lazy resolution: look the instance up by name the first time
            # it is needed.
            self._storageClass = StorageClassFactory().getStorageClass(self._storageClassName)
        return self._storageClass

    @staticmethod
    def splitDatasetTypeName(datasetTypeName: str) -> Tuple[str, Optional[str]]:
        """Given a dataset type name, return the root name and the component
        name.

        Parameters
        ----------
        datasetTypeName : `str`
            The name of the dataset type, can include a component using
            a "."-separator.

        Returns
        -------
        rootName : `str`
            Root name without any components.
        componentName : `str`
            The component if it has been specified, else `None`.

        Notes
        -----
        If the dataset type name is ``a.b.c`` this method will return a
        root name of ``a`` and a component name of ``b.c``.
        """
        comp = None
        root = datasetTypeName
        if "." in root:
            # If there is doubt, the component is after the first "."
            root, comp = root.split(".", maxsplit=1)
        return root, comp

    def nameAndComponent(self) -> Tuple[str, Optional[str]]:
        """Return the root name of this dataset type and the component
        name (if defined).

        Returns
        -------
        rootName : `str`
            Root name for this `DatasetType` without any components.
        componentName : `str`
            The component if it has been specified, else `None`.
        """
        return self.splitDatasetTypeName(self.name)

    def component(self) -> Optional[str]:
        """Component name (if defined).

        Returns
        -------
        comp : `str`
            Name of component part of DatasetType name.  `None` if this
            `DatasetType` is not associated with a component.
        """
        _, comp = self.nameAndComponent()
        return comp

    def componentTypeName(self, component: str) -> str:
        """Given a component name, derive the datasetTypeName of that
        component.

        Parameters
        ----------
        component : `str`
            Name of component.

        Returns
        -------
        derived : `str`
            Compound name of this `DatasetType` and the component.

        Raises
        ------
        KeyError
            Requested component is not supported by this `DatasetType`.
        """
        if component in self.storageClass.components:
            return self.nameWithComponent(self.name, component)
        raise KeyError("Requested component ({}) not understood by this DatasetType".format(component))

    def makeComponentDatasetType(self, component: str) -> DatasetType:
        """Return a DatasetType suitable for the given component, assuming
        the same dimensions as the parent.

        Parameters
        ----------
        component : `str`
            Name of component.

        Returns
        -------
        datasetType : `DatasetType`
            A new DatasetType instance.

        Raises
        ------
        KeyError
            Requested component is not supported by this `DatasetType`.
        """
        # The component inherits the parent's dimensions; its storage class
        # comes from the parent storage class's component mapping.
        return DatasetType(self.componentTypeName(component), dimensions=self.dimensions,
                           storageClass=self.storageClass.components[component])

    def isComponent(self) -> bool:
        """Boolean indicating whether this `DatasetType` refers to a
        component of a composite.

        Returns
        -------
        isComponent : `bool`
            `True` if this `DatasetType` is a component, `False` otherwise.
        """
        # Validated names cannot yield an empty component, so a None check
        # is equivalent to the truthiness test.
        return self.component() is not None

    def isComposite(self) -> bool:
        """Boolean indicating whether this `DatasetType` is a composite
        type.

        Returns
        -------
        isComposite : `bool`
            `True` if this `DatasetType` is a composite type, `False`
            otherwise.
        """
        return self.storageClass.isComposite()

    def _lookupNames(self) -> Tuple[LookupKey, ...]:
        """Name keys to use when looking up this datasetType in a
        configuration.

        The names are returned in order of priority.

        Returns
        -------
        names : `tuple` of `LookupKey`
            Tuple of the `DatasetType` name and the `StorageClass` name.
            If the name includes a component the name with the component
            is first, then the name without the component and finally
            the storage class name.
        """
        rootName, componentName = self.nameAndComponent()
        lookups: Tuple[LookupKey, ...] = (LookupKey(name=self.name),)
        if componentName is not None:
            lookups = lookups + (LookupKey(name=rootName),)

        if self.dimensions:
            # Dimensions are a lower priority than dataset type name
            lookups = lookups + (LookupKey(dimensions=self.dimensions),)

        return lookups + self.storageClass._lookupNames()

    def __reduce__(self) -> Tuple[Type[DatasetType], Tuple[str, DimensionGraph, str]]:
        """Support pickling.

        StorageClass instances can not normally be pickled, so we pickle
        StorageClass name instead of instance.
        """
        return (DatasetType, (self.name, self.dimensions, self._storageClassName))

    def __deepcopy__(self, memo: Any) -> DatasetType:
        """Support for deep copy method.

        Normally ``deepcopy`` will use pickle mechanism to make copies.
        We want to avoid that to support (possibly degenerate) use case when
        DatasetType is constructed with StorageClass instance which is not
        registered with StorageClassFactory (this happens in unit tests).
        Instead we re-implement ``__deepcopy__`` method.
        """
        return DatasetType(name=deepcopy(self.name, memo),
                           dimensions=deepcopy(self.dimensions, memo),
                           storageClass=deepcopy(self._storageClass or self._storageClassName, memo))