Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

21 

22from __future__ import annotations 

23 

24__all__ = ["DatasetType"] 

25 

26from copy import deepcopy 

27import re 

28 

29from types import MappingProxyType 

30from ..storageClass import StorageClass, StorageClassFactory 

31from ..dimensions import DimensionGraph 

32from ..configSupport import LookupKey 

33 

34 

35def _safeMakeMappingProxyType(data): 

36 if data is None: 

37 data = {} 

38 return MappingProxyType(data) 

39 

40 

41class DatasetType: 

42 r"""A named category of Datasets that defines how they are organized, 

43 related, and stored. 

44 

45 A concrete, final class whose instances represent `DatasetType`\ s. 

46 `DatasetType` instances may be constructed without a `Registry`, 

47 but they must be registered 

48 via `Registry.registerDatasetType()` before corresponding Datasets 

49 may be added. 

50 `DatasetType` instances are immutable. 

51 

52 Parameters 

53 ---------- 

54 name : `str` 

55 A string name for the Dataset; must correspond to the same 

56 `DatasetType` across all Registries. Names must start with an 

57 upper or lowercase letter, and may contain only letters, numbers, 

58 and underscores. Component dataset types should contain a single 

59 period separating the base dataset type name from the component name 

60 (and may be recursive). 

61 dimensions : `DimensionGraph` or iterable of `Dimension` 

62 Dimensions used to label and relate instances of this `DatasetType`. 

63 If not a `DimensionGraph`, ``universe`` must be provided as well. 

64 storageClass : `StorageClass` or `str` 

65 Instance of a `StorageClass` or name of `StorageClass` that defines 

66 how this `DatasetType` is persisted. 

67 universe : `DimensionUniverse`, optional 

68 Set of all known dimensions, used to normalize ``dimensions`` if it 

69 is not already a `DimensionGraph`. 

70 """ 

71 

72 __slots__ = ("_name", "_dimensions", "_storageClass", "_storageClassName") 

73 

74 VALID_NAME_REGEX = re.compile("^[a-zA-Z][a-zA-Z0-9_]*(\\.[a-zA-Z][a-zA-Z0-9_]*)*$") 

75 

76 @staticmethod 

77 def nameWithComponent(datasetTypeName, componentName): 

78 """Form a valid DatasetTypeName from a parent and component. 

79 

80 No validation is performed. 

81 

82 Parameters 

83 ---------- 

84 datasetTypeName : `str` 

85 Base type name. 

86 componentName : `str` 

87 Name of component. 

88 

89 Returns 

90 ------- 

91 compTypeName : `str` 

92 Name to use for component DatasetType. 

93 """ 

94 return "{}.{}".format(datasetTypeName, componentName) 

95 

96 def __init__(self, name, dimensions, storageClass, *, universe=None): 

97 if self.VALID_NAME_REGEX.match(name) is None: 

98 raise ValueError(f"DatasetType name '{name}' is invalid.") 

99 self._name = name 

100 if not isinstance(dimensions, DimensionGraph): 

101 if universe is None: 

102 raise ValueError("If dimensions is not a normalized DimensionGraph, " 

103 "a universe must be provided.") 

104 dimensions = universe.extract(dimensions) 

105 self._dimensions = dimensions 

106 assert isinstance(storageClass, (StorageClass, str)) 

107 if isinstance(storageClass, StorageClass): 

108 self._storageClass = storageClass 

109 self._storageClassName = storageClass.name 

110 else: 

111 self._storageClass = None 

112 self._storageClassName = storageClass 

113 

114 def __repr__(self): 

115 return "DatasetType({}, {}, {})".format(self.name, self.dimensions, self._storageClassName) 

116 

117 def __eq__(self, other): 

118 if self._name != other._name: 

119 return False 

120 if self._dimensions != other._dimensions: 

121 return False 

122 if self._storageClass is not None and other._storageClass is not None: 

123 return self._storageClass == other._storageClass 

124 else: 

125 return self._storageClassName == other._storageClassName 

126 

127 def __hash__(self): 

128 """Hash DatasetType instance. 

129 

130 This only uses StorageClass name which is it consistent with the 

131 implementation of StorageClass hash method. 

132 """ 

133 return hash((self._name, self._dimensions, self._storageClassName)) 

134 

135 @property 

136 def name(self): 

137 """A string name for the Dataset; must correspond to the same 

138 `DatasetType` across all Registries. 

139 """ 

140 return self._name 

141 

142 @property 

143 def dimensions(self): 

144 r"""The `Dimension`\ s that label and relate instances of this 

145 `DatasetType` (`DimensionGraph`). 

146 """ 

147 return self._dimensions 

148 

149 @property 

150 def storageClass(self): 

151 """`StorageClass` instance that defines how this `DatasetType` 

152 is persisted. Note that if DatasetType was constructed with a name 

153 of a StorageClass then Butler has to be initialized before using 

154 this property. 

155 """ 

156 if self._storageClass is None: 

157 self._storageClass = StorageClassFactory().getStorageClass(self._storageClassName) 

158 return self._storageClass 

159 

160 @staticmethod 

161 def splitDatasetTypeName(datasetTypeName): 

162 """Given a dataset type name, return the root name and the component 

163 name. 

164 

165 Parameters 

166 ---------- 

167 datasetTypeName : `str` 

168 The name of the dataset type, can include a component using 

169 a "."-separator. 

170 

171 Returns 

172 ------- 

173 rootName : `str` 

174 Root name without any components. 

175 componentName : `str` 

176 The component if it has been specified, else `None`. 

177 

178 Notes 

179 ----- 

180 If the dataset type name is ``a.b.c`` this method will return a 

181 root name of ``a`` and a component name of ``b.c``. 

182 """ 

183 comp = None 

184 root = datasetTypeName 

185 if "." in root: 

186 # If there is doubt, the component is after the first "." 

187 root, comp = root.split(".", maxsplit=1) 

188 return root, comp 

189 

190 def nameAndComponent(self): 

191 """Return the root name of this dataset type and the component 

192 name (if defined). 

193 

194 Returns 

195 ------- 

196 rootName : `str` 

197 Root name for this `DatasetType` without any components. 

198 componentName : `str` 

199 The component if it has been specified, else `None`. 

200 """ 

201 return self.splitDatasetTypeName(self.name) 

202 

203 def component(self): 

204 """Component name (if defined) 

205 

206 Returns 

207 ------- 

208 comp : `str` 

209 Name of component part of DatasetType name. `None` if this 

210 `DatasetType` is not associated with a component. 

211 """ 

212 _, comp = self.nameAndComponent() 

213 return comp 

214 

215 def componentTypeName(self, component): 

216 """Given a component name, derive the datasetTypeName of that component 

217 

218 Parameters 

219 ---------- 

220 component : `str` 

221 Name of component 

222 

223 Returns 

224 ------- 

225 derived : `str` 

226 Compound name of this `DatasetType` and the component. 

227 

228 Raises 

229 ------ 

230 KeyError 

231 Requested component is not supported by this `DatasetType`. 

232 """ 

233 if component in self.storageClass.components: 

234 return self.nameWithComponent(self.name, component) 

235 raise KeyError("Requested component ({}) not understood by this DatasetType".format(component)) 

236 

237 def makeComponentDatasetType(self, component: str) -> DatasetType: 

238 """Return a DatasetType suitable for the given component, assuming the 

239 same dimensions as the parent. 

240 

241 Parameters 

242 ---------- 

243 component : `str` 

244 Name of component 

245 

246 Returns 

247 ------- 

248 datasetType : `DatasetType` 

249 A new DatasetType instance. 

250 """ 

251 return DatasetType(self.componentTypeName(component), dimensions=self.dimensions, 

252 storageClass=self.storageClass.components[component]) 

253 

254 def isComponent(self): 

255 """Boolean indicating whether this `DatasetType` refers to a 

256 component of a composite. 

257 

258 Returns 

259 ------- 

260 isComponent : `bool` 

261 `True` if this `DatasetType` is a component, `False` otherwise. 

262 """ 

263 if self.component(): 

264 return True 

265 return False 

266 

267 def isComposite(self): 

268 """Boolean indicating whether this `DatasetType` is a composite type. 

269 

270 Returns 

271 ------- 

272 isComposite : `bool` 

273 `True` if this `DatasetType` is a composite type, `False` 

274 otherwise. 

275 """ 

276 return self.storageClass.isComposite() 

277 

278 def _lookupNames(self): 

279 """Name keys to use when looking up this datasetType in a 

280 configuration. 

281 

282 The names are returned in order of priority. 

283 

284 Returns 

285 ------- 

286 names : `tuple` of `LookupKey` 

287 Tuple of the `DatasetType` name and the `StorageClass` name. 

288 If the name includes a component the name with the component 

289 is first, then the name without the component and finally 

290 the storage class name. 

291 """ 

292 rootName, componentName = self.nameAndComponent() 

293 lookups = (LookupKey(name=self.name),) 

294 if componentName is not None: 

295 lookups = lookups + (LookupKey(name=rootName),) 

296 

297 if self.dimensions: 

298 # Dimensions are a lower priority than dataset type name 

299 lookups = lookups + (LookupKey(dimensions=self.dimensions),) 

300 

301 return lookups + self.storageClass._lookupNames() 

302 

303 def __reduce__(self): 

304 """Support pickling. 

305 

306 StorageClass instances can not normally be pickled, so we pickle 

307 StorageClass name instead of instance. 

308 """ 

309 return (DatasetType, (self.name, self.dimensions, self._storageClassName)) 

310 

311 def __deepcopy__(self, memo): 

312 """Support for deep copy method. 

313 

314 Normally ``deepcopy`` will use pickle mechanism to make copies. 

315 We want to avoid that to support (possibly degenerate) use case when 

316 DatasetType is constructed with StorageClass instance which is not 

317 registered with StorageClassFactory (this happens in unit tests). 

318 Instead we re-implement ``__deepcopy__`` method. 

319 """ 

320 return DatasetType(name=deepcopy(self.name, memo), 

321 dimensions=deepcopy(self.dimensions, memo), 

322 storageClass=deepcopy(self._storageClass or self._storageClassName, memo))