Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

21 

22from __future__ import annotations 

23 

24__all__ = ["DatasetType"] 

25 

26from copy import deepcopy 

27import re 

28 

29from types import MappingProxyType 

30from ..storageClass import StorageClass, StorageClassFactory 

31from ..dimensions import DimensionGraph 

32from ..configSupport import LookupKey 

33 

34 

35def _safeMakeMappingProxyType(data): 

36 if data is None: 

37 data = {} 

38 return MappingProxyType(data) 

39 

40 

class DatasetType:
    r"""A named category of Datasets that defines how they are organized,
    related, and stored.

    A concrete, final class whose instances represent `DatasetType`\ s.
    `DatasetType` instances may be constructed without a `Registry`,
    but they must be registered
    via `Registry.registerDatasetType()` before corresponding Datasets
    may be added.
    `DatasetType` instances are immutable.

    Parameters
    ----------
    name : `str`
        A string name for the Dataset; must correspond to the same
        `DatasetType` across all Registries.  Names must start with an
        upper or lowercase letter, and may contain only letters, numbers,
        and underscores.  Component dataset types should contain a single
        period separating the base dataset type name from the component name
        (and may be recursive).
    dimensions : `DimensionGraph` or iterable of `Dimension`
        Dimensions used to label and relate instances of this `DatasetType`.
        If not a `DimensionGraph`, ``universe`` must be provided as well.
    storageClass : `StorageClass` or `str`
        Instance of a `StorageClass` or name of `StorageClass` that defines
        how this `DatasetType` is persisted.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to normalize ``dimensions`` if it
        is not already a `DimensionGraph`.

    Raises
    ------
    ValueError
        Raised if ``name`` does not match `VALID_NAME_REGEX`, or if
        ``dimensions`` must be normalized but no ``universe`` is given.
    TypeError
        Raised if ``storageClass`` is neither a `StorageClass` instance
        nor a `str`.
    """

    __slots__ = ("_name", "_dimensions", "_storageClass", "_storageClassName")

    # One or more identifier-like tokens separated by periods; each token
    # must begin with a letter.  Raw string so the period escape is literal.
    VALID_NAME_REGEX = re.compile(r"^[a-zA-Z][a-zA-Z0-9_]*(\.[a-zA-Z][a-zA-Z0-9_]*)*$")

    @staticmethod
    def nameWithComponent(datasetTypeName, componentName):
        """Form a valid DatasetTypeName from a parent and component.

        No validation is performed.

        Parameters
        ----------
        datasetTypeName : `str`
            Base type name.
        componentName : `str`
            Name of component.

        Returns
        -------
        compTypeName : `str`
            Name to use for component DatasetType.
        """
        return "{}.{}".format(datasetTypeName, componentName)

    def __init__(self, name, dimensions, storageClass, *, universe=None):
        if self.VALID_NAME_REGEX.match(name) is None:
            raise ValueError(f"DatasetType name '{name}' is invalid.")
        self._name = name
        if not isinstance(dimensions, DimensionGraph):
            if universe is None:
                raise ValueError("If dimensions is not a normalized DimensionGraph, "
                                 "a universe must be provided.")
            dimensions = universe.extract(dimensions)
        self._dimensions = dimensions
        # Explicit check instead of ``assert``: asserts are stripped under
        # ``python -O`` and must not be relied on for input validation.
        if not isinstance(storageClass, (StorageClass, str)):
            raise TypeError("storageClass must be a StorageClass instance or a str, "
                            f"not {type(storageClass).__name__}.")
        if isinstance(storageClass, StorageClass):
            self._storageClass = storageClass
            self._storageClassName = storageClass.name
        else:
            # Only the name was given; defer the factory lookup until the
            # ``storageClass`` property is first accessed.
            self._storageClass = None
            self._storageClassName = storageClass

    def __repr__(self):
        return f"DatasetType({self.name}, {self.dimensions}, {self._storageClassName})"

    def __eq__(self, other):
        # Compare by name, dimensions, and storage class.  Full StorageClass
        # comparison is only possible when both sides have resolved
        # instances; otherwise fall back to comparing the names, consistent
        # with __hash__.
        if not isinstance(other, type(self)):
            return False
        if self._name != other._name:
            return False
        if self._dimensions != other._dimensions:
            return False
        if self._storageClass is not None and other._storageClass is not None:
            return self._storageClass == other._storageClass
        else:
            return self._storageClassName == other._storageClassName

    def __hash__(self):
        """Hash DatasetType instance.

        This only uses the StorageClass name, which is consistent with the
        implementation of the StorageClass hash method.
        """
        return hash((self._name, self._dimensions, self._storageClassName))

    @property
    def name(self):
        """A string name for the Dataset; must correspond to the same
        `DatasetType` across all Registries.
        """
        return self._name

    @property
    def dimensions(self):
        r"""The `Dimension`\ s that label and relate instances of this
        `DatasetType` (`DimensionGraph`).
        """
        return self._dimensions

    @property
    def storageClass(self):
        """`StorageClass` instance that defines how this `DatasetType`
        is persisted.  Note that if DatasetType was constructed with a name
        of a StorageClass then Butler has to be initialized before using
        this property.
        """
        # Lazily resolve and cache the instance from its name on first use.
        if self._storageClass is None:
            self._storageClass = StorageClassFactory().getStorageClass(self._storageClassName)
        return self._storageClass

    @staticmethod
    def splitDatasetTypeName(datasetTypeName):
        """Given a dataset type name, return the root name and the component
        name.

        Parameters
        ----------
        datasetTypeName : `str`
            The name of the dataset type, can include a component using
            a "."-separator.

        Returns
        -------
        rootName : `str`
            Root name without any components.
        componentName : `str`
            The component if it has been specified, else `None`.

        Notes
        -----
        If the dataset type name is ``a.b.c`` this method will return a
        root name of ``a`` and a component name of ``b.c``.
        """
        comp = None
        root = datasetTypeName
        if "." in root:
            # If there is doubt, the component is after the first "."
            root, comp = root.split(".", maxsplit=1)
        return root, comp

    def nameAndComponent(self):
        """Return the root name of this dataset type and the component
        name (if defined).

        Returns
        -------
        rootName : `str`
            Root name for this `DatasetType` without any components.
        componentName : `str`
            The component if it has been specified, else `None`.
        """
        return self.splitDatasetTypeName(self.name)

    def component(self):
        """Component name (if defined)

        Returns
        -------
        comp : `str`
            Name of component part of DatasetType name.  `None` if this
            `DatasetType` is not associated with a component.
        """
        _, comp = self.nameAndComponent()
        return comp

    def componentTypeName(self, component):
        """Given a component name, derive the datasetTypeName of that component

        Parameters
        ----------
        component : `str`
            Name of component

        Returns
        -------
        derived : `str`
            Compound name of this `DatasetType` and the component.

        Raises
        ------
        KeyError
            Requested component is not supported by this `DatasetType`.
        """
        if component in self.storageClass.components:
            return self.nameWithComponent(self.name, component)
        raise KeyError("Requested component ({}) not understood by this DatasetType".format(component))

    def makeComponentDatasetType(self, component: str) -> DatasetType:
        """Return a DatasetType suitable for the given component, assuming the
        same dimensions as the parent.

        Parameters
        ----------
        component : `str`
            Name of component

        Returns
        -------
        datasetType : `DatasetType`
            A new DatasetType instance.
        """
        return DatasetType(self.componentTypeName(component), dimensions=self.dimensions,
                           storageClass=self.storageClass.components[component])

    def isComponent(self):
        """Boolean indicating whether this `DatasetType` refers to a
        component of a composite.

        Returns
        -------
        isComponent : `bool`
            `True` if this `DatasetType` is a component, `False` otherwise.
        """
        # Name validation guarantees a component, if present, is non-empty,
        # so a None check is equivalent to the truthiness test.
        return self.component() is not None

    def isComposite(self):
        """Boolean indicating whether this `DatasetType` is a composite type.

        Returns
        -------
        isComposite : `bool`
            `True` if this `DatasetType` is a composite type, `False`
            otherwise.
        """
        return self.storageClass.isComposite()

    def _lookupNames(self):
        """Name keys to use when looking up this datasetType in a
        configuration.

        The names are returned in order of priority.

        Returns
        -------
        names : `tuple` of `LookupKey`
            Tuple of the `DatasetType` name and the `StorageClass` name.
            If the name includes a component the name with the component
            is first, then the name without the component and finally
            the storage class name.
        """
        rootName, componentName = self.nameAndComponent()
        lookups = (LookupKey(name=self.name),)
        if componentName is not None:
            lookups = lookups + (LookupKey(name=rootName),)

        if self.dimensions:
            # Dimensions are a lower priority than dataset type name
            lookups = lookups + (LookupKey(dimensions=self.dimensions),)

        return lookups + self.storageClass._lookupNames()

    def __reduce__(self):
        """Support pickling.

        StorageClass instances can not normally be pickled, so we pickle
        StorageClass name instead of instance.
        """
        return (DatasetType, (self.name, self.dimensions, self._storageClassName))

    def __deepcopy__(self, memo):
        """Support for deep copy method.

        Normally ``deepcopy`` will use pickle mechanism to make copies.
        We want to avoid that to support (possibly degenerate) use case when
        DatasetType is constructed with StorageClass instance which is not
        registered with StorageClassFactory (this happens in unit tests).
        Instead we re-implement ``__deepcopy__`` method.
        """
        return DatasetType(name=deepcopy(self.name, memo),
                           dimensions=deepcopy(self.dimensions, memo),
                           storageClass=deepcopy(self._storageClass or self._storageClassName, memo))