Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ["AmbiguousDatasetError", "DatasetRef"] 

24 

25from typing import ( 

26 Any, 

27 Dict, 

28 Iterable, 

29 List, 

30 Optional, 

31 Tuple, 

32) 

33 

34from ..dimensions import DataCoordinate, DimensionGraph 

35from ..configSupport import LookupKey 

36from ..utils import immutable 

37from ..named import NamedKeyDict 

38from .type import DatasetType 

39 

40 

class AmbiguousDatasetError(Exception):
    """Exception raised when an operation needs a resolved `DatasetRef`
    (one carrying both an integer ID and a run) but was given an
    unresolved one.
    """

45 

46 

@immutable
class DatasetRef:
    """Reference to a Dataset in a `Registry`.

    A `DatasetRef` may point to a Dataset that currently does not yet exist
    (e.g., because it is a predicted input for provenance).

    Parameters
    ----------
    datasetType : `DatasetType`
        The `DatasetType` for this Dataset.
    dataId : `DataCoordinate`
        A mapping of dimensions that labels the Dataset within a Collection.
    id : `int`, optional
        The unique integer identifier assigned when the dataset is created.
    run : `str`, optional
        The name of the run this dataset was associated with when it was
        created.  Must be provided if ``id`` is.
    conform : `bool`, optional
        If `True` (default), call `DataCoordinate.standardize` to ensure that
        the data ID's dimensions are consistent with the dataset type's.
        `DatasetRef` instances for which those dimensions are not equal should
        not be created in new code, but are still supported for backwards
        compatibility.  New code should only pass `False` if it can guarantee
        that the dimensions are already consistent.
    hasParentId : `bool`, optional
        If `True` this `DatasetRef` is a component that has the ``id``
        of the composite parent.

    Raises
    ------
    ValueError
        Raised if ``run`` is provided but ``id`` is not, or if ``id`` is
        provided but ``run`` is not.
    """

    __slots__ = ("id", "datasetType", "dataId", "run", "hasParentId")

    def __new__(cls, datasetType: DatasetType, dataId: DataCoordinate, *,
                id: Optional[int] = None,
                run: Optional[str] = None,
                hasParentId: bool = False,
                conform: bool = True) -> DatasetRef:
        # @immutable forbids attribute assignment after construction, so all
        # state is set here in __new__ rather than in an __init__.
        self = super().__new__(cls)
        assert isinstance(datasetType, DatasetType)
        self.id = id
        self.datasetType = datasetType
        self.hasParentId = hasParentId
        if conform:
            self.dataId = DataCoordinate.standardize(dataId, graph=datasetType.dimensions)
        else:
            self.dataId = dataId
        # ``id`` and ``run`` must be provided together: a resolved ref has
        # both, an unresolved ref has neither.
        if self.id is not None:
            if run is None:
                raise ValueError(f"Cannot provide id without run for dataset with id={id}, "
                                 f"type={datasetType}, and dataId={dataId}.")
            self.run = run
        else:
            if run is not None:
                raise ValueError("'run' cannot be provided unless 'id' is.")
            self.run = None
        return self

    def __eq__(self, other: Any) -> bool:
        # ``run`` is deliberately excluded: a resolved ref's identity is
        # (type, data ID, id); ``hasParentId`` is bookkeeping, not identity.
        try:
            return (self.datasetType, self.dataId, self.id) == (other.datasetType, other.dataId, other.id)
        except AttributeError:
            return NotImplemented

    def __hash__(self) -> int:
        # Must hash exactly the fields compared by __eq__.
        return hash((self.datasetType, self.dataId, self.id))

    @property
    def dimensions(self) -> DimensionGraph:
        """The dimensions associated with the underlying `DatasetType`
        """
        return self.datasetType.dimensions

    def __repr__(self) -> str:
        # We delegate to __str__ (i.e. use "!s") for the data ID below because
        # DataCoordinate's __repr__ - while adhering to the guidelines for
        # __repr__ - is much harder for users to read, while its __str__ just
        # produces a dict that can also be passed to DatasetRef's constructor.
        if self.id is not None:
            return f"DatasetRef({self.datasetType!r}, {self.dataId!s}, id={self.id}, run={self.run!r})"
        else:
            return f"DatasetRef({self.datasetType!r}, {self.dataId!s})"

    def __str__(self) -> str:
        s = f"{self.datasetType.name}@{self.dataId!s}"
        if self.id is not None:
            s += f" (id={self.id})"
        return s

    def __getnewargs_ex__(self) -> Tuple[Tuple[Any, ...], Dict[str, Any]]:
        # Pickle support: reconstruct via __new__.  ``hasParentId`` must be
        # included or component refs made by makeComponentRef lose it on a
        # pickle round-trip; ``conform=False`` because the stored data ID is
        # exactly what this ref was constructed with and re-standardizing on
        # unpickle could alter a legacy non-conforming ref.
        return ((self.datasetType, self.dataId),
                {"id": self.id, "run": self.run,
                 "hasParentId": self.hasParentId, "conform": False})

    def resolved(self, id: int, run: str) -> DatasetRef:
        """Return a new `DatasetRef` with the same data ID and dataset type
        and the given ID and run.

        Parameters
        ----------
        id : `int`
            The unique integer identifier assigned when the dataset is created.
        run : `str`
            The run this dataset was associated with when it was created.

        Returns
        -------
        ref : `DatasetRef`
            A new `DatasetRef`.
        """
        return DatasetRef(datasetType=self.datasetType, dataId=self.dataId,
                          id=id, run=run, conform=False)

    def unresolved(self) -> DatasetRef:
        """Return a new `DatasetRef` with the same data ID and dataset type,
        but no ID or run.

        Returns
        -------
        ref : `DatasetRef`
            A new `DatasetRef`.

        Notes
        -----
        This can be used to compare only the data ID and dataset type of a
        pair of `DatasetRef` instances, regardless of whether either is
        resolved::

            if ref1.unresolved() == ref2.unresolved():
                ...
        """
        # hasParentId is intentionally dropped: it describes where ``id``
        # came from, and an unresolved ref has no ``id``.
        return DatasetRef(datasetType=self.datasetType, dataId=self.dataId, conform=False)

    def expanded(self, dataId: DataCoordinate) -> DatasetRef:
        """Return a new `DatasetRef` with the given expanded data ID.

        Parameters
        ----------
        dataId : `DataCoordinate`
            Data ID for the new `DatasetRef`.  Must compare equal to the
            original data ID.

        Returns
        -------
        ref : `DatasetRef`
            A new `DatasetRef` with the given data ID.
        """
        assert dataId == self.dataId
        # Carry hasParentId over: this is the same dataset, just with an
        # expanded (but equal) data ID.
        return DatasetRef(datasetType=self.datasetType, dataId=dataId,
                          id=self.id, run=self.run,
                          hasParentId=self.hasParentId,
                          conform=False)

    def isComponent(self) -> bool:
        """Boolean indicating whether this `DatasetRef` refers to a
        component of a composite.

        Returns
        -------
        isComponent : `bool`
            `True` if this `DatasetRef` is a component, `False` otherwise.
        """
        return self.datasetType.isComponent()

    def isComposite(self) -> bool:
        """Boolean indicating whether this `DatasetRef` is a composite type.

        Returns
        -------
        isComposite : `bool`
            `True` if this `DatasetRef` is a composite type, `False`
            otherwise.
        """
        return self.datasetType.isComposite()

    def _lookupNames(self) -> Tuple[LookupKey, ...]:
        """Name keys to use when looking up this DatasetRef in a configuration.

        The names are returned in order of priority.

        Returns
        -------
        names : `tuple` of `LookupKey`
            Tuple of the `DatasetType` name and the `StorageClass` name.
            If ``instrument`` is defined in the dataId, each of those names
            is added to the start of the tuple with a key derived from the
            value of ``instrument``.
        """
        # Special case the instrument Dimension since we allow configs
        # to include the instrument name in the hierarchy.
        names: Tuple[LookupKey, ...] = self.datasetType._lookupNames()

        # mypy doesn't think this could return True, because even though
        # __contains__ can take an object of any type, it seems hard-coded to
        # assume it will return False if the type doesn't match the key type
        # of the Mapping.
        if "instrument" in self.dataId:  # type: ignore
            names = tuple(n.clone(dataId={"instrument": self.dataId["instrument"]})
                          for n in names) + names

        return names

    @staticmethod
    def groupByType(refs: Iterable[DatasetRef]) -> NamedKeyDict[DatasetType, List[DatasetRef]]:
        """Group an iterable of `DatasetRef` by `DatasetType`.

        Parameters
        ----------
        refs : `Iterable` [ `DatasetRef` ]
            `DatasetRef` instances to group.

        Returns
        -------
        grouped : `NamedKeyDict` [ `DatasetType`, `list` [ `DatasetRef` ] ]
            Grouped `DatasetRef` instances.
        """
        result: NamedKeyDict[DatasetType, List[DatasetRef]] = NamedKeyDict()
        for ref in refs:
            result.setdefault(ref.datasetType, []).append(ref)
        return result

    def getCheckedId(self) -> int:
        """Return ``self.id``, or raise if it is `None`.

        This trivial method exists to allow operations that would otherwise be
        natural list comprehensions to check that the ID is not `None` as well.

        Returns
        -------
        id : `int`
            ``self.id`` if it is not `None`.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id`` is `None`.
        """
        if self.id is None:
            raise AmbiguousDatasetError(f"ID for dataset {self} is `None`; "
                                        "a resolved reference is required.")
        return self.id

    def makeComponentRef(self, name: str) -> DatasetRef:
        """Create a `DatasetRef` that corresponds to a component of this
        dataset.

        Parameters
        ----------
        name : `str`
            Name of the component.

        Returns
        -------
        ref : `DatasetRef`
            A `DatasetRef` with a dataset type that corresponds to the given
            component, with ``hasParentId=True``, and the same ID and run
            (which may be `None`, if they are `None` in ``self``).
        """
        return DatasetRef(self.datasetType.makeComponentDatasetType(name), self.dataId,
                          id=self.id, run=self.run, hasParentId=True)

    datasetType: DatasetType
    """The definition of this dataset (`DatasetType`).

    Cannot be changed after a `DatasetRef` is constructed.
    """

    dataId: DataCoordinate
    """A mapping of `Dimension` primary key values that labels the dataset
    within a Collection (`DataCoordinate`).

    Cannot be changed after a `DatasetRef` is constructed.
    """

    run: Optional[str]
    """The name of the run that produced the dataset.

    Cannot be changed after a `DatasetRef` is constructed; use `resolved` or
    `unresolved` to add or remove this information when creating a new
    `DatasetRef`.
    """

    id: Optional[int]
    """Primary key of the dataset (`int` or `None`).

    Cannot be changed after a `DatasetRef` is constructed; use `resolved` or
    `unresolved` to add or remove this information when creating a new
    `DatasetRef`.
    """