Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

"""Support for configuration snippets"""

from __future__ import annotations

25 

# Names exported by a star-import of this module.
__all__ = ("LookupKey", "processLookupConfigs",
           "processLookupConfigList")

28 

29import logging 

30import re 

31from collections.abc import Mapping 

32 

33from typing import ( 

34 TYPE_CHECKING, 

35 Any, 

36 Dict, 

37 Iterable, 

38 Optional, 

39 Set, 

40 Union, 

41) 

42 

43from .dimensions import DimensionGraph 

44 

45if TYPE_CHECKING: 45 ↛ 46line 45 didn't jump to line 46, because the condition on line 45 was never true

46 from .dimensions import DimensionUniverse, Dimension 

47 from .config import Config 

48 

# Module-level logger, named after this module via ``__name__``.
log = logging.getLogger(__name__)

50 

# Pattern for ``key<value>`` strings.  Group 1 is the key (one or more
# lowercase letters or underscores); group 2 is everything between the "<"
# that follows the key and the final ">" at the end of the string.
DATAID_RE = re.compile(r"([a-z_]+)<(.*)>$")
"""Regex to find dataIds embedded in configurations."""

53 

54 

class LookupKey:
    """Representation of key that can be used to lookup information.

    Look up is based on dataset type name, storage class name, dimensions.

    Parameters
    ----------
    name : `str`, optional
        Primary index string for lookup.  If this string looks like it
        represents dimensions (via ``dim1+dim2+dim3`` syntax) the name
        is converted to a `DimensionGraph` and stored in ``dimensions``
        property.
    dimensions : `DimensionGraph`, optional
        Dimensions that are relevant for lookup. Should not be specified
        if ``name`` is also specified.
    dataId : `dict`, optional
        Keys and values from a dataId that should control lookups.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to expand and validate ``name`` or
        ``dimensions``.  Required if the key represents dimensions and a
        full `DimensionGraph` is not provided.

    Raises
    ------
    ValueError
        Raised if neither or both of ``name`` and ``dimensions`` are given,
        if ``name`` is not a `str`, or if dimensions need to be expanded
        but no ``universe`` was supplied.
    """

    def __init__(self, name: Optional[str] = None,
                 dimensions: Optional[Iterable[Union[str, Dimension]]] = None,
                 dataId: Optional[Dict[str, Any]] = None, *,
                 universe: Optional[DimensionUniverse] = None):
        if name is None and dimensions is None:
            raise ValueError("At least one of name or dimensions must be given")

        if name is not None and dimensions is not None:
            raise ValueError("Can only accept one of name or dimensions")

        self._dimensions = None
        self._name = None

        if name is not None:

            if not isinstance(name, str):
                raise ValueError(f"Supplied name must be str not: '{name}'")

            if "+" in name:
                # If we are given a single dimension we use the "+" to
                # indicate this but have to filter out the empty value
                dimension_names = [n for n in name.split("+") if n]
                if universe is None:
                    raise ValueError(f"Cannot construct LookupKey for {name} without dimension universe.")
                else:
                    self._dimensions = universe.extract(dimension_names)
            else:
                self._name = name

        elif dimensions is not None:
            if not isinstance(dimensions, DimensionGraph):
                if universe is None:
                    raise ValueError(f"Cannot construct LookupKey for dimensions={dimensions} "
                                     "without universe.")
                else:
                    self._dimensions = universe.extract(dimensions)
            else:
                self._dimensions = dimensions
        else:
            # mypy cannot work this out on its own
            raise ValueError("Name was None but dimensions is also None")

        # The dataId is converted to a frozenset of key/value
        # tuples so that it is not mutable
        self._dataId = frozenset(dataId.items()) if dataId is not None else None

    def __str__(self) -> str:
        # Compare against None explicitly: a name that is set but falsy
        # (the empty string) is still a name and must not be reported as
        # an internal error.
        if self._name is not None:
            name = self._name
        elif self._dimensions is not None:
            name = "+".join(self._dimensions.names)
        else:
            raise RuntimeError("Internal error since name and dimensions are both None")

        # For the simple case return the simple string
        if not self._dataId:
            return name

        return f"{name} ({self.dataId})"

    def __repr__(self) -> str:
        params = ""
        # Explicit None checks so that a value which is set but falsy is
        # still shown in the representation.
        if self._name is not None:
            params += f"name={self._name!r},"
        if self._dimensions is not None:
            params += f"dimensions={self._dimensions!r},"
        if self._dataId:
            params += "dataId={" + ",".join(f"'{k}': {v!r}" for k, v in self._dataId) + "}"

        return f"{self.__class__.__name__}({params})"

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, type(self)):
            return False
        if self._name == other._name and self._dimensions == other._dimensions and \
                self._dataId == other._dataId:
            return True
        return False

    @property
    def name(self) -> Optional[str]:
        """Primary name string to use as lookup (`str`)."""
        return self._name

    @property
    def dimensions(self) -> Optional[DimensionGraph]:
        """Dimensions associated with lookup (`DimensionGraph`)."""
        return self._dimensions

    @property
    def dataId(self) -> Optional[Dict[str, Any]]:
        """Return dict of keys/values that are important for dataId lookup.

        (`dict` or `None`)
        """
        if self._dataId is not None:
            # A fresh dict is built on each access so callers cannot
            # modify the stored (frozen) key/value pairs.
            return dict(self._dataId)
        else:
            return None

    def __hash__(self) -> int:
        """Hash the lookup to allow use as a key in a dict."""
        return hash((self._name, self._dimensions, self._dataId))

    def clone(self, name: Optional[str] = None, dimensions: Optional[DimensionGraph] = None,
              dataId: Optional[Dict[str, Any]] = None) -> LookupKey:
        """Clone the object, overriding some options.

        Used to create a new instance of the object whilst updating
        some of it.

        Parameters
        ----------
        name : `str`, optional
            Primary index string for lookup.  Will override ``dimensions``
            if ``dimensions`` are set.
        dimensions : `DimensionGraph`, optional
            Dimensions that are relevant for lookup. Will override ``name``
            if ``name`` is already set.
        dataId : `dict`, optional
            Keys and values from a dataId that should control lookups.
            If not given, the dataId of this object (if any) is reused.

        Returns
        -------
        clone : `LookupKey`
            Copy with updates.

        Raises
        ------
        ValueError
            Raised if both ``name`` and ``dimensions`` are supplied.
        """
        if name is not None and dimensions is not None:
            raise ValueError("Both name and dimensions can not be set")

        # if neither name nor dimensions are specified we copy from current
        # object.  Otherwise we'll use the supplied values
        if name is None and dimensions is None:
            name = self._name
            dimensions = self._dimensions

        # Make sure we use the dict form for the constructor
        if dataId is None and self._dataId is not None:
            dataId = self.dataId

        return self.__class__(name=name, dimensions=dimensions, dataId=dataId)

218 

219 

def processLookupConfigs(config: Config, *,
                         allow_hierarchy: bool = False,
                         universe: Optional[DimensionUniverse] = None
                         ) -> Dict[LookupKey, Union[str, Dict[str, Any]]]:
    """Process sections of configuration relating to lookups.

    Can be by dataset type name, storage class name, dimensions, or values
    of dimensions.

    Parameters
    ----------
    config : `Config`
        A `Config` representing a configuration mapping keys to values where
        the keys can be dataset type names, storage class names, dimensions
        or dataId components.
    allow_hierarchy : `bool`, optional
        If `True`, keys that refer to a hierarchy that does not look like
        a DataID specification are allowed and the full hierarchy, as a dict,
        will be returned in the value for the lookup key.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to expand and validate any used
        in lookup keys.

    Returns
    -------
    contents : `dict` of `LookupKey` to `str` or `dict`
        A `dict` with keys constructed from the configuration keys.  Values
        are taken unchanged from the configuration; a value is a mapping
        only for hierarchical entries accepted via ``allow_hierarchy``.
        It is assumed the caller will convert the values to the required
        form.

    Raises
    ------
    RuntimeError
        Raised if a key has a hierarchical value, is not of the form
        ``key<value>``, and ``allow_hierarchy`` is `False`.

    Notes
    -----
    The configuration is a mapping where the keys correspond to names
    that can refer to dataset type or storage class names, or can use a
    special syntax to refer to dimensions or dataId values.

    Dimensions are indicated by using dimension names separated by a ``+``.
    If a single dimension is specified this is also supported so long as
    a ``+`` is found.  Dimensions are normalized before use such that if
    ``physical_filter+visit`` is defined, then an implicit ``instrument``
    will automatically be added.

    DataID overrides can be specified using the form: ``field<value>`` to
    indicate a subhierarchy.  All keys within that new hierarchy will take
    precedence over equivalent values in the root hierarchy.

    Currently only a single dataId field can be specified for a key.
    For example with a config such as:

    .. code-block:: yaml

       something:
         calexp: value1
         instrument<HSC>:
           calexp: value2

    Requesting the match for ``calexp`` would return ``value1`` unless
    a `DatasetRef` is used with a dataId containing the key ``instrument``
    and value ``HSC``.
    """
    contents: Dict[LookupKey, Union[str, Dict[str, Any]]] = {}
    for name, value in config.items():
        if not isinstance(value, Mapping):
            # Simple entry: the key itself is the lookup.
            contents[LookupKey(name=name, universe=universe)] = value
            continue

        # A hierarchical value indicates a dataId component -- check the
        # format.  The raw key is deliberately NOT turned into a LookupKey
        # here: a ``key<value>`` string is not a lookup name, and a "+" in
        # its value would otherwise be parsed as a dimension expression.
        kv = DATAID_RE.match(name)
        if kv:
            dataId = {kv.group(1): kv.group(2)}
            for subKey, subStr in value.items():
                lookup = LookupKey(name=subKey, dataId=dataId, universe=universe)
                contents[lookup] = subStr
        elif allow_hierarchy:
            contents[LookupKey(name=name, universe=universe)] = value
        else:
            raise RuntimeError(f"Hierarchical key '{name}' not in form 'key<value>'")

    return contents

303 

304 

def processLookupConfigList(config: Iterable[Union[str, Mapping]],
                            *, universe: Optional[DimensionUniverse] = None) -> Set[LookupKey]:
    """Process sections of configuration relating to lookups.

    Can be by dataset type name, storage class name, dimensions, or values
    of dimensions.

    Parameters
    ----------
    config : `list` of `str` or `dict`
        Contents of a configuration listing keys that can be
        dataset type names, storage class names, dimensions
        or dataId components.  DataId components are represented as entries
        in the `list` of `dicts` with a single key with a value of a `list`
        of new keys.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to expand and validate any used
        in lookup keys.

    Returns
    -------
    lookups : `set` of `LookupKey`
        All the entries in the input list converted to `LookupKey` and
        returned in a `set`.

    Raises
    ------
    RuntimeError
        Raised if a mapping entry does not contain exactly one key, or if
        that key is not of the form ``key<value>``.

    Notes
    -----
    Keys are parsed as described in `processLookupConfigs`.
    """
    contents: Set[LookupKey] = set()

    for name in config:
        if not isinstance(name, Mapping):
            contents.add(LookupKey(name=name, universe=universe))
            continue

        if len(name) != 1:
            raise RuntimeError(f"Config dict entry {name} must contain exactly one key")

        # Exactly one item is present, so unpack it directly.
        dataIdLookUp, subKeys = next(iter(name.items()))
        kv = DATAID_RE.match(dataIdLookUp)
        if not kv:
            # Report the offending key rather than the whole mapping.
            raise RuntimeError(f"Hierarchical key '{dataIdLookUp}' not in form 'key<value>'")

        dataId = {kv.group(1): kv.group(2)}
        for subKey in subKeys:
            contents.add(LookupKey(name=subKey, dataId=dataId, universe=universe))

    return contents