Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24"""Support for configuration snippets""" 

25 

26__all__ = ("LookupKey", "processLookupConfigs", 

27 "processLookupConfigList") 

28 

29import logging 

30import re 

31from collections.abc import Mapping 

32 

33from typing import ( 

34 TYPE_CHECKING, 

35 Any, 

36 Dict, 

37 Iterable, 

38 Optional, 

39 Set, 

40 Union, 

41) 

42 

43from .dimensions import DimensionGraph 

44 

45if TYPE_CHECKING: 45 ↛ 46line 45 didn't jump to line 46, because the condition on line 45 was never true

46 from .dimensions import DimensionUniverse, Dimension 

47 from .config import Config 

48 

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Applied with ``match`` (so anchored at the start of the key) to configuration
# keys of the form ``field<value>``: group 1 is the dataId field name
# (lower-case letters and underscores only) and group 2 is everything between
# the first ``<`` and the closing ``>`` that must end the key.
DATAID_RE = re.compile(r"([a-z_]+)<(.*)>$")
"""Regex to find dataIds embedded in configurations."""

53 

54 

class LookupKey:
    """Representation of key that can be used to lookup information.

    Look up is based on dataset type name, storage class name, dimensions.

    Parameters
    ----------
    name : `str`, optional
        Primary index string for lookup.  If this string looks like it
        represents dimensions (via ``dim1+dim2+dim3`` syntax) the name
        is converted to a `DimensionGraph` and stored in ``dimensions``
        property.
    dimensions : `DimensionGraph`, optional
        Dimensions that are relevant for lookup. Should not be specified
        if ``name`` is also specified.
    dataId : `dict`, optional
        Keys and values from a dataId that should control lookups.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to expand and validate ``name`` or
        ``dimensions``.  Required if the key represents dimensions and a
        full `DimensionGraph` is not provided.  The universe is remembered
        so that `clone` can re-use it.

    Raises
    ------
    ValueError
        Raised if neither or both of ``name`` and ``dimensions`` are given,
        if ``name`` is not a `str`, or if a universe is needed to interpret
        the supplied values but none was provided.
    """

    def __init__(self, name: Optional[str] = None,
                 dimensions: Optional[Iterable[Union[str, Dimension]]] = None,
                 dataId: Optional[Dict[str, Any]] = None, *, universe: Optional[DimensionUniverse] = None):
        if name is None and dimensions is None:
            raise ValueError("At least one of name or dimensions must be given")

        if name is not None and dimensions is not None:
            raise ValueError("Can only accept one of name or dimensions")

        self._dimensions = None
        self._name = None

        # Kept only so that clone() can hand it back to the constructor;
        # it deliberately takes no part in equality or hashing.
        self._universe = universe

        if name is not None:

            if not isinstance(name, str):
                raise ValueError(f"Supplied name must be str not: '{name}'")

            if "+" in name:
                if universe is None:
                    raise ValueError(f"Cannot construct LookupKey for {name} without dimension universe.")

                # If we are given a single dimension we use the "+" to
                # indicate this but have to filter out the empty value
                dimension_names = [n for n in name.split("+") if n]
                try:
                    self._dimensions = universe.extract(dimension_names)
                except KeyError:
                    # One or more of the dimensions is not known to the
                    # universe. This could be a typo or it could be that
                    # a config is being used that is not compatible with
                    # this universe. Use the name directly as a lookup key
                    # but issue a warning. This will be potentially annoying
                    # in the scenario where a lookup key comes from a
                    # default config but the users are using an external
                    # universe.
                    unknown = [dim for dim in dimension_names if universe.get(dim) is None]
                    log.warning("A LookupKey '%s' uses unknown dimensions: %s. Possible typo?"
                                " Using the name explicitly.",
                                name, unknown)
                    self._name = name
            else:
                self._name = name

        elif dimensions is not None:
            if not isinstance(dimensions, DimensionGraph):
                if universe is None:
                    raise ValueError(f"Cannot construct LookupKey for dimensions={dimensions} "
                                     "without universe.")
                else:
                    self._dimensions = universe.extract(dimensions)
            else:
                self._dimensions = dimensions
        else:
            # mypy cannot work this out on its own
            raise ValueError("Name was None but dimensions is also None")

        # The dataId is converted to a frozenset of key/value
        # tuples so that it is not mutable
        self._dataId = frozenset(dataId.items()) if dataId is not None else None

    def __str__(self) -> str:
        # For the simple case return the simple string
        if self._name:
            name = self._name
        elif self._dimensions is not None:
            name = "+".join(self._dimensions.names)
        else:
            raise RuntimeError("Internal error since name and dimensions are both None")

        if not self._dataId:
            return name

        return f"{name} ({self.dataId})"

    def __repr__(self) -> str:
        # Only the components that are set (and non-empty) are reported.
        params = ""
        if self.name:
            params += f"name={self.name!r},"
        if self.dimensions:
            params += f"dimensions={self.dimensions!r},"
        if self._dataId:
            params += "dataId={" + ",".join(f"'{k}': {v!r}" for k, v in self._dataId) + "}"

        return f"{self.__class__.__name__}({params})"

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, type(self)):
            return False
        # Compare exactly the fields that feed __hash__ so that the two
        # stay consistent.
        return (self._name == other._name
                and self._dimensions == other._dimensions
                and self._dataId == other._dataId)

    @property
    def name(self) -> Optional[str]:
        """Primary name string to use as lookup (`str`)."""
        return self._name

    @property
    def dimensions(self) -> Optional[DimensionGraph]:
        """Dimensions associated with lookup (`DimensionGraph`)."""
        return self._dimensions

    @property
    def dataId(self) -> Optional[Dict[str, Any]]:
        """Return dict of keys/values that are important for dataId lookup.

        (`dict` or `None`)
        """
        if self._dataId is None:
            return None
        # A fresh dict is built on every access so callers can not modify
        # the internal (immutable) representation.
        return dict(self._dataId)

    def __hash__(self) -> int:
        """Hash the lookup to allow use as a key in a dict."""
        return hash((self._name, self._dimensions, self._dataId))

    def clone(self, name: Optional[str] = None, dimensions: Optional[DimensionGraph] = None,
              dataId: Optional[Dict[str, Any]] = None) -> LookupKey:
        """Clone the object, overriding some options.

        Used to create a new instance of the object whilst updating
        some of it.

        Parameters
        ----------
        name : `str`, optional
            Primary index string for lookup.  Will override ``dimensions``
            if ``dimensions`` are set.
        dimensions : `DimensionGraph`, optional
            Dimensions that are relevant for lookup.  Will override ``name``
            if ``name`` is already set.
        dataId : `dict`, optional
            Keys and values from a dataId that should control lookups.

        Returns
        -------
        clone : `LookupKey`
            Copy with updates.

        Raises
        ------
        ValueError
            Raised if both ``name`` and ``dimensions`` are supplied.

        Notes
        -----
        The universe given when this key was constructed (if any) is passed
        on to the new key, so a ``name`` using the ``dim1+dim2`` syntax can
        be interpreted by the clone in the same way as by the constructor.
        """
        if name is not None and dimensions is not None:
            raise ValueError("Both name and dimensions can not be set")

        # if neither name nor dimensions are specified we copy from current
        # object. Otherwise we'll use the supplied values
        if name is None and dimensions is None:
            name = self._name
            dimensions = self._dimensions

        # Make sure we use the dict form for the constructor
        if dataId is None and self._dataId is not None:
            dataId = self.dataId

        # Forward the remembered universe: without it the constructor
        # rejects any name containing "+".
        return self.__class__(name=name, dimensions=dimensions, dataId=dataId,
                              universe=self._universe)

233 

234 

def processLookupConfigs(config: Config, *,
                         allow_hierarchy: bool = False,
                         universe: Optional[DimensionUniverse] = None) -> Dict[LookupKey,
                                                                               Union[str, Dict[str, Any]]]:
    """Convert a lookup configuration into a mapping keyed by `LookupKey`.

    Keys can refer to dataset type names, storage class names, dimensions,
    or values of dimensions.

    Parameters
    ----------
    config : `Config`
        A `Config` representing a configuration mapping keys to values where
        the keys can be dataset type names, storage class names, dimensions
        or dataId components.
    allow_hierarchy : `bool`, optional
        If `True`, keys that refer to a hierarchy that does not look like
        a DataID specification are allowed and the full hierarchy, as a dict,
        will be returned in the value for the lookup key.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to expand and validate any used
        in lookup keys.

    Returns
    -------
    contents : `dict` of `LookupKey` to `str`
        A `dict` with keys constructed from the configuration keys and values
        being simple strings.  It is assumed the caller will convert the
        values to the required form.  When ``allow_hierarchy`` is `True` a
        value can instead be the full sub-hierarchy found under that key.

    Raises
    ------
    RuntimeError
        Raised if a key has a hierarchy as its value, the key is not of the
        form ``key<value>``, and ``allow_hierarchy`` is `False`.

    Notes
    -----
    The configuration is a mapping where the keys correspond to names
    that can refer to dataset type or storage class names, or can use a
    special syntax to refer to dimensions or dataId values.

    Dimensions are indicated by using dimension names separated by a ``+``.
    If a single dimension is specified this is also supported so long as
    a ``+`` is found.  Dimensions are normalized before use such that if
    ``physical_filter+visit`` is defined, then an implicit ``instrument``
    will automatically be added.

    DataID overrides can be specified using the form: ``field<value>`` to
    indicate a subhierarchy.  All keys within that new hierarchy will take
    precedence over equivalent values in the root hierarchy.

    Currently only a single dataId field can be specified for a key.
    For example with a config such as:

    .. code-block:: yaml

       something:
         calexp: value1
         instrument<HSC>:
           calexp: value2

    Requesting the match for ``calexp`` would return ``value1`` unless
    a `DatasetRef` is used with a dataId containing the key ``instrument``
    and value ``HSC``.

    The values of the mapping are stored as strings.
    """
    lookups = {}
    for key, entry in config.items():
        # A key is always built from the raw config key first; it is only
        # stored for plain values and for permitted free-form hierarchies.
        direct = LookupKey(name=key, universe=universe)

        if not isinstance(entry, Mapping):
            lookups[direct] = entry
            continue

        # A hierarchy: expected to be a ``field<value>`` dataId override.
        matched = DATAID_RE.match(key)
        if matched is not None:
            field, fieldValue = matched.group(1, 2)
            for subKey, subValue in entry.items():
                override = LookupKey(name=subKey, dataId={field: fieldValue}, universe=universe)
                lookups[override] = subValue
        elif allow_hierarchy:
            lookups[direct] = entry
        else:
            raise RuntimeError(f"Hierarchical key '{key}' not in form 'key<value>'")

    return lookups

318 

319 

def processLookupConfigList(config: Iterable[Union[str, Mapping]],
                            *, universe: Optional[DimensionUniverse] = None) -> Set[LookupKey]:
    """Process sections of configuration relating to lookups.

    Can be by dataset type name, storage class name, dimensions, or values
    of dimensions.

    Parameters
    ----------
    config : `list` of `str` or `dict`
        Contents of a configuration listing keys that can be
        dataset type names, storage class names, dimensions
        or dataId components.  DataId components are represented as entries
        in the `list` of `dicts` with a single key with a value of a `list`
        of new keys.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to expand and validate any used
        in lookup keys.

    Returns
    -------
    lookups : `set` of `LookupKey`
        All the entries in the input list converted to `LookupKey` and
        returned in a `set`.

    Raises
    ------
    RuntimeError
        Raised if a `dict` entry does not contain exactly one key, or if
        that key is not of the form ``key<value>``.

    Notes
    -----
    Keys are parsed as described in `processLookupConfigs`.
    """
    contents = set()

    for entry in config:
        if not isinstance(entry, Mapping):
            # Simple entry: the item itself is the lookup name.
            contents.add(LookupKey(name=entry, universe=universe))
            continue

        # A dict entry describes a single dataId override; both an empty
        # dict and a dict with several keys are rejected.
        if len(entry) != 1:
            raise RuntimeError(f"Config dict entry {entry} must have exactly one key "
                               f"(found {len(entry)})")

        # Safe to unpack: the length was checked above.
        (dataIdLookUp, subKeys), = entry.items()

        kv = DATAID_RE.match(dataIdLookUp)
        if kv is None:
            # Report the offending key rather than the whole mapping.
            raise RuntimeError(f"Hierarchical key '{dataIdLookUp}' not in form 'key<value>'")

        dataIdKey = kv.group(1)
        dataIdValue = kv.group(2)
        for subKey in subKeys:
            contents.add(LookupKey(name=subKey, dataId={dataIdKey: dataIdValue}, universe=universe))

    return contents

367 

368 return contents