Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24"""Support for configuration snippets""" 

25 

26__all__ = ("LookupKey", "processLookupConfigs", 

27 "processLookupConfigList") 

28 

29import logging 

30import re 

31from collections.abc import Mapping 

32 

33from typing import ( 

34 TYPE_CHECKING, 

35 Any, 

36 Dict, 

37 Iterable, 

38 Optional, 

39 Set, 

40 Union, 

41) 

42 

43from .dimensions import DimensionGraph 

44 

45if TYPE_CHECKING: 45 ↛ 46line 45 didn't jump to line 46, because the condition on line 45 was never true

46 from .dimensions import DimensionUniverse, Dimension 

47 from .config import Config 

48 

# Module-level logger named after this module.
log = logging.getLogger(__name__)

# Applied with ``match`` (anchored at the start of the key) by the functions
# in this module: group 1 is the dataId key (one or more lowercase letters or
# underscores) and group 2 is the dataId value, i.e. everything between the
# ``<`` and a ``>`` that must be the final character of the string.
DATAID_RE = re.compile(r"([a-z_]+)<(.*)>$")
"""Regex to find dataIds embedded in configurations."""

53 

54 

class LookupKey:
    """Representation of key that can be used to lookup information based
    on dataset type name, storage class name, dimensions.

    Parameters
    ----------
    name : `str`, optional
        Primary index string for lookup.  If this string looks like it
        represents dimensions (via ``dim1+dim2+dim3`` syntax) the name
        is converted to a `DimensionGraph` and stored in ``dimensions``
        property.
    dimensions : `DimensionGraph` or iterable of `str` or `Dimension`, optional
        Dimensions that are relevant for lookup. Should not be specified
        if ``name`` is also specified.  Anything that is not already a
        `DimensionGraph` is converted to one using ``universe``.
    dataId : `dict`, optional
        Keys and values from a dataId that should control lookups.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to expand and validate ``name`` or
        ``dimensions``.  Required if the key represents dimensions and a
        full `DimensionGraph` is not provided.

    Raises
    ------
    ValueError
        Raised if neither or both of ``name`` and ``dimensions`` are given,
        if ``name`` is not a `str`, or if dimensions have to be constructed
        but no ``universe`` was supplied.
    """

    def __init__(self, name: Optional[str] = None,
                 dimensions: Optional[Iterable[Union[str, Dimension]]] = None,
                 dataId: Optional[Dict[str, Any]] = None, *, universe: Optional[DimensionUniverse] = None):
        if name is None and dimensions is None:
            raise ValueError("At least one of name or dimensions must be given")

        if name is not None and dimensions is not None:
            raise ValueError("Can only accept one of name or dimensions")

        # Exactly one of these ends up non-None once construction succeeds.
        self._dimensions = None
        self._name = None

        if name is not None:

            if not isinstance(name, str):
                raise ValueError(f"Supplied name must be str not: '{name}'")

            if "+" in name:
                # If we are given a single dimension we use the "+" to
                # indicate this but have to filter out the empty value
                dimension_names = [n for n in name.split("+") if n]
                if universe is None:
                    raise ValueError(f"Cannot construct LookupKey for {name} without dimension universe.")
                else:
                    self._dimensions = universe.extract(dimension_names)
            else:
                self._name = name

        elif dimensions is not None:
            if not isinstance(dimensions, DimensionGraph):
                if universe is None:
                    raise ValueError(f"Cannot construct LookupKey for dimensions={dimensions} "
                                     "without universe.")
                else:
                    self._dimensions = universe.extract(dimensions)
            else:
                self._dimensions = dimensions
        else:
            # mypy cannot work this out on its own
            raise ValueError("Name was None but dimensions is also None")

        # The dataId is converted to a frozenset of key/value
        # tuples so that it is not mutable
        self._dataId = frozenset(dataId.items()) if dataId is not None else None

    def __str__(self) -> str:
        # Compare against None rather than relying on truthiness: an empty
        # string is an accepted name and must not be reported as an
        # internal error.
        if self._name is not None:
            name = self._name
        elif self._dimensions is not None:
            name = "+".join(self._dimensions.names)
        else:
            raise RuntimeError("Internal error since name and dimensions are both None")

        # For the simple case (no dataId entries) return the simple string
        if not self._dataId:
            return name

        return f"{name} ({self.dataId})"

    def __repr__(self) -> str:
        # Explicit None checks so that an empty name or an empty set of
        # dimensions is still reported rather than silently dropped.
        params = ""
        if self._name is not None:
            params += f"name={self._name!r},"
        if self._dimensions is not None:
            params += f"dimensions={self._dimensions!r},"
        if self._dataId:
            params += "dataId={" + ",".join(f"'{k}': {v!r}" for k, v in self._dataId) + "}"

        return f"{self.__class__.__name__}({params})"

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, type(self)):
            return False
        # Equal only if name, dimensions and dataId all agree.
        return bool(self._name == other._name
                    and self._dimensions == other._dimensions
                    and self._dataId == other._dataId)

    @property
    def name(self) -> Optional[str]:
        """Primary name string to use as lookup. (`str`)"""
        return self._name

    @property
    def dimensions(self) -> Optional[DimensionGraph]:
        """Dimensions associated with lookup. (`DimensionGraph`)"""
        return self._dimensions

    @property
    def dataId(self) -> Optional[Dict[str, Any]]:
        """Dict of keys/values that are important for dataId lookup.
        (`dict` or `None`)"""
        # A fresh dict is built on every access so callers can not modify
        # the internal immutable representation.
        if self._dataId is not None:
            return dict(self._dataId)
        else:
            return None

    def __hash__(self) -> int:
        """Hash the lookup to allow use as a key in a dict."""
        return hash((self._name, self._dimensions, self._dataId))

    def clone(self, name: Optional[str] = None, dimensions: Optional[DimensionGraph] = None,
              dataId: Optional[Dict[str, Any]] = None) -> LookupKey:
        """Clone the object, overriding some options.

        Used to create a new instance of the object whilst updating
        some of it.

        Parameters
        ----------
        name : `str`, optional
            Primary index string for lookup.  Will override ``dimensions``
            if ``dimensions`` are set.
        dimensions : `DimensionGraph`, optional
            Dimensions that are relevant for lookup.  Will override ``name``
            if ``name`` is already set.
        dataId : `dict`, optional
            Keys and values from a dataId that should control lookups.

        Returns
        -------
        clone : `LookupKey`
            Copy with updates.

        Raises
        ------
        ValueError
            Raised if both ``name`` and ``dimensions`` are given.  Also
            raised by the constructor if the supplied values need a
            dimension universe, since none is passed on by this method.
        """
        if name is not None and dimensions is not None:
            raise ValueError("Both name and dimensions can not be set")

        # if neither name nor dimensions are specified we copy from current
        # object.  Otherwise we'll use the supplied values
        if name is None and dimensions is None:
            name = self._name
            dimensions = self._dimensions

        # Make sure we use the dict form for the constructor
        if dataId is None and self._dataId is not None:
            dataId = self.dataId

        return self.__class__(name=name, dimensions=dimensions, dataId=dataId)

215 

216 

def processLookupConfigs(config: Config, *,
                         allow_hierarchy: bool = False,
                         universe: Optional[DimensionUniverse] = None) -> Dict[LookupKey,
                                                                                Union[str, Dict[str, Any]]]:
    """Process sections of configuration relating to lookups by dataset type
    name, storage class name, dimensions, or values of dimensions.

    Parameters
    ----------
    config : `Config`
        A `Config` representing a configuration mapping keys to values where
        the keys can be dataset type names, storage class names, dimensions
        or dataId components.
    allow_hierarchy : `bool`, optional
        If `True`, keys that refer to a hierarchy that does not look like
        a DataID specification are allowed and the full hierarchy, as a dict,
        will be returned in the value for the lookup key.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to expand and validate any used
        in lookup keys.

    Returns
    -------
    contents : `dict` of `LookupKey` to `str`
        A `dict` with keys constructed from the configuration keys and values
        being simple strings.  It is assumed the caller will convert the
        values to the required form.  If ``allow_hierarchy`` is `True` a
        value can also be the hierarchy found under that key.

    Raises
    ------
    RuntimeError
        Raised if a key refers to a hierarchy, is not of the form
        ``key<value>``, and ``allow_hierarchy`` is `False`.
    ValueError
        Raised by `LookupKey` if a key can not be converted to a lookup key.

    Notes
    -----
    The configuration is a mapping where the keys correspond to names
    that can refer to dataset type or storage class names, or can use a
    special syntax to refer to dimensions or dataId values.

    Dimensions are indicated by using dimension names separated by a ``+``.
    If a single dimension is specified this is also supported so long as
    a ``+`` is found.  Dimensions are normalized before use such that if
    ``physical_filter+visit`` is defined, then an implicit ``instrument``
    will automatically be added.

    DataID overrides can be specified using the form: ``field<value>`` to
    indicate a subhierarchy.  All keys within that new hierarchy will take
    precedence over equivalent values in the root hierarchy.

    Currently only a single dataId field can be specified for a key.
    For example with a config such as:

    .. code-block:: yaml

       something:
         calexp: value1
         instrument<HSC>:
           calexp: value2

    Requesting the match for ``calexp`` would return ``value1`` unless
    a `DatasetRef` is used with a dataId containing the key ``instrument``
    and value ``HSC``.

    The values of the mapping are stored as strings.
    """
    contents: Dict[LookupKey, Union[str, Dict[str, Any]]] = {}
    for name, value in config.items():
        is_hierarchy = isinstance(value, Mapping)

        # A hierarchy under a ``key<value>`` name is a dataId override.  This
        # is checked before any LookupKey is made from the name itself since
        # the name is not a lookup key in that case and, if the value part
        # contained a "+", would be misinterpreted as dimensions.
        kv = DATAID_RE.match(name) if is_hierarchy and isinstance(name, str) else None
        if kv:
            dataId = {kv.group(1): kv.group(2)}
            for subKey, subStr in value.items():
                lookup = LookupKey(name=subKey, dataId=dataId, universe=universe)
                contents[lookup] = subStr
            continue

        # Validates the name (raising ValueError if unusable) before the
        # hierarchy check, so the order of errors is as it always was.
        lookup = LookupKey(name=name, universe=universe)

        if is_hierarchy and not allow_hierarchy:
            raise RuntimeError(f"Hierarchical key '{name}' not in form 'key<value>'")

        contents[lookup] = value

    return contents

298 

299 

def processLookupConfigList(config: Iterable[Union[str, Mapping]],
                            *, universe: Optional[DimensionUniverse] = None) -> Set[LookupKey]:
    """Process sections of configuration relating to lookups by dataset type
    name, storage class name, dimensions, or values of dimensions.

    Parameters
    ----------
    config : `list` of `str` or `dict`
        Contents of a configuration listing keys that can be
        dataset type names, storage class names, dimensions
        or dataId components.  DataId components are represented as entries
        in the `list` of `dicts` with a single key with a value of a `list`
        of new keys.  A value that is a single `str` is treated as a list
        containing that one key.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to expand and validate any used
        in lookup keys.

    Returns
    -------
    lookups : `set` of `LookupKey`
        All the entries in the input list converted to `LookupKey` and
        returned in a `set`.

    Raises
    ------
    RuntimeError
        Raised if a dict entry does not contain exactly one key or if that
        key is not of the form ``key<value>``.
    ValueError
        Raised by `LookupKey` if an entry can not be converted to a lookup
        key.

    Notes
    -----
    Keys are parsed as described in `processLookupConfigs`.
    """
    contents = set()

    for name in config:
        if not isinstance(name, Mapping):
            contents.add(LookupKey(name=name, universe=universe))
            continue

        if len(name) != 1:
            raise RuntimeError(f"Config dict entry {name} does not have exactly one key present")

        # Exactly one item is present so it can be unpacked directly.
        ((dataIdLookUp, subKeys),) = name.items()

        kv = DATAID_RE.match(dataIdLookUp)
        if not kv:
            # Report the offending key rather than the whole dict entry.
            raise RuntimeError(f"Hierarchical key '{dataIdLookUp}' not in form 'key<value>'")

        # A bare string would otherwise be iterated character by character
        # and silently produce one lookup key per letter.
        if isinstance(subKeys, str):
            subKeys = [subKeys]

        dataId = {kv.group(1): kv.group(2)}
        for subKey in subKeys:
            contents.add(LookupKey(name=subKey, dataId=dataId, universe=universe))

    return contents