Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

"""Support for configuration snippets"""

# The module docstring must be the first statement in the file to be stored
# as ``__doc__``; a docstring is allowed to precede a ``__future__`` import.
from __future__ import annotations

25 

26__all__ = ("LookupKey", "processLookupConfigs", 

27 "processLookupConfigList") 

28 

29import logging 

30import re 

31from collections.abc import Mapping 

32 

33from typing import ( 

34 TYPE_CHECKING, 

35 Any, 

36 Dict, 

37 Iterable, 

38 Optional, 

39 Set, 

40 Union, 

41) 

42 

43from .dimensions import DimensionGraph 

44 

45if TYPE_CHECKING: 45 ↛ 46line 45 didn't jump to line 46, because the condition on line 45 was never true

46 from .dimensions import DimensionUniverse, Dimension 

47 from .config import Config 

48 

# Module-level logger named after this module.
log = logging.getLogger(__name__)

# Matches keys of the form ``field<value>``.  Group 1 is the field name
# (one or more lowercase letters or underscores); group 2 is everything
# between the ``<`` that follows the field name and the final ``>`` at the
# end of the key.  Callers in this module use ``.match`` so the pattern is
# anchored at the start of the key as well.
DATAID_RE = re.compile(r"([a-z_]+)<(.*)>$")
"""Regex to find dataIds embedded in configurations."""

53 

54 

class LookupKey:
    """Representation of key that can be used to lookup information based
    on dataset type name, storage class name, dimensions.

    Exactly one of ``name`` or ``dimensions`` must be supplied.

    Parameters
    ----------
    name : `str`, optional
        Primary index string for lookup.  If this string looks like it
        represents dimensions (via ``dim1+dim2+dim3`` syntax) the name
        is converted to a `DimensionGraph` and stored in ``dimensions``
        property.
    dimensions : `DimensionGraph` or iterable, optional
        Dimensions that are relevant for lookup.  Should not be specified
        if ``name`` is also specified.  Anything that is not already a
        `DimensionGraph` is converted using ``universe.extract``.
    dataId : `dict`, optional
        Keys and values from a dataId that should control lookups.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to expand and validate ``name`` or
        ``dimensions``.  Required if the key represents dimensions and a
        full `DimensionGraph` is not provided.

    Raises
    ------
    ValueError
        Raised if neither or both of ``name`` and ``dimensions`` are given,
        if ``name`` is not a `str`, or if dimensions need to be expanded
        but no ``universe`` was supplied.
    """

    def __init__(self, name: Optional[str] = None,
                 dimensions: Optional[Iterable[Union[str, Dimension]]] = None,
                 dataId: Optional[Dict[str, Any]] = None, *, universe: Optional[DimensionUniverse] = None):
        if name is None and dimensions is None:
            raise ValueError("At least one of name or dimensions must be given")

        if name is not None and dimensions is not None:
            raise ValueError("Can only accept one of name or dimensions")

        self._dimensions = None
        self._name = None

        if name is not None:

            if not isinstance(name, str):
                raise ValueError(f"Supplied name must be str not: '{name}'")

            if "+" in name:
                # If we are given a single dimension we use the "+" to
                # indicate this but have to filter out the empty value
                dimension_names = [n for n in name.split("+") if n]
                if universe is None:
                    raise ValueError(f"Cannot construct LookupKey for {name} without dimension universe.")
                else:
                    self._dimensions = universe.extract(dimension_names)
            else:
                self._name = name

        elif dimensions is not None:
            if not isinstance(dimensions, DimensionGraph):
                if universe is None:
                    raise ValueError(f"Cannot construct LookupKey for dimensions={dimensions} "
                                     "without universe.")
                else:
                    self._dimensions = universe.extract(dimensions)
            else:
                self._dimensions = dimensions
        else:
            # mypy cannot work this out on its own
            raise ValueError("Name was None but dimensions is also None")

        # The dataId is converted to a frozenset of key/value
        # tuples so that it is not mutable
        self._dataId = frozenset(dataId.items()) if dataId is not None else None

    def __str__(self) -> str:
        # Compare against None rather than relying on truthiness: an empty
        # string is accepted as a name by the constructor and must not be
        # reported as an internal error here.
        if self._name is not None:
            name = self._name
        elif self._dimensions is not None:
            name = "+".join(self._dimensions.names)
        else:
            raise RuntimeError("Internal error since name and dimensions are both None")

        # For the simple case (no dataId, or an empty one) return the
        # simple string
        if not self._dataId:
            return name

        return f"{name} ({self.dataId})"

    def __repr__(self) -> str:
        params = ""
        # Use explicit None checks so that whichever of name/dimensions was
        # stored is always reported, even if it happens to be falsy.
        if self.name is not None:
            params += f"name={self.name!r},"
        if self.dimensions is not None:
            params += f"dimensions={self.dimensions!r},"
        if self._dataId:
            params += "dataId={" + ",".join(f"'{k}': {v!r}" for k, v in self._dataId) + "}"

        return f"{self.__class__.__name__}({params})"

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, type(self)):
            return False
        if self._name == other._name and self._dimensions == other._dimensions and \
                self._dataId == other._dataId:
            return True
        return False

    @property
    def name(self) -> Optional[str]:
        """Primary name string to use as lookup. (`str`)"""
        return self._name

    @property
    def dimensions(self) -> Optional[DimensionGraph]:
        """Dimensions associated with lookup. (`DimensionGraph`)"""
        return self._dimensions

    @property
    def dataId(self) -> Optional[Dict[str, Any]]:
        """Dict of keys/values that are important for dataId lookup.
        (`dict` or `None`)

        A new `dict` is built on every access from the internal frozen
        representation.
        """
        if self._dataId is not None:
            return dict(self._dataId)
        else:
            return None

    def __hash__(self) -> int:
        """Hash the lookup to allow use as a key in a dict."""
        return hash((self._name, self._dimensions, self._dataId))

    def clone(self, name: Optional[str] = None, dimensions: Optional[DimensionGraph] = None,
              dataId: Optional[Dict[str, Any]] = None) -> LookupKey:
        """Clone the object, overriding some options.

        Used to create a new instance of the object whilst updating
        some of it.

        Parameters
        ----------
        name : `str`, optional
            Primary index string for lookup.  Will override ``dimensions``
            if ``dimensions`` are set.
        dimensions : `DimensionGraph`, optional
            Dimensions that are relevant for lookup.  Will override ``name``
            if ``name`` is already set.
        dataId : `dict`, optional
            Keys and values from a dataId that should control lookups.
            If `None` the dataId of this object is reused.

        Returns
        -------
        clone : `LookupKey`
            Copy with updates.

        Raises
        ------
        ValueError
            Raised if both ``name`` and ``dimensions`` are given.  Also
            raised by the constructor if the supplied values would need a
            dimension universe (a ``name`` containing ``+`` or
            ``dimensions`` that is not a `DimensionGraph`), since no
            universe is passed to the new instance.
        """
        if name is not None and dimensions is not None:
            raise ValueError("Both name and dimensions can not be set")

        # if neither name nor dimensions are specified we copy from current
        # object. Otherwise we'll use the supplied values
        if name is None and dimensions is None:
            name = self._name
            dimensions = self._dimensions

        # Make sure we use the dict form for the constructor
        if dataId is None and self._dataId is not None:
            dataId = self.dataId

        return self.__class__(name=name, dimensions=dimensions, dataId=dataId)

215 

216 

def processLookupConfigs(config: Config, *,
                         universe: Optional[DimensionUniverse] = None) -> Dict[LookupKey, str]:
    """Convert a lookup configuration mapping into `LookupKey` entries.

    The keys of the configuration may be dataset type names, storage class
    names, dimensions, or dataId overrides.

    Parameters
    ----------
    config : `Config`
        Mapping of configuration keys to values.  A value that is itself
        a mapping denotes a dataId override section and its key must be
        of the form ``field<value>``.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, forwarded to every `LookupKey` that
        is constructed so that dimension-style keys can be expanded and
        validated.

    Returns
    -------
    contents : `dict` of `LookupKey` to `str`
        One entry per top-level non-mapping key plus one entry per key
        found inside each dataId override section.  Values are passed
        through unchanged; the caller is expected to convert them to
        whatever form is required.

    Raises
    ------
    RuntimeError
        Raised if a key with a mapping value is not of the form
        ``key<value>``.

    Notes
    -----
    Dimensions are indicated by dimension names separated by ``+``.  A
    single dimension is supported as long as a ``+`` is present in the key.
    Dimensions are normalized before use such that if
    ``physical_filter+visit`` is defined, then an implicit ``instrument``
    will automatically be added.

    A dataId override uses the form ``field<value>`` to introduce a
    sub-hierarchy.  Keys inside that hierarchy take precedence over the
    equivalent keys in the root hierarchy.  Only a single dataId field can
    currently be given per key.  For example, with:

    .. code::

       something:
         calexp: value1
         instrument<HSC>:
           calexp: value2

    a match for ``calexp`` yields ``value1`` unless a `DatasetRef` is used
    whose dataId contains the key ``instrument`` with value ``HSC``.
    """
    lookups = {}
    for name, entry in config.items():
        if not isinstance(entry, Mapping):
            # Plain entry: the configuration key itself is the lookup name.
            lookups[LookupKey(name=name, universe=universe)] = entry
            continue

        # A nested mapping is only allowed beneath a ``field<value>`` key.
        matched = DATAID_RE.match(name)
        if not matched:
            raise RuntimeError(f"Hierarchical key '{name}' not in form 'key<value>'")

        field, fieldValue = matched.group(1), matched.group(2)
        for nestedName, nestedEntry in entry.items():
            nestedKey = LookupKey(name=nestedName, dataId={field: fieldValue}, universe=universe)
            lookups[nestedKey] = nestedEntry

    return lookups

289 

290 

def processLookupConfigList(config: Iterable[Union[str, Mapping]],
                            *, universe: Optional[DimensionUniverse] = None) -> Set[LookupKey]:
    """Process sections of configuration relating to lookups by dataset type
    name, storage class name, dimensions, or values of dimensions.

    Parameters
    ----------
    config : `list` of `str` or `dict`
        Contents of a configuration listing keys that can be
        dataset type names, storage class names, dimensions
        or dataId components.  DataId components are represented as entries
        in the `list` of `dicts` with a single key with a value of a `list`
        of new keys.  A single `str` value is treated as a one-element
        list.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to expand and validate any used
        in lookup keys.

    Returns
    -------
    lookups : `set` of `LookupKey`
        All the entries in the input list converted to `LookupKey` and
        returned in a `set`.

    Raises
    ------
    RuntimeError
        Raised if a `dict` entry does not contain exactly one key, or if
        that key is not of the form ``key<value>``.

    Notes
    -----
    Keys are parsed as described in `processLookupConfigs`.
    """
    contents = set()

    for name in config:
        if not isinstance(name, Mapping):
            contents.add(LookupKey(name=name, universe=universe))
            continue

        # This check rejects empty mappings as well as those with several
        # keys, so the message must cover both.
        if len(name) != 1:
            raise RuntimeError(f"Config dict entry {name} must have exactly one key present")

        for dataIdLookUp, subKeys in name.items():
            kv = DATAID_RE.match(dataIdLookUp)
            if not kv:
                # Report the offending key, not the whole mapping.
                raise RuntimeError(f"Hierarchical key '{dataIdLookUp}' not in form 'key<value>'")

            dataIdKey = kv.group(1)
            dataIdValue = kv.group(2)

            # A bare string would otherwise be iterated one character at
            # a time, silently producing a key per character.
            if isinstance(subKeys, str):
                subKeys = [subKeys]

            for subKey in subKeys:
                lookup = LookupKey(name=subKey, dataId={dataIdKey: dataIdValue}, universe=universe)
                contents.add(lookup)

    return contents