Coverage for python/lsst/daf/butler/_config_support.py: 23% of 121 statements (coverage.py v7.3.2, created at 2023-10-27 09:44 +0000)

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Support for configuration snippets"""

from __future__ import annotations

__all__ = ("LookupKey", "processLookupConfigs", "processLookupConfigList")

import logging
import re
from collections.abc import Iterable, Mapping
from typing import TYPE_CHECKING, Any

from .dimensions import DimensionGraph

if TYPE_CHECKING:
    from ._config import Config
    from .dimensions import Dimension, DimensionUniverse

log = logging.getLogger(__name__)

DATAID_RE = re.compile(r"([a-z_]+)<(.*)>$")
"""Regex to find dataIds embedded in configurations."""


class LookupKey:
    """Representation of a key that can be used to look up information.

    Lookup is based on dataset type name, storage class name, or dimensions.

    Parameters
    ----------
    name : `str`, optional
        Primary index string for lookup. If this string looks like it
        represents dimensions (via ``dim1+dim2+dim3`` syntax) the name
        is converted to a `DimensionGraph` and stored in the ``dimensions``
        property.
    dimensions : `DimensionGraph`, optional
        Dimensions that are relevant for lookup. Should not be specified
        if ``name`` is also specified.
    dataId : `dict`, optional
        Keys and values from a dataId that should control lookups.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to expand and validate ``name`` or
        ``dimensions``. Required if the key represents dimensions and a
        full `DimensionGraph` is not provided.
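
    Examples
    --------
    An illustrative sketch (not part of the original module), assuming a
    populated `DimensionUniverse` named ``universe`` is available:

    >>> key = LookupKey(name="calexp")
    >>> str(key)
    'calexp'
    >>> dim_key = LookupKey(name="physical_filter+visit", universe=universe)
    >>> dim_key.name is None  # the name was parsed into dimensions instead
    True
    >>> data_key = LookupKey(name="calexp", dataId={"instrument": "HSC"})
    >>> data_key.dataId
    {'instrument': 'HSC'}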

    """

    def __init__(
        self,
        name: str | None = None,
        dimensions: Iterable[str | Dimension] | None = None,
        dataId: dict[str, Any] | None = None,
        *,
        universe: DimensionUniverse | None = None,
    ):
        if name is None and dimensions is None:
            raise ValueError("At least one of name or dimensions must be given")

        if name is not None and dimensions is not None:
            raise ValueError("Can only accept one of name or dimensions")

        self._dimensions = None
        self._name = None

        if name is not None:
            if not isinstance(name, str):
                raise ValueError(f"Supplied name must be str not: '{name}'")

            if "+" in name:
                if universe is None:
                    raise ValueError(f"Cannot construct LookupKey for {name} without dimension universe.")

                # If we are given a single dimension we use the "+" to
                # indicate this but have to filter out the empty value
                dimension_names = [n for n in name.split("+") if n]
                try:
                    self._dimensions = universe.extract(dimension_names)
                except KeyError:
                    # One or more of the dimensions is not known to the
                    # universe. This could be a typo or it could be that
                    # a config is being used that is not compatible with
                    # this universe. Use the name directly as a lookup key
                    # but issue a warning. This will be potentially annoying
                    # in the scenario where a lookup key comes from a
                    # default config but the users are using an external
                    # universe.
                    unknown = [name for name in dimension_names if universe.get(name) is None]
                    log.debug(
                        "A LookupKey '%s' uses unknown dimensions: %s. Possible typo?"
                        " Using the name explicitly.",
                        name,
                        unknown,
                    )
                    self._name = name
            else:
                self._name = name

        elif dimensions is not None:
            if not isinstance(dimensions, DimensionGraph):
                if universe is None:
                    raise ValueError(
                        f"Cannot construct LookupKey for dimensions={dimensions} without universe."
                    )
                else:
                    self._dimensions = universe.extract(dimensions)
            else:
                self._dimensions = dimensions
        else:
            # mypy cannot work this out on its own
            raise ValueError("Name was None but dimensions is also None")

        # The dataId is converted to a frozenset of key/value
        # tuples so that it is not mutable
        self._dataId = frozenset(dataId.items()) if dataId is not None else None

    def __str__(self) -> str:
        # For the simple case return the simple string
        if self._name:
            name = self._name
        elif self._dimensions is not None:
            name = "+".join(self._dimensions.names)
        else:
            raise RuntimeError("Internal error since name and dimensions are both None")

        if not self._dataId:
            return name

        return f"{name} ({self.dataId})"

    def __repr__(self) -> str:
        params = ""
        if self.name:
            params += f"name={self.name!r},"
        if self.dimensions:
            params += f"dimensions={self.dimensions!r},"
        if self._dataId:
            params += "dataId={" + ",".join(f"'{k}': {v!r}" for k, v in self._dataId) + "}"

        return f"{self.__class__.__name__}({params})"

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, type(self)):
            return False
        if (
            self._name == other._name
            and self._dimensions == other._dimensions
            and self._dataId == other._dataId
        ):
            return True
        return False

    @property
    def name(self) -> str | None:
        """Primary name string to use as lookup (`str`)."""
        return self._name

    @property
    def dimensions(self) -> DimensionGraph | None:
        """Dimensions associated with lookup (`DimensionGraph`)."""
        return self._dimensions

    @property
    def dataId(self) -> dict[str, Any] | None:
        """Return dict of keys/values that are important for dataId lookup.

        (`dict` or `None`)
        """
        if self._dataId is not None:
            return dict(self._dataId)
        else:
            return None

    def __hash__(self) -> int:
        """Hash the lookup to allow use as a key in a dict."""
        return hash((self._name, self._dimensions, self._dataId))

    def clone(
        self,
        name: str | None = None,
        dimensions: DimensionGraph | None = None,
        dataId: dict[str, Any] | None = None,
    ) -> LookupKey:
        """Clone the object, overriding some options.

        Used to create a new instance of the object whilst updating
        some of it.

        Parameters
        ----------
        name : `str`, optional
            Primary index string for lookup. Will override ``dimensions``
            if ``dimensions`` are set.
        dimensions : `DimensionGraph`, optional
            Dimensions that are relevant for lookup. Will override ``name``
            if ``name`` is already set.
        dataId : `dict`, optional
            Keys and values from a dataId that should control lookups.

        Returns
        -------
        clone : `LookupKey`
            Copy with updates.
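
        Examples
        --------
        An illustrative sketch (not part of the original module):

        >>> key = LookupKey(name="calexp")
        >>> key.clone(dataId={"instrument": "HSC"}).dataId
        {'instrument': 'HSC'}
        >>> key.clone(name="deepCoadd").name
        'deepCoadd'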

        """
        if name is not None and dimensions is not None:
            raise ValueError("Both name and dimensions can not be set")

        # if neither name nor dimensions are specified we copy from current
        # object. Otherwise we'll use the supplied values
        if name is None and dimensions is None:
            name = self._name
            dimensions = self._dimensions

        # Make sure we use the dict form for the constructor
        if dataId is None and self._dataId is not None:
            dataId = self.dataId

        return self.__class__(name=name, dimensions=dimensions, dataId=dataId)


def processLookupConfigs(
    config: Config, *, allow_hierarchy: bool = False, universe: DimensionUniverse | None = None
) -> dict[LookupKey, str | dict[str, Any]]:
    """Process sections of configuration relating to lookups.

    Can be by dataset type name, storage class name, dimensions, or values
    of dimensions.

    Parameters
    ----------
    config : `Config`
        A `Config` representing a configuration mapping keys to values where
        the keys can be dataset type names, storage class names, dimensions
        or dataId components.
    allow_hierarchy : `bool`, optional
        If `True`, keys that refer to a hierarchy that does not look like
        a dataId specification are allowed and the full hierarchy, as a dict,
        will be returned in the value for the lookup key.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to expand and validate any used
        in lookup keys.

    Returns
    -------
    contents : `dict` of `LookupKey` to `str`
        A `dict` with keys constructed from the configuration keys and values
        being simple strings. It is assumed the caller will convert the
        values to the required form.

    Notes
    -----
    The configuration is a mapping where the keys correspond to names
    that can refer to dataset type or storage class names, or can use a
    special syntax to refer to dimensions or dataId values.

    Dimensions are indicated by using dimension names separated by a ``+``.
    If a single dimension is specified this is also supported so long as
    a ``+`` is found. Dimensions are normalized before use such that if
    ``physical_filter+visit`` is defined, then an implicit ``instrument``
    will automatically be added.

    DataId overrides can be specified using the form ``field<value>`` to
    indicate a subhierarchy. All keys within that new hierarchy will take
    precedence over equivalent values in the root hierarchy.

    Currently only a single dataId field can be specified for a key.
    For example, with a config such as:

    .. code-block:: yaml

       something:
         calexp: value1
         instrument<HSC>:
           calexp: value2

    Requesting the match for ``calexp`` would return ``value1`` unless
    a `DatasetRef` is used with a dataId containing the key ``instrument``
    and value ``HSC``.

    The values of the mapping are stored as strings.
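
    Examples
    --------
    An illustrative sketch (not part of the original source), assuming
    ``config`` is a `Config` holding the ``something`` section shown above:

    >>> contents = processLookupConfigs(config)
    >>> contents[LookupKey(name="calexp")]
    'value1'
    >>> contents[LookupKey(name="calexp", dataId={"instrument": "HSC"})]
    'value2'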

    """
    contents = {}
    for name, value in config.items():
        lookup = LookupKey(name=name, universe=universe)

        if isinstance(value, Mapping):
            # indicates a dataId component -- check the format
            kv = DATAID_RE.match(name)
            if kv:
                dataIdKey = kv.group(1)
                dataIdValue = kv.group(2)
                for subKey, subStr in value.items():
                    lookup = LookupKey(name=subKey, dataId={dataIdKey: dataIdValue}, universe=universe)
                    contents[lookup] = subStr
            elif allow_hierarchy:
                contents[lookup] = value
            else:
                raise RuntimeError(f"Hierarchical key '{name}' not in form 'key<value>'")
        else:
            contents[lookup] = value

    return contents


def processLookupConfigList(
    config: Iterable[str | Mapping], *, universe: DimensionUniverse | None = None
) -> set[LookupKey]:
    """Process sections of configuration relating to lookups.

    Can be by dataset type name, storage class name, dimensions, or values
    of dimensions.

    Parameters
    ----------
    config : `list` of `str` or `dict`
        Contents of a configuration listing keys that can be
        dataset type names, storage class names, dimensions
        or dataId components. DataId components are represented as entries
        in the `list` that are `dict`s with a single key whose value is a
        `list` of new keys.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to expand and validate any used
        in lookup keys.

    Returns
    -------
    lookups : `set` of `LookupKey`
        All the entries in the input list converted to `LookupKey` and
        returned in a `set`.

    Notes
    -----
    Keys are parsed as described in `processLookupConfigs`.
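
    Examples
    --------
    An illustrative sketch (not part of the original source); the dataset
    type names used here are just placeholders:

    >>> processLookupConfigList(["calexp", {"instrument<HSC>": ["deepCoadd"]}]) == {
    ...     LookupKey(name="calexp"),
    ...     LookupKey(name="deepCoadd", dataId={"instrument": "HSC"}),
    ... }
    True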

    """
    contents = set()

    for name in config:
        if isinstance(name, Mapping):
            if len(name) != 1:
                raise RuntimeError(f"Config dict entry {name} has more than one key present")
            for dataIdLookUp, subKeys in name.items():
                kv = DATAID_RE.match(dataIdLookUp)
                if kv:
                    dataIdKey = kv.group(1)
                    dataIdValue = kv.group(2)
                    for subKey in subKeys:
                        lookup = LookupKey(name=subKey, dataId={dataIdKey: dataIdValue}, universe=universe)
                        contents.add(lookup)
                else:
                    raise RuntimeError(f"Hierarchical key '{name}' not in form 'key<value>'")
        else:
            contents.add(LookupKey(name=name, universe=universe))

    return contents