Coverage for python/lsst/daf/butler/_config_support.py: 24%

120 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-03-30 09:59 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28"""Support for configuration snippets.""" 

29 

30from __future__ import annotations 

31 

32__all__ = ("LookupKey", "processLookupConfigs", "processLookupConfigList") 

33 

34import logging 

35import re 

36from collections.abc import Iterable, Mapping 

37from typing import TYPE_CHECKING, Any 

38 

39from ._config import Config 

40from .dimensions import DimensionGroup 

41 

42if TYPE_CHECKING: 

43 from .dimensions import DimensionUniverse 

44 

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Applied with ``match`` to configuration keys of the form ``field<value>``:
# group 1 captures the field name (lower-case letters and underscores) and
# group 2 captures everything between the angle brackets.
DATAID_RE = re.compile(r"([a-z_]+)<(.*)>$")
"""Regex to find dataIds embedded in configurations."""

49 

50 

class LookupKey:
    """Representation of key that can be used to lookup information.

    Look up is based on dataset type name, storage class name, dimensions.

    Parameters
    ----------
    name : `str`, optional
        Primary index string for lookup.  If this string looks like it
        represents dimensions (via ``dim1+dim2+dim3`` syntax) the name
        is converted to a `DimensionGroup` and stored in ``dimensions``
        property.
    dimensions : `DimensionGroup`, optional
        Dimensions that are relevant for lookup.  Should not be specified
        if ``name`` is also specified.
    dataId : `dict`, optional
        Keys and values from a dataId that should control lookups.
        The items are stored in a `frozenset`, so the values must be
        hashable.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to expand and validate ``name`` or
        ``dimensions``.  Required if the key represents dimensions and a
        full `DimensionGroup` is not provided.

    Raises
    ------
    ValueError
        Raised if neither or both of ``name`` and ``dimensions`` are given,
        if ``name`` is not a `str`, or if ``name`` uses the ``+`` dimension
        syntax and no ``universe`` was supplied.
    """

    def __init__(
        self,
        name: str | None = None,
        dimensions: DimensionGroup | None = None,
        dataId: dict[str, Any] | None = None,
        *,
        universe: DimensionUniverse | None = None,
    ):
        if name is None and dimensions is None:
            raise ValueError("At least one of name or dimensions must be given")

        if name is not None and dimensions is not None:
            raise ValueError("Can only accept one of name or dimensions")

        self._dimensions = None
        self._name = None

        if name is not None:
            if not isinstance(name, str):
                raise ValueError(f"Supplied name must be str not: '{name}'")

            if "+" in name:
                if universe is None:
                    raise ValueError(f"Cannot construct LookupKey for {name} without dimension universe.")

                # If we are given a single dimension we use the "+" to
                # indicate this but have to filter out the empty value
                dimension_names = [n for n in name.split("+") if n]
                try:
                    self._dimensions = universe.conform(dimension_names)
                except KeyError:
                    # One or more of the dimensions is not known to the
                    # universe. This could be a typo or it could be that
                    # a config is being used that is not compatible with
                    # this universe. Use the name directly as a lookup key
                    # and log the problem at debug level; anything louder
                    # would be annoying in the scenario where a lookup key
                    # comes from a default config but the users are using
                    # an external universe.
                    unknown = [dim for dim in dimension_names if universe.get(dim) is None]
                    log.debug(
                        "A LookupKey '%s' uses unknown dimensions: %s. Possible typo?"
                        " Using the name explicitly.",
                        name,
                        unknown,
                    )
                    self._name = name
            else:
                self._name = name
        else:
            # The checks above guarantee dimensions is not None here.
            self._dimensions = dimensions

        # The dataId is converted to a frozenset of key/value
        # tuples so that it is not mutable
        self._dataId = frozenset(dataId.items()) if dataId is not None else None

    def __str__(self) -> str:
        # Compare against None rather than relying on truthiness so that
        # an empty name is still reported as a name instead of falling
        # through to the internal-error branch.
        if self._name is not None:
            name = self._name
        elif self._dimensions is not None:
            name = "+".join(self._dimensions.names)
        else:
            raise RuntimeError("Internal error since name and dimensions are both None")

        # For the simple case return the simple string
        if not self._dataId:
            return name

        return f"{name} ({self.dataId})"

    def __repr__(self) -> str:
        # Use explicit None checks so that an empty name or a falsy
        # dimensions object is still shown.
        params = ""
        if self._name is not None:
            params += f"name={self._name!r},"
        if self._dimensions is not None:
            params += f"dimensions={self._dimensions!r},"
        if self._dataId:
            params += "dataId={" + ",".join(f"'{k}': {v!r}" for k, v in self._dataId) + "}"

        return f"{self.__class__.__name__}({params})"

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, type(self)):
            return False
        return (
            self._name == other._name
            and self._dimensions == other._dimensions
            and self._dataId == other._dataId
        )

    @property
    def name(self) -> str | None:
        """Primary name string to use as lookup (`str`)."""
        return self._name

    @property
    def dimensions(self) -> DimensionGroup | None:
        """Dimensions associated with lookup (`DimensionGroup`)."""
        return self._dimensions

    @property
    def dataId(self) -> dict[str, Any] | None:
        """Return dict of keys/values that are important for dataId lookup.

        A new `dict` is built on every access, so modifying the returned
        value does not affect this key.

        (`dict` or `None`)
        """
        if self._dataId is None:
            return None
        return dict(self._dataId)

    def __hash__(self) -> int:
        """Hash the lookup to allow use as a key in a dict."""
        return hash((self._name, self._dimensions, self._dataId))

    def clone(
        self,
        name: str | None = None,
        dimensions: DimensionGroup | None = None,
        dataId: dict[str, Any] | None = None,
        *,
        universe: DimensionUniverse | None = None,
    ) -> LookupKey:
        """Clone the object, overriding some options.

        Used to create a new instance of the object whilst updating
        some of it.

        Parameters
        ----------
        name : `str`, optional
            Primary index string for lookup.  Will override ``dimensions``
            if ``dimensions`` are set.
        dimensions : `DimensionGroup`, optional
            Dimensions that are relevant for lookup.  Will override ``name``
            if ``name`` is already set.
        dataId : `dict`, optional
            Keys and values from a dataId that should control lookups.
            If not given, the dataId of this key is reused.
        universe : `DimensionUniverse`, optional
            Set of all known dimensions, forwarded to the constructor.
            Required if ``name`` uses the ``dim1+dim2`` dimension syntax.

        Returns
        -------
        clone : `LookupKey`
            Copy with updates.

        Raises
        ------
        ValueError
            Raised if both ``name`` and ``dimensions`` are given, or if the
            constructor rejects the resulting combination.
        """
        if name is not None and dimensions is not None:
            raise ValueError("Both name and dimensions can not be set")

        # if neither name nor dimensions are specified we copy from current
        # object. Otherwise we'll use the supplied values
        if name is None and dimensions is None:
            name = self._name
            dimensions = self._dimensions

        # Make sure we use the dict form for the constructor
        if dataId is None and self._dataId is not None:
            dataId = self.dataId

        return self.__class__(name=name, dimensions=dimensions, dataId=dataId, universe=universe)

236 

237 

def processLookupConfigs(
    config: Config, *, allow_hierarchy: bool = False, universe: DimensionUniverse | None = None
) -> dict[LookupKey, str | dict[str, Any]]:
    """Process sections of configuration relating to lookups.

    Can be by dataset type name, storage class name, dimensions, or values
    of dimensions.

    Parameters
    ----------
    config : `Config`
        A `Config` representing a configuration mapping keys to values where
        the keys can be dataset type names, storage class names, dimensions
        or dataId components.
    allow_hierarchy : `bool`, optional
        If `True`, keys that refer to a hierarchy that does not look like
        a DataID specification are allowed and the full hierarchy, as a dict,
        will be returned in the value for the lookup key.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to expand and validate any used
        in lookup keys.

    Returns
    -------
    contents : `dict` of `LookupKey` to `str` or `dict`
        A `dict` with keys constructed from the configuration keys.  The
        values are taken unchanged from the configuration, except that
        with ``allow_hierarchy`` a hierarchical `Config` value is converted
        to a `dict`.  It is assumed the caller will convert the values to
        the required form.

    Raises
    ------
    RuntimeError
        Raised if a key has a hierarchical value, is not of the form
        ``key<value>``, and ``allow_hierarchy`` is `False`.

    Notes
    -----
    The configuration is a mapping where the keys correspond to names
    that can refer to dataset type or storage class names, or can use a
    special syntax to refer to dimensions or dataId values.

    Dimensions are indicated by using dimension names separated by a ``+``.
    If a single dimension is specified this is also supported so long as
    a ``+`` is found.  Dimensions are normalized before use such that if
    ``physical_filter+visit`` is defined, then an implicit ``instrument``
    will automatically be added.

    DataID overrides can be specified using the form: ``field<value>`` to
    indicate a subhierarchy.  All keys within that new hierarchy will take
    precedence over equivalent values in the root hierarchy.

    Currently only a single dataId field can be specified for a key.
    For example with a config such as:

    .. code-block:: yaml

       something:
         calexp: value1
         instrument<HSC>:
           calexp: value2

    Requesting the match for ``calexp`` would return ``value1`` unless
    a `DatasetRef` is used with a dataId containing the key ``instrument``
    and value ``HSC``.
    """
    contents: dict[LookupKey, str | dict[str, Any]] = {}
    for name, value in config.items():
        if not isinstance(value, Mapping):
            # Simple key: the value is stored as given.
            contents[LookupKey(name=name, universe=universe)] = value
            continue

        # A mapping indicates a dataId component -- check the format.
        # The raw key is deliberately not turned into a LookupKey until it
        # is known to be needed: a ``field<value>`` key is not itself a
        # lookup name.
        kv = DATAID_RE.match(name)
        if kv:
            dataId = {kv.group(1): kv.group(2)}
            for subKey, subStr in value.items():
                contents[LookupKey(name=subKey, dataId=dataId, universe=universe)] = subStr
        elif allow_hierarchy:
            lookup = LookupKey(name=name, universe=universe)
            # Converting to a dict makes subsequent lookups much faster.
            contents[lookup] = value.toDict() if isinstance(value, Config) else value
        else:
            raise RuntimeError(f"Hierarchical key '{name}' not in form 'key<value>'")

    return contents

325 

326 

def processLookupConfigList(
    config: Iterable[str | Mapping], *, universe: DimensionUniverse | None = None
) -> set[LookupKey]:
    """Process sections of configuration relating to lookups.

    Can be by dataset type name, storage class name, dimensions, or values
    of dimensions.

    Parameters
    ----------
    config : `list` of `str` or `dict`
        Contents of a configuration listing keys that can be
        dataset type names, storage class names, dimensions
        or dataId components.  DataId components are represented as entries
        in the `list` of `dicts` with a single key with a value of a `list`
        of new keys.  A single `str` value is treated as a one-element
        list.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to expand and validate any used
        in lookup keys.

    Returns
    -------
    lookups : `set` of `LookupKey`
        All the entries in the input list converted to `LookupKey` and
        returned in a `set`.

    Raises
    ------
    RuntimeError
        Raised if a `dict` entry does not contain exactly one key, or if
        that key is not of the form ``key<value>``.

    Notes
    -----
    Keys are parsed as described in `processLookupConfigs`.
    """
    contents: set[LookupKey] = set()

    for name in config:
        if not isinstance(name, Mapping):
            contents.add(LookupKey(name=name, universe=universe))
            continue

        if len(name) != 1:
            raise RuntimeError(f"Config dict entry {name} must have exactly one key")

        # Exactly one item is present, so unpack it directly.
        ((dataIdLookUp, subKeys),) = name.items()
        kv = DATAID_RE.match(dataIdLookUp)
        if not kv:
            # Report the offending key rather than the whole entry.
            raise RuntimeError(f"Hierarchical key '{dataIdLookUp}' not in form 'key<value>'")

        # A bare string would otherwise be iterated character by character.
        if isinstance(subKeys, str):
            subKeys = [subKeys]

        dataId = {kv.group(1): kv.group(2)}
        contents.update(LookupKey(name=subKey, dataId=dataId, universe=universe) for subKey in subKeys)

    return contents

375 

376 return contents