Coverage for python/lsst/daf/butler/core/configSupport.py: 22%

124 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2022-08-31 10:07 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

"""Support for configuration snippets"""

# The docstring must be the first statement of the module to become its
# ``__doc__``; the future import is only allowed to follow a docstring,
# comments and blank lines.
from __future__ import annotations

25 

26__all__ = ("LookupKey", "processLookupConfigs", "processLookupConfigList") 

27 

28import logging 

29import re 

30from collections.abc import Mapping 

31from typing import TYPE_CHECKING, Any, Dict, Iterable, Optional, Set, Union 

32 

33from .dimensions import DimensionGraph 

34 

35if TYPE_CHECKING: 35 ↛ 36line 35 didn't jump to line 36, because the condition on line 35 was never true

36 from .config import Config 

37 from .dimensions import Dimension, DimensionUniverse 

38 

# Module-level logger named after this module.
log = logging.getLogger(__name__)

# Applied with ``match`` to configuration keys of the form ``key<value>``:
# group 1 is the dataId key (lower-case letters and underscores only) and
# group 2 is everything between the angle brackets.
DATAID_RE = re.compile(r"([a-z_]+)<(.*)>$")
"""Regex to find dataIds embedded in configurations."""

43 

44 

45class LookupKey: 

46 """Representation of key that can be used to lookup information. 

47 

48 Look up is based on dataset type name, storage class name, dimensions. 

49 

50 Parameters 

51 ---------- 

52 name : `str`, optional 

53 Primary index string for lookup. If this string looks like it 

54 represents dimensions (via ``dim1+dim2+dim3`` syntax) the name 

55 is converted to a `DimensionGraph` and stored in ``dimensions`` 

56 property. 

57 dimensions : `DimensionGraph`, optional 

58 Dimensions that are relevant for lookup. Should not be specified 

59 if ``name`` is also specified. 

60 dataId : `dict`, optional 

61 Keys and values from a dataId that should control lookups. 

62 universe : `DimensionUniverse`, optional 

63 Set of all known dimensions, used to expand and validate ``name`` or 

64 ``dimensions``. Required if the key represents dimensions and a 

65 full `DimensionGraph` is not provided. 

66 """ 

67 

68 def __init__( 

69 self, 

70 name: Optional[str] = None, 

71 dimensions: Optional[Iterable[Union[str, Dimension]]] = None, 

72 dataId: Optional[Dict[str, Any]] = None, 

73 *, 

74 universe: Optional[DimensionUniverse] = None, 

75 ): 

76 if name is None and dimensions is None: 76 ↛ 77line 76 didn't jump to line 77, because the condition on line 76 was never true

77 raise ValueError("At least one of name or dimensions must be given") 

78 

79 if name is not None and dimensions is not None: 79 ↛ 80line 79 didn't jump to line 80, because the condition on line 79 was never true

80 raise ValueError("Can only accept one of name or dimensions") 

81 

82 self._dimensions = None 

83 self._name = None 

84 

85 if name is not None: 85 ↛ 119line 85 didn't jump to line 119, because the condition on line 85 was never false

86 

87 if not isinstance(name, str): 87 ↛ 88line 87 didn't jump to line 88, because the condition on line 87 was never true

88 raise ValueError(f"Supplied name must be str not: '{name}'") 

89 

90 if "+" in name: 90 ↛ 91line 90 didn't jump to line 91, because the condition on line 90 was never true

91 if universe is None: 

92 raise ValueError(f"Cannot construct LookupKey for {name} without dimension universe.") 

93 

94 # If we are given a single dimension we use the "+" to 

95 # indicate this but have to filter out the empty value 

96 dimension_names = [n for n in name.split("+") if n] 

97 try: 

98 self._dimensions = universe.extract(dimension_names) 

99 except KeyError: 

100 # One or more of the dimensions is not known to the 

101 # universe. This could be a typo or it could be that 

102 # a config is being used that is not compatible with 

103 # this universe. Use the name directly as a lookup key 

104 # but issue a warning. This will be potentially annoying 

105 # in the scenario where a lookup key comes from a 

106 # default config but the users are using an external 

107 # universe. 

108 unknown = [name for name in dimension_names if universe.get(name) is None] 

109 log.debug( 

110 "A LookupKey '%s' uses unknown dimensions: %s. Possible typo?" 

111 " Using the name explicitly.", 

112 name, 

113 unknown, 

114 ) 

115 self._name = name 

116 else: 

117 self._name = name 

118 

119 elif dimensions is not None: 

120 if not isinstance(dimensions, DimensionGraph): 

121 if universe is None: 

122 raise ValueError( 

123 f"Cannot construct LookupKey for dimensions={dimensions} without universe." 

124 ) 

125 else: 

126 self._dimensions = universe.extract(dimensions) 

127 else: 

128 self._dimensions = dimensions 

129 else: 

130 # mypy cannot work this out on its own 

131 raise ValueError("Name was None but dimensions is also None") 

132 

133 # The dataId is converted to a frozenset of key/value 

134 # tuples so that it is not mutable 

135 self._dataId = frozenset(dataId.items()) if dataId is not None else None 

136 

137 def __str__(self) -> str: 

138 # For the simple case return the simple string 

139 if self._name: 

140 name = self._name 

141 elif self._dimensions is not None: 

142 name = "+".join(self._dimensions.names) 

143 else: 

144 raise RuntimeError("Internal error since name and dimensions are both None") 

145 

146 if not self._dataId: 

147 return name 

148 

149 return f"{name} ({self.dataId})" 

150 

151 def __repr__(self) -> str: 

152 params = "" 

153 if self.name: 

154 params += f"name={self.name!r}," 

155 if self.dimensions: 

156 params += f"dimensions={self.dimensions!r}," 

157 if self._dataId: 

158 params += "dataId={" + ",".join(f"'{k}': {v!r}" for k, v in self._dataId) + "}" 

159 

160 return f"{self.__class__.__name__}({params})" 

161 

162 def __eq__(self, other: Any) -> bool: 

163 if not isinstance(other, type(self)): 

164 return False 

165 if ( 

166 self._name == other._name 

167 and self._dimensions == other._dimensions 

168 and self._dataId == other._dataId 

169 ): 

170 return True 

171 return False 

172 

173 @property 

174 def name(self) -> Optional[str]: 

175 """Primary name string to use as lookup (`str`).""" 

176 return self._name 

177 

178 @property 

179 def dimensions(self) -> Optional[DimensionGraph]: 

180 """Dimensions associated with lookup (`DimensionGraph`).""" 

181 return self._dimensions 

182 

183 @property 

184 def dataId(self) -> Optional[Dict[str, Any]]: 

185 """Return dict of keys/values that are important for dataId lookup. 

186 

187 (`dict` or `None`) 

188 """ 

189 if self._dataId is not None: 

190 return {k: v for k, v in self._dataId} 

191 else: 

192 return None 

193 

194 def __hash__(self) -> int: 

195 """Hash the lookup to allow use as a key in a dict.""" 

196 return hash((self._name, self._dimensions, self._dataId)) 

197 

198 def clone( 

199 self, 

200 name: Optional[str] = None, 

201 dimensions: Optional[DimensionGraph] = None, 

202 dataId: Optional[Dict[str, Any]] = None, 

203 ) -> LookupKey: 

204 """Clone the object, overriding some options. 

205 

206 Used to create a new instance of the object whilst updating 

207 some of it. 

208 

209 Parameters 

210 ---------- 

211 name : `str`, optional 

212 Primary index string for lookup. Will override ``dimensions`` 

213 if ``dimensions`` are set. 

214 dimensions : `DimensionGraph`, optional 

215 Dimensions that are relevant for lookup. Will override ``name`` 

216 if ``name`` is already set. 

217 dataId : `dict`, optional 

218 Keys and values from a dataId that should control lookups. 

219 

220 Returns 

221 ------- 

222 clone : `LookupKey` 

223 Copy with updates. 

224 """ 

225 if name is not None and dimensions is not None: 

226 raise ValueError("Both name and dimensions can not be set") 

227 

228 # if neither name nor dimensions are specified we copy from current 

229 # object. Otherwise we'll use the supplied values 

230 if name is None and dimensions is None: 

231 name = self._name 

232 dimensions = self._dimensions 

233 

234 # Make sure we use the dict form for the constructor 

235 if dataId is None and self._dataId is not None: 

236 dataId = self.dataId 

237 

238 return self.__class__(name=name, dimensions=dimensions, dataId=dataId) 

239 

240 

def processLookupConfigs(
    config: Config, *, allow_hierarchy: bool = False, universe: Optional[DimensionUniverse] = None
) -> Dict[LookupKey, Union[str, Dict[str, Any]]]:
    """Process sections of configuration relating to lookups.

    Can be by dataset type name, storage class name, dimensions, or values
    of dimensions.

    Parameters
    ----------
    config : `Config`
        A `Config` representing a configuration mapping keys to values where
        the keys can be dataset type names, storage class names, dimensions
        or dataId components.
    allow_hierarchy : `bool`, optional
        If `True`, keys that refer to a hierarchy that does not look like
        a DataID specification are allowed and the full hierarchy, as a dict,
        will be returned in the value for the lookup key.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to expand and validate any used
        in lookup keys.

    Returns
    -------
    contents : `dict` of `LookupKey` to `str` or `dict`
        A `dict` with keys constructed from the configuration keys and values
        being simple strings, or the full sub-hierarchy if
        ``allow_hierarchy`` is `True` and the key refers to a hierarchy.
        It is assumed the caller will convert the values to the required
        form.

    Raises
    ------
    RuntimeError
        Raised if a key refers to a hierarchy that is not in ``key<value>``
        form and ``allow_hierarchy`` is `False`.

    Notes
    -----
    The configuration is a mapping where the keys correspond to names
    that can refer to dataset type or storage class names, or can use a
    special syntax to refer to dimensions or dataId values.

    Dimensions are indicated by using dimension names separated by a ``+``.
    If a single dimension is specified this is also supported so long as
    a ``+`` is found.  Dimensions are normalized before use such that if
    ``physical_filter+visit`` is defined, then an implicit ``instrument``
    will automatically be added.

    DataID overrides can be specified using the form: ``field<value>`` to
    indicate a subhierarchy.  All keys within that new hierarchy will take
    precedence over equivalent values in the root hierarchy.

    Currently only a single dataId field can be specified for a key.
    For example with a config such as:

    .. code-block:: yaml

       something:
         calexp: value1
         instrument<HSC>:
           calexp: value2

    Requesting the match for ``calexp`` would return ``value1`` unless
    a `DatasetRef` is used with a dataId containing the key ``instrument``
    and value ``HSC``.

    The values of the mapping are stored as strings.
    """
    contents = {}
    for name, value in config.items():
        if isinstance(value, Mapping):
            # Indicates a dataId component -- check the format. Only strings
            # can be matched; anything else is left for LookupKey to reject.
            kv = DATAID_RE.match(name) if isinstance(name, str) else None
            if kv:
                # The ``key<value>`` string itself is never used as a lookup
                # key, so do not try to build a LookupKey from it: a "+"
                # inside the value would otherwise be mistaken for the
                # dimensions syntax.
                dataIdKey, dataIdValue = kv.groups()
                for subKey, subStr in value.items():
                    lookup = LookupKey(name=subKey, dataId={dataIdKey: dataIdValue}, universe=universe)
                    contents[lookup] = subStr
                continue

        lookup = LookupKey(name=name, universe=universe)
        if isinstance(value, Mapping) and not allow_hierarchy:
            raise RuntimeError(f"Hierarchical key '{name}' not in form 'key<value>'")
        contents[lookup] = value

    return contents

323 

324 

def processLookupConfigList(
    config: Iterable[Union[str, Mapping]], *, universe: Optional[DimensionUniverse] = None
) -> Set[LookupKey]:
    """Process sections of configuration relating to lookups.

    Can be by dataset type name, storage class name, dimensions, or values
    of dimensions.

    Parameters
    ----------
    config : `list` of `str` or `dict`
        Contents of a configuration listing keys that can be
        dataset type names, storage class names, dimensions
        or dataId components.  DataId components are represented as entries
        in the `list` of `dicts` with a single key with a value of a `list`
        of new keys.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to expand and validate any used
        in lookup keys.

    Returns
    -------
    lookups : `set` of `LookupKey`
        All the entries in the input list converted to `LookupKey` and
        returned in a `set`.

    Raises
    ------
    RuntimeError
        Raised if a `dict` entry does not have exactly one key, or if that
        key is not in ``key<value>`` form.

    Notes
    -----
    Keys are parsed as described in `processLookupConfigs`.
    """
    contents = set()

    for name in config:
        if not isinstance(name, Mapping):
            contents.add(LookupKey(name=name, universe=universe))
            continue

        if len(name) != 1:
            raise RuntimeError(f"Config dict entry {name} does not have exactly one key")

        # Exactly one item is present at this point.
        for dataIdLookUp, subKeys in name.items():
            kv = DATAID_RE.match(dataIdLookUp)
            if not kv:
                # Report the offending key rather than the whole entry.
                raise RuntimeError(f"Hierarchical key '{dataIdLookUp}' not in form 'key<value>'")
            dataIdKey, dataIdValue = kv.groups()
            for subKey in subKeys:
                lookup = LookupKey(name=subKey, dataId={dataIdKey: dataIdValue}, universe=universe)
                contents.add(lookup)

    return contents

373 

374 return contents