Coverage for python/lsst/daf/butler/core/configSupport.py: 21%

121 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-23 09:30 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

"""Support for configuration snippets."""

from __future__ import annotations

25 

26__all__ = ("LookupKey", "processLookupConfigs", "processLookupConfigList") 

27 

28import logging 

29import re 

30from collections.abc import Iterable, Mapping 

31from typing import TYPE_CHECKING, Any 

32 

33from .dimensions import DimensionGraph 

34 

35if TYPE_CHECKING: 

36 from .config import Config 

37 from .dimensions import Dimension, DimensionUniverse 

38 

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Matches configuration keys of the form ``key<value>``. Group 1 is the
# key (lowercase letters and underscores only); group 2 is everything between
# the "<" that follows the key and the ">" that ends the string.
DATAID_RE = re.compile(r"([a-z_]+)<(.*)>$")
"""Regex to find dataIds embedded in configurations."""

43 

44 

class LookupKey:
    """Representation of key that can be used to lookup information.

    Look up is based on dataset type name, storage class name, dimensions.
    Exactly one of ``name`` and ``dimensions`` is stored on the key; the
    other is `None`. Instances are hashable and compare equal when name,
    dimensions and dataId all match, so they can be used as `dict` keys.

    Parameters
    ----------
    name : `str`, optional
        Primary index string for lookup.  If this string looks like it
        represents dimensions (via ``dim1+dim2+dim3`` syntax) the name
        is converted to a `DimensionGraph` and stored in ``dimensions``
        property. If any of the dimensions is unknown to the universe the
        string is kept as a plain name instead.
    dimensions : `DimensionGraph`, optional
        Dimensions that are relevant for lookup. Should not be specified
        if ``name`` is also specified.
    dataId : `dict`, optional
        Keys and values from a dataId that should control lookups.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to expand and validate ``name`` or
        ``dimensions``.  Required if the key represents dimensions and a
        full `DimensionGraph` is not provided.

    Raises
    ------
    ValueError
        Raised if neither or both of ``name`` and ``dimensions`` are given,
        if ``name`` is not a `str`, or if a universe is required to
        interpret the arguments but was not supplied.
    """

    def __init__(
        self,
        name: str | None = None,
        dimensions: Iterable[str | Dimension] | None = None,
        dataId: dict[str, Any] | None = None,
        *,
        universe: DimensionUniverse | None = None,
    ):
        if name is None and dimensions is None:
            raise ValueError("At least one of name or dimensions must be given")

        if name is not None and dimensions is not None:
            raise ValueError("Can only accept one of name or dimensions")

        self._dimensions = None
        self._name = None

        if name is not None:
            if not isinstance(name, str):
                raise ValueError(f"Supplied name must be str not: '{name}'")

            if "+" in name:
                if universe is None:
                    raise ValueError(f"Cannot construct LookupKey for {name} without dimension universe.")

                # If we are given a single dimension we use the "+" to
                # indicate this but have to filter out the empty value
                dimension_names = [n for n in name.split("+") if n]
                try:
                    self._dimensions = universe.extract(dimension_names)
                except KeyError:
                    # One or more of the dimensions is not known to the
                    # universe. This could be a typo or it could be that
                    # a config is being used that is not compatible with
                    # this universe. Use the name directly as a lookup key
                    # but issue a warning. This will be potentially annoying
                    # in the scenario where a lookup key comes from a
                    # default config but the users are using an external
                    # universe.
                    unknown = [dim for dim in dimension_names if universe.get(dim) is None]
                    log.debug(
                        "A LookupKey '%s' uses unknown dimensions: %s. Possible typo?"
                        " Using the name explicitly.",
                        name,
                        unknown,
                    )
                    self._name = name
            else:
                self._name = name

        elif dimensions is not None:
            if not isinstance(dimensions, DimensionGraph):
                if universe is None:
                    raise ValueError(
                        f"Cannot construct LookupKey for dimensions={dimensions} without universe."
                    )
                else:
                    self._dimensions = universe.extract(dimensions)
            else:
                self._dimensions = dimensions
        else:
            # mypy cannot work this out on its own
            raise ValueError("Name was None but dimensions is also None")

        # The dataId is converted to a frozenset of key/value
        # tuples so that it is not mutable
        self._dataId = frozenset(dataId.items()) if dataId is not None else None

    def __str__(self) -> str:
        # For the simple case return the simple string
        if self._name:
            name = self._name
        elif self._dimensions is not None:
            name = "+".join(self._dimensions.names)
        else:
            raise RuntimeError("Internal error since name and dimensions are both None")

        if not self._dataId:
            return name

        return f"{name} ({self.dataId})"

    def __repr__(self) -> str:
        params = ""
        if self.name:
            params += f"name={self.name!r},"
        if self.dimensions:
            params += f"dimensions={self.dimensions!r},"
        if self._dataId:
            params += "dataId={" + ",".join(f"'{k}': {v!r}" for k, v in self._dataId) + "}"

        return f"{self.__class__.__name__}({params})"

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, type(self)):
            return False
        if (
            self._name == other._name
            and self._dimensions == other._dimensions
            and self._dataId == other._dataId
        ):
            return True
        return False

    @property
    def name(self) -> str | None:
        """Primary name string to use as lookup (`str`)."""
        return self._name

    @property
    def dimensions(self) -> DimensionGraph | None:
        """Dimensions associated with lookup (`DimensionGraph`)."""
        return self._dimensions

    @property
    def dataId(self) -> dict[str, Any] | None:
        """Return dict of keys/values that are important for dataId lookup.

        (`dict` or `None`)

        A fresh `dict` is built on every access so callers cannot mutate
        the state of the key.
        """
        if self._dataId is not None:
            return dict(self._dataId)
        else:
            return None

    def __hash__(self) -> int:
        """Hash the lookup to allow use as a key in a dict."""
        return hash((self._name, self._dimensions, self._dataId))

    def clone(
        self,
        name: str | None = None,
        dimensions: DimensionGraph | None = None,
        dataId: dict[str, Any] | None = None,
    ) -> LookupKey:
        """Clone the object, overriding some options.

        Used to create a new instance of the object whilst updating
        some of it.

        Parameters
        ----------
        name : `str`, optional
            Primary index string for lookup.  Will override ``dimensions``
            if ``dimensions`` are set.
        dimensions : `DimensionGraph`, optional
            Dimensions that are relevant for lookup.  Will override ``name``
            if ``name`` is already set.
        dataId : `dict`, optional
            Keys and values from a dataId that should control lookups.
            If not given the dataId of this key is retained.

        Returns
        -------
        clone : `LookupKey`
            Copy with updates.

        Raises
        ------
        ValueError
            Raised if both ``name`` and ``dimensions`` are given.
        """
        import copy

        if name is not None and dimensions is not None:
            raise ValueError("Both name and dimensions can not be set")

        if name is None and dimensions is None:
            # Neither is being overridden, so the name/dimensions held by
            # this key are reused as they are. They must not be pushed back
            # through the constructor: a name containing "+" that was kept
            # as a plain name (unknown dimensions) would be re-parsed as
            # dimensions and fail because no universe is available here.
            duplicate = copy.copy(self)
            if dataId is not None:
                duplicate._dataId = frozenset(dataId.items())
            return duplicate

        # Make sure we use the dict form for the constructor
        if dataId is None and self._dataId is not None:
            dataId = self.dataId

        return self.__class__(name=name, dimensions=dimensions, dataId=dataId)

238 

239 

def processLookupConfigs(
    config: Config, *, allow_hierarchy: bool = False, universe: DimensionUniverse | None = None
) -> dict[LookupKey, str | dict[str, Any]]:
    """Process sections of configuration relating to lookups.

    Can be by dataset type name, storage class name, dimensions, or values
    of dimensions.

    Parameters
    ----------
    config : `Config`
        A `Config` representing a configuration mapping keys to values where
        the keys can be dataset type names, storage class names, dimensions
        or dataId components.
    allow_hierarchy : `bool`, optional
        If `True`, keys that refer to a hierarchy that does not look like
        a DataID specification are allowed and the full hierarchy, as a dict,
        will be returned in the value for the lookup key.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to expand and validate any used
        in lookup keys.

    Returns
    -------
    contents : `dict` of `LookupKey` to `str` or `dict`
        A `dict` with keys constructed from the configuration keys and values
        taken unchanged from the configuration. These are simple values
        unless ``allow_hierarchy`` is `True`, in which case a value can be
        the full sub-hierarchy. It is assumed the caller will convert the
        values to the required form.

    Raises
    ------
    RuntimeError
        Raised if a key refers to a hierarchy, is not of the form
        ``key<value>``, and ``allow_hierarchy`` is `False`.

    Notes
    -----
    The configuration is a mapping where the keys correspond to names
    that can refer to dataset type or storage class names, or can use a
    special syntax to refer to dimensions or dataId values.

    Dimensions are indicated by using dimension names separated by a ``+``.
    If a single dimension is specified this is also supported so long as
    a ``+`` is found.  Dimensions are normalized before use such that if
    ``physical_filter+visit`` is defined, then an implicit ``instrument``
    will automatically be added.

    DataID overrides can be specified using the form: ``field<value>`` to
    indicate a subhierarchy.  All keys within that new hierarchy will take
    precedence over equivalent values in the root hierarchy.

    Currently only a single dataId field can be specified for a key.
    For example with a config such as:

    .. code-block:: yaml

       something:
         calexp: value1
         instrument<HSC>:
           calexp: value2

    Requesting the match for ``calexp`` would return ``value1`` unless
    a `DatasetRef` is used with a dataId containing the key ``instrument``
    and value ``HSC``.

    The values of the mapping are stored as strings.
    """
    contents: dict[LookupKey, str | dict[str, Any]] = {}
    for name, value in config.items():
        if not isinstance(value, Mapping):
            contents[LookupKey(name=name, universe=universe)] = value
            continue

        # A mapping value indicates a dataId component -- check the format.
        # The raw ``key<value>`` string is deliberately not turned into a
        # LookupKey itself: it is not a lookup name and any "+" inside the
        # value must not be interpreted as a dimension separator.
        kv = DATAID_RE.match(name)
        if kv:
            dataIdKey, dataIdValue = kv.groups()
            for subKey, subStr in value.items():
                lookup = LookupKey(name=subKey, dataId={dataIdKey: dataIdValue}, universe=universe)
                contents[lookup] = subStr
        elif allow_hierarchy:
            contents[LookupKey(name=name, universe=universe)] = value
        else:
            raise RuntimeError(f"Hierarchical key '{name}' not in form 'key<value>'")

    return contents

322 

323 

def processLookupConfigList(
    config: Iterable[str | Mapping], *, universe: DimensionUniverse | None = None
) -> set[LookupKey]:
    """Process sections of configuration relating to lookups.

    Can be by dataset type name, storage class name, dimensions, or values
    of dimensions.

    Parameters
    ----------
    config : `list` of `str` or `dict`
        Contents of a configuration listing keys that can be
        dataset type names, storage class names, dimensions
        or dataId components.  DataId components are represented as entries
        in the `list` of `dicts` with a single key with a value of a `list`
        of new keys.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to expand and validate any used
        in lookup keys.

    Returns
    -------
    lookups : `set` of `LookupKey`
        All the entries in the input list converted to `LookupKey` and
        returned in a `set`.

    Raises
    ------
    RuntimeError
        Raised if a `dict` entry does not contain exactly one key, or if
        that key is not of the form ``key<value>``.

    Notes
    -----
    Keys are parsed as described in `processLookupConfigs`.
    """
    contents = set()

    for name in config:
        if not isinstance(name, Mapping):
            contents.add(LookupKey(name=name, universe=universe))
            continue

        if len(name) != 1:
            raise RuntimeError(f"Config dict entry {name} must have exactly one key")

        # Exactly one item is present, so unpack it directly.
        ((dataIdLookUp, subKeys),) = name.items()
        kv = DATAID_RE.match(dataIdLookUp)
        if not kv:
            # Report the offending key, not the whole dict entry.
            raise RuntimeError(f"Hierarchical key '{dataIdLookUp}' not in form 'key<value>'")

        dataIdKey, dataIdValue = kv.groups()
        for subKey in subKeys:
            contents.add(LookupKey(name=subKey, dataId={dataIdKey: dataIdValue}, universe=universe))

    return contents