Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22"""Support for configuration snippets""" 

23 

24__all__ = ("LookupKey", "processLookupConfigs", 

25 "processLookupConfigList") 

26 

27import logging 

28import re 

29from collections.abc import Mapping 

30from .dimensions import DimensionGraph 

31 

log = logging.getLogger(__name__)

# Matches configuration keys written as ``field<value>``: group 1 is the
# field name (lower-case letters and underscores only) and group 2 is
# everything between the first ``<`` and the final ``>``.
DATAID_RE = re.compile(r"([a-z_]+)<(.*)>$")
"""Pattern used to recognise dataId components embedded in configuration
keys."""

36 

37 

class LookupKey:
    """Representation of key that can be used to lookup information based
    on dataset type name, storage class name, dimensions.

    Parameters
    ----------
    name : `str`, optional
        Primary index string for lookup.  If this string looks like it
        represents dimensions (via ``dim1+dim2+dim3`` syntax) the name
        is converted to a `DimensionGraph` and stored in ``dimensions``
        property.
    dimensions : `DimensionGraph`, optional
        Dimensions that are relevant for lookup.  Should not be specified
        if ``name`` is also specified.
    dataId : `dict`, optional
        Keys and values from a dataId that should control lookups.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to expand and validate ``name`` or
        ``dimensions``.  Required if the key represents dimensions and a
        full `DimensionGraph` is not provided.

    Raises
    ------
    ValueError
        Raised if neither or both of ``name`` and ``dimensions`` are given,
        if ``name`` is not a `str`, or if dimensions have to be constructed
        but no ``universe`` was supplied.
    """

    def __init__(self, name=None, dimensions=None, dataId=None, *, universe=None):
        if name is None and dimensions is None:
            raise ValueError("At least one of name or dimensions must be given")

        if name is not None and dimensions is not None:
            raise ValueError("Can only accept one of name or dimensions")

        self._dimensions = None
        self._name = None

        # Remember the universe so that clone() can construct keys that
        # need dimensions to be expanded.
        self._universe = universe

        if name is not None:
            if not isinstance(name, str):
                raise ValueError(f"Supplied name must be str not: '{name}'")

            if "+" in name:
                # If we are given a single dimension we use the "+" to
                # indicate this but have to filter out the empty value
                dimensions = [n for n in name.split("+") if n]
                if universe is None:
                    raise ValueError(f"Cannot construct LookupKey for {name} without dimension universe.")
                self._dimensions = universe.extract(dimensions)
            else:
                self._name = name
        elif isinstance(dimensions, DimensionGraph):
            self._dimensions = dimensions
        elif universe is None:
            raise ValueError(f"Cannot construct LookupKey for dimensions={dimensions} "
                             f"without universe.")
        else:
            self._dimensions = universe.extract(dimensions)

        # The dataId is converted to a frozenset of key/value
        # tuples so that it is not mutable (and therefore hashable)
        self._dataId = frozenset(dataId.items()) if dataId is not None else None

    def __str__(self):
        # Exactly one of name and dimensions is set by the constructor.
        # Test against None (not truthiness) so that an empty-string name
        # does not fall through to the unset dimensions.
        if self._name is not None:
            name = self._name
        else:
            name = "+".join(self._dimensions.names)

        # For the simple case return the simple string
        if not self._dataId:
            return name

        return f"{name} ({self.dataId})"

    def __repr__(self):
        params = []
        if self._name is not None:
            params.append(f"name={self._name!r}")
        if self._dimensions is not None:
            params.append(f"dimensions={self._dimensions!r}")
        if self._dataId:
            params.append(f"dataId={self.dataId!r}")

        return f"{self.__class__.__name__}({', '.join(params)})"

    def __eq__(self, other):
        # Defer to the other operand for foreign types rather than failing
        # on a missing private attribute.
        if not isinstance(other, LookupKey):
            return NotImplemented
        return (self._name == other._name
                and self._dimensions == other._dimensions
                and self._dataId == other._dataId)

    @property
    def name(self):
        """Primary name string to use as lookup. (`str`)"""
        return self._name

    @property
    def dimensions(self):
        """Dimensions associated with lookup. (`DimensionGraph`)"""
        return self._dimensions

    @property
    def dataId(self):
        """Dict of keys/values that are important for dataId lookup.
        (`dict` or `None`)"""
        if self._dataId is None:
            return None
        # A fresh dict is returned each time so callers can not modify
        # the stored value.
        return dict(self._dataId)

    def __hash__(self):
        """Hash the lookup to allow use as a key in a dict."""
        return hash((self._name, self._dimensions, self._dataId))

    def clone(self, name=None, dimensions=None, dataId=None):
        """Clone the object, overriding some options.

        Used to create a new instance of the object whilst updating
        some of it.

        Parameters
        ----------
        name : `str`, optional
            Primary index string for lookup.  Will override ``dimensions``
            if ``dimensions`` are set.
        dimensions : `DimensionGraph`, optional
            Dimensions that are relevant for lookup.  Will override ``name``
            if ``name`` is already set.
        dataId : `dict`, optional
            Keys and values from a dataId that should control lookups.

        Returns
        -------
        clone : `LookupKey`
            Copy with updates.  Any universe given when this key was
            constructed is passed on to the copy.

        Raises
        ------
        ValueError
            Raised if both ``name`` and ``dimensions`` are supplied.
        """
        if name is not None and dimensions is not None:
            raise ValueError("Both name and dimensions can not be set")

        # if neither name nor dimensions are specified we copy from current
        # object.  Otherwise we'll use the supplied values
        if name is None and dimensions is None:
            name = self._name
            dimensions = self._dimensions

        # Make sure we use the dict form for the constructor
        if dataId is None and self._dataId is not None:
            dataId = self.dataId

        return self.__class__(name=name, dimensions=dimensions, dataId=dataId,
                              universe=self._universe)

190 

191 

def processLookupConfigs(config, *, universe=None):
    """Convert a lookup configuration into a mapping keyed by `LookupKey`.

    Parameters
    ----------
    config : `Config`
        Configuration mapping keys to values.  The keys can be dataset
        type names, storage class names, dimensions or dataId components.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to expand and validate any used
        in lookup keys.

    Returns
    -------
    contents : `dict` of `LookupKey` to `str`
        A `dict` whose keys are built from the configuration keys and whose
        values are the associated configuration values, unmodified.  The
        caller is expected to convert the values to the required form.

    Raises
    ------
    RuntimeError
        Raised if a key has a mapping as its value but the key is not of
        the form ``field<value>``.

    Notes
    -----
    Keys can name a dataset type or storage class directly, or use a
    special syntax to refer to dimensions or dataId values.

    Dimensions are written as dimension names joined by ``+``.  A single
    dimension is supported as long as a ``+`` is present.  Dimensions are
    normalized before use such that if ``physical_filter+visit`` is
    defined, then an implicit ``instrument`` will automatically be added.

    DataId overrides use the form ``field<value>`` to introduce a
    subhierarchy.  All keys within that subhierarchy take precedence over
    equivalent values in the root hierarchy.

    Currently only a single dataId field can be specified for a key.
    For example with a config such as:

    .. code::

       something:
         calexp: value1
         instrument<HSC>:
           calexp: value2

    Requesting the match for ``calexp`` would return ``value1`` unless
    a `DatasetRef` is used with a dataId containing the key ``instrument``
    and value ``HSC``.

    The values of the mapping are stored as strings.
    """
    lookups = {}
    for name, entry in config.items():
        if not isinstance(entry, Mapping):
            # Simple key with no dataId qualification.
            lookups[LookupKey(name=name, universe=universe)] = entry
            continue

        # A nested mapping must be introduced by a ``field<value>`` key.
        matched = DATAID_RE.match(name)
        if not matched:
            raise RuntimeError(f"Hierarchical key '{name}' not in form 'key<value>'")

        field, fieldValue = matched.group(1), matched.group(2)
        for innerName, innerValue in entry.items():
            qualified = LookupKey(name=innerName, dataId={field: fieldValue}, universe=universe)
            lookups[qualified] = innerValue

    return lookups

263 

264 

def processLookupConfigList(config, *, universe=None):
    """Process sections of configuration relating to lookups by dataset type
    name, storage class name, dimensions, or values of dimensions.

    Parameters
    ----------
    config : `list` of `str` or `dict`
        Contents of a configuration listing keys that can be
        dataset type names, storage class names, dimensions
        or dataId components.  DataId components are represented as entries
        in the `list` of `dicts` with a single key with a value of a `list`
        of new keys.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to expand and validate any used
        in lookup keys.

    Returns
    -------
    lookups : `set` of `LookupKey`
        All the entries in the input list converted to `LookupKey` and
        returned in a `set`.

    Raises
    ------
    RuntimeError
        Raised if a `dict` entry does not contain exactly one key, or if
        that key is not of the form ``key<value>``.

    Notes
    -----
    Keys are parsed as described in `processLookupConfigs`.
    """
    contents = set()

    for entry in config:
        if not isinstance(entry, Mapping):
            contents.add(LookupKey(name=entry, universe=universe))
            continue

        # A dataId qualifier is a dict with a single ``key<value>`` entry.
        # This also rejects an empty dict.
        if len(entry) != 1:
            raise RuntimeError(f"Config dict entry {entry} must have exactly one key")

        ((dataIdLookUp, subKeys),) = entry.items()
        kv = DATAID_RE.match(dataIdLookUp)
        if not kv:
            # Report the offending key itself rather than the whole entry
            raise RuntimeError(f"Hierarchical key '{dataIdLookUp}' not in form 'key<value>'")

        dataIdKey = kv.group(1)
        dataIdValue = kv.group(2)
        for subKey in subKeys:
            contents.add(LookupKey(name=subKey, dataId={dataIdKey: dataIdValue}, universe=universe))

    return contents

309 

310 return contents