# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Support for configuration snippets"""

__all__ = ("LookupKey", "processLookupConfigs", "normalizeLookupKeys",
           "processLookupConfigList")

import logging
import re
from collections.abc import Mapping

from .dimensions import DimensionNameSet, DimensionGraph

log = logging.getLogger(__name__)

DATAID_RE = re.compile(r"([a-z_]+)<(.*)>$")
"""Regex to find dataIds embedded in configurations."""


class LookupKey:
"""Representation of key that can be used to lookup information based 

on dataset type name, storage class name, dimensions. 

 

Parameters 

---------- 

name : `str`, optional 

Primary index string for lookup. If this string looks like it 

represents dimensions (via ``dim1+dim2+dim3`` syntax) the name 

is converted to a `DimensionNameSet` and stored in ``dimensions`` 

property. 

dimensions : `DimensionNameSet` or `DimensionGraph`, optional 

Dimensions that are relevant for lookup. Should not be specified 

if ``name`` is also specified. 

dataId : `dict`, optional 

Keys and values from a dataId that should control lookups. 
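
    Examples
    --------
    An illustrative sketch of how the constructor interprets its
    arguments (summarised from this class, not an exhaustive reference)::

        key = LookupKey(name="calexp")
        # key.name == "calexp" and key.dimensions is None

        key = LookupKey(name="physical_filter+visit")
        # The "+" syntax means key.name is None and key.dimensions is a
        # DimensionNameSet covering physical_filter and visit

        key = LookupKey(name="calexp", dataId={"instrument": "HSC"})
        # key.dataId == {"instrument": "HSC"}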

""" 

 

def __init__(self, name=None, dimensions=None, dataId=None): 

57 ↛ 58line 57 didn't jump to line 58, because the condition on line 57 was never true if name is None and dimensions is None: 

raise ValueError("At least one of name or dimensions must be given") 

 

60 ↛ 61line 60 didn't jump to line 61, because the condition on line 60 was never true if name is not None and dimensions is not None: 

raise ValueError("Can only accept one of name or dimensions") 

 

self._dimensions = None 

self._name = None 

 

66 ↛ 79line 66 didn't jump to line 79, because the condition on line 66 was never false if name is not None: 

 

68 ↛ 69line 68 didn't jump to line 69, because the condition on line 68 was never true if not isinstance(name, str): 

raise ValueError(f"Supplied name must be str not: '{name}'") 

 

71 ↛ 74line 71 didn't jump to line 74, because the condition on line 71 was never true if "+" in name: 

# If we are given a single dimension we use the "+" to 

# indicate this but have to filter out the empty value 

dimensions = [n for n in name.split("+") if n] 

self._dimensions = DimensionNameSet(dimensions) 

else: 

self._name = name 

else: 

self._dimensions = dimensions 

 

# The dataId is converted to a frozenset of key/value 

# tuples so that it is not mutable 

83 ↛ 84line 83 didn't jump to line 84, because the condition on line 83 was never true if dataId is not None: 

self._dataId = frozenset(dataId.items()) 

else: 

self._dataId = None 

 

def __str__(self): 

# For the simple case return the simple string 

if self._name: 

name = self._name 

else: 

name = "+".join(self._dimensions.names) 

 

if not self._dataId: 

return name 

 

return f"{name} ({self.dataId})" 

 

def __repr__(self): 

params = "" 

if self.name: 

params += f"name={self.name!r}," 

if self.dimensions: 

params += f"dimensions={self.dimensions!r}," 

if self._dataId: 

params += "dataId={" + ",".join(f"'{k}': {v!r}" for k, v in self._dataId) + "}" 

 

return f"{self.__class__.__name__}({params})" 

 

def __eq__(self, other): 

if self._name == other._name and self._dimensions == other._dimensions and \ 

self._dataId == other._dataId: 

return True 

return False 

 

@property 

def name(self): 

"""Primary name string to use as lookup. (`str`)""" 

return self._name 

 

@property 

def dimensions(self): 

"""Dimensions associated with lookup. 

(`DimensionGraph` or `DimensionNameSet`)""" 

return self._dimensions 

 

@property 

def dataId(self): 

"""Dict of keys/values that are important for dataId lookup. 

(`dict` or `None`)""" 

if self._dataId is not None: 

return {k: v for k, v in self._dataId} 

else: 

return 

 

def __hash__(self): 

"""Hash the lookup to allow use as a key in a dict.""" 

return hash((self._name, self._dimensions, self._dataId)) 

 

def clone(self, name=None, dimensions=None, dataId=None): 

"""Clone the object, overriding some options. 

 

Used to create a new instance of the object whilst updating 

some of it. 

 

Parameters 

---------- 

name : `str`, optional 

Primary index string for lookup. Will override ``dimensions`` 

if ``dimensions`` are set. 

dimensions : `DimensionNameSet`, optional 

Dimensions that are relevant for lookup. Will override ``name`` 

if ``name`` is already set. 

dataId : `dict`, optional 

Keys and values from a dataId that should control lookups. 

 

Returns 

------- 

clone : `LookupKey` 

Copy with updates. 
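
        Examples
        --------
        A small illustration (the names used here are arbitrary)::

            key = LookupKey(name="calexp", dataId={"instrument": "HSC"})
            newKey = key.clone(name="deepCoadd")
            # newKey uses the new name but keeps the original dataId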

""" 

if name is not None and dimensions is not None: 

raise ValueError("Both name and dimensions can not be set") 

 

# if neither name nor dimensions are specified we copy from current 

# object. Otherwise we'll use the supplied values 

if name is None and dimensions is None: 

name = self._name 

dimensions = self._dimensions 

 

# Make sure we use the dict form for the constructor 

if dataId is None and self._dataId is not None: 

dataId = self.dataId 

 

return self.__class__(name=name, dimensions=dimensions, dataId=dataId) 

 

 

def normalizeLookupKeys(toUpdate, universe): 

"""Normalize dimensions used in keys of supplied dict. 

 

Parameters 

---------- 

toUpdate : `dict` with keys of `LookupKey` 

Dictionary to update. The values are reassigned to normalized 

versions of the keys. Keys are ignored that are not `LookupKey`. 

universe : `DimensionUniverse` 

The set of all known dimensions. If `None`, returns without 

action. 

 

Notes 

----- 

Goes through all keys, and for keys that include 

dimensions, rewrites those keys to use a verified set of 

dimensions. 

 

Raises 

------ 

ValueError 

Raised if a key exists where a dimension is not part of 

the ``universe``. 
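
    Examples
    --------
    A sketch of the intended in-place transformation (``universe`` is
    assumed to be a `DimensionUniverse` constructed elsewhere)::

        d = {LookupKey(name="physical_filter+visit"): "some-value"}
        normalizeLookupKeys(d, universe)
        # The original key is replaced by an equivalent LookupKey whose
        # dimensions attribute is a DimensionGraph extracted from
        # universe (so implied dimensions such as instrument are added).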

""" 

if universe is None: 

return 

 

# Get the keys because we are going to change them 

allKeys = list(toUpdate.keys()) 

 

for k in allKeys: 

if not isinstance(k, LookupKey): 

continue 

if k.dimensions is not None and not isinstance(k.dimensions, DimensionGraph): 

newDimensions = universe.extract(k.dimensions) 

newKey = k.clone(dimensions=newDimensions) 

# Delete before adding the new version since LookupKeys hash 

# to the same value regardless of DimensionGraph vs 

# DimensionNameSet 

oldValue = toUpdate[k] 

del toUpdate[k] 

toUpdate[newKey] = oldValue 

 

 

def processLookupConfigs(config): 

"""Process sections of configuration relating to lookups by dataset type 

name, storage class name, dimensions, or values of dimensions. 

 

Parameters 

---------- 

config : `Config` 

A `Config` representing a configuration mapping keys to values where 

the keys can be dataset type names, storage class names, dimensions 

or dataId components. 

 

Returns 

------- 

contents : `dict` of `LookupKey` to `str` 

A `dict` with keys constructed from the configuration keys and values 

being simple strings. It is assumed the caller will convert the 

values to the required form. 

 

Notes 

----- 

The configuration is a mapping where the keys correspond to names 

that can refer to dataset type or storage class names, or can use a 

special syntax to refer to dimensions or dataId values. 

 

Dimensions are indicated by using dimension names separated by a ``+``. 

If a single dimension is specified this is also supported so long as 

a ``+`` is found. Dimensions are normalized before use such that if 

``physical_filter+visit`` is defined, then an implicit ``instrument`` 

will automatically be added. 

 

DataID overrides can be specified using the form: ``field<value>`` to 

indicate a subhierarchy. All keys within that new hierarchy will take 

precedence over equivalent values in the root hierarchy. 

 

Currently only a single dataId field can be specified for a key. 

For example with a config such as: 

 

.. code:: 

 

something: 

calexp: value1 

instrument<HSC>: 

calexp: value2 

 

Requesting the match for ``calexp`` would return ``value1`` unless 

a `DatasetRef` is used with a dataId containing the key ``instrument`` 

and value ``HSC``. 

 

The values of the mapping are stored as strings. 
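
    Examples
    --------
    An illustrative sketch (the function only relies on ``config`` being a
    mapping whose ``items()`` yields keys and values like those above)::

        contents = processLookupConfigs(config["something"])
        # With the example configuration above, contents maps
        # LookupKey(name="calexp") to "value1" and
        # LookupKey(name="calexp", dataId={"instrument": "HSC"}) to "value2"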

""" 

contents = {} 

for name, value in config.items(): 

if isinstance(value, Mapping): 

# indicates a dataId component -- check the format 

kv = DATAID_RE.match(name) 

if kv: 

dataIdKey = kv.group(1) 

dataIdValue = kv.group(2) 

for subKey, subStr in value.items(): 

lookup = LookupKey(name=subKey, dataId={dataIdKey: dataIdValue}) 

contents[lookup] = subStr 

else: 

raise RuntimeError(f"Hierarchical key '{name}' not in form 'key<value>'") 

else: 

lookup = LookupKey(name=name) 

contents[lookup] = value 

 

return contents 

 

 

def processLookupConfigList(config): 

"""Process sections of configuration relating to lookups by dataset type 

name, storage class name, dimensions, or values of dimensions. 

 

Parameters 

---------- 

config : `list` of `str` or `dict` 

Contents a configuration listing keys that can be 

dataset type names, storage class names, dimensions 

or dataId components. DataId components are represented as entries 

in the `list` of `dicts` with a single key with a value of a `list` 

of new keys. 

 

Returns 

------- 

lookups : `set` of `LookupKey` 

All the entries in the input list converted to `LookupKey` and 

returned in a `set`. 

 

Notes 

----- 

Keys are parsed as described in `processLookupConfigs`. 
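
    Examples
    --------
    A minimal illustration using a plain `list` (any sequence with the
    structure described above behaves the same way)::

        lookups = processLookupConfigList(
            ["calexp", {"instrument<HSC>": ["calexp"]}]
        )
        # lookups is a set containing LookupKey(name="calexp") and
        # LookupKey(name="calexp", dataId={"instrument": "HSC"})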

""" 

contents = set() 

 

for name in config: 

if isinstance(name, Mapping): 

if len(name) != 1: 

raise RuntimeError(f"Config dict entry {name} has more than key present") 

for dataIdLookUp, subKeys in name.items(): 

kv = DATAID_RE.match(dataIdLookUp) 

if kv: 

dataIdKey = kv.group(1) 

dataIdValue = kv.group(2) 

for subKey in subKeys: 

lookup = LookupKey(name=subKey, dataId={dataIdKey: dataIdValue}) 

contents.add(lookup) 

else: 

raise RuntimeError(f"Hierarchical key '{name}' not in form 'key<value>'") 

else: 

contents.add(LookupKey(name=name)) 

 

return contents