Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

# This file is part of daf_butler. 

# 

# Developed for the LSST Data Management System. 

# This product includes software developed by the LSST Project 

# (http://www.lsst.org). 

# See the COPYRIGHT file at the top-level directory of this distribution 

# for details of code ownership. 

# 

# This program is free software: you can redistribute it and/or modify 

# it under the terms of the GNU General Public License as published by 

# the Free Software Foundation, either version 3 of the License, or 

# (at your option) any later version. 

# 

# This program is distributed in the hope that it will be useful, 

# but WITHOUT ANY WARRANTY; without even the implied warranty of 

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

# GNU General Public License for more details. 

# 

# You should have received a copy of the GNU General Public License 

# along with this program. If not, see <http://www.gnu.org/licenses/>. 

 

# Public names exported by a star-import of this module.
__all__ = ("DataId",) 

 

import itertools 

from collections.abc import Mapping 

from .graph import DimensionGraph 

from .elements import Dimension 

 

 

class DataId(Mapping):
    r"""A dict-like identifier for data usable across multiple collections
    and `DatasetType`\s.

    Parameters
    ----------
    dataId : `dict` or `DataId`
        A `dict`-like object containing `Dimension` links.  If this is a true
        `DataId` and the set of dimensions identified does not change, this
        object will be updated in-place and returned instead of a new instance.
    dimensions : iterable of `Dimension` or `str`, optional
        The set of dimensions the `DataId` will identify, either as `Dimension`
        instances or string names thereof.
    dimension : `Dimension` or `str`, optional
        The single dimension this `DataId` will identify (along with all of
        its required dependencies).
    universe : `DimensionGraph`, optional
        A graph containing all known dimensions and joins.  Must be provided
        if names are passed instead of `Dimension` instances in ``dimensions``
        or ``dimension``, or when dimensions are inferred from the provided
        link keys.
    region : `lsst.sphgeom.ConvexPolygon`, optional
        Spatial region on the sky associated with this combination of
        dimension entries.
    entries : `dict`, optional
        A nested dictionary of additional metadata column values associated
        with these dimensions, with `DimensionElement` instances or `str`
        names as the outer keys, `str` column names as inner keys, and
        column values as inner dictionary values.
    extra : `dict`, optional
        Additional key-value pairs to update ``dataId`` with.
    kwds : `dict`, optional
        Additional key-value pairs to update ``dataId`` with.

    Notes
    -----
    The keys of a `DataId` correspond to the "link" columns of one or more
    `Dimension`\s, while values identify particular rows in the tables or
    views for those `Dimension`\s.  In addition to implementing the
    (immutable) `collections.abc.Mapping` API, `DataId`\s have additional
    attributes to hold additional metadata, regions, and definitions for those
    `Dimension`\s.  They are also hashable, and hence can be used as keys in
    dictionaries.

    The `DataId` class represents a complete ID that has either been obtained
    from or validated with the set of known `Dimension`\s.  Regular `dict`\s
    are typically used to represent conceptual data IDs that have not been
    validated.
    The `DataId` constructor serves as a sort of standardization routine; most
    APIs that conceptually operate on `DataId`\s should accept either true
    `DataId`\s or regular dicts via a single ``dataId`` argument, and pass
    this through the `DataId` construction (usually with additional keyword
    arguments forwarded) to guarantee a true `DataId`.  When convenient, that
    `DataId` should also be returned.

    The set of dimensions a `DataId` identifies can be provided to the
    constructor four ways:

    - Multiple dimensions may be passed via the ``dimensions`` argument.
    - A single dimension may be passed via the ``dimension`` argument.
    - If a true `DataId` is passed, its dimensions will be used if they are
      not overridden by one of the above.
    - If none of the above is provided, the dimensions are inferred from the
      set of keys provided in ``dataId``, ``extra``, and ``kwds``; any
      dimensions in ``universe`` whose links are a subset of those keys is
      included.

    Raises
    ------
    ValueError
        Raised if incomplete or incompatible arguments are provided.
    """

    def __new__(cls, dataId=None, *, dimensions=None, dimension=None, universe=None, region=None,
                entries=None, extra=None, **kwds):
        """Resolve the dimensions and either reuse ``dataId`` or build a new
        instance holding only the link key-value pairs.

        ``region`` and ``entries`` are not used here; they are applied by
        `__init__`, which runs on whatever object this method returns.
        """
        if isinstance(dataId, DataId):
            if dimensions is None and dimension is None:
                # Shortcut the case where we already have a true DataId and the
                # dimensions are not changing.
                # Note that this still invokes __init__, which may update
                # the region and/or entries.
                return dataId
            if universe is not None and universe != dataId.dimensions.universe:
                raise ValueError("Input DataId has dimensions from a different universe.")
            universe = dataId.dimensions.universe
        elif dataId is None:
            dataId = {}

        # Transform 'dimension' arg into a Dimension object if it isn't already
        if dimension is not None and not isinstance(dimension, Dimension):
            if universe is None:
                raise ValueError(f"Cannot use {type(dimension)} as 'dimension' argument without universe.")
            dimension = universe[dimension]

        # Transform 'dimensions' arg into a DimensionGraph object if it isn't already
        if dimensions is not None and not isinstance(dimensions, DimensionGraph):
            if universe is None:
                raise ValueError(f"Cannot use {type(dimensions)} as 'dimensions' argument without universe.")
            dimensions = universe.extract(dimensions)

        if dimensions is None:
            if dimension is None:
                if universe is None:
                    raise ValueError("Cannot infer dimensions without universe.")
                # Infer the dimensions from every key we were given: any
                # dimension whose links are all present is included.
                allLinks = dict(dataId)
                if extra is not None:
                    allLinks.update(extra)
                allLinks.update(kwds)
                dimensions = universe.extract(dim for dim in universe if dim.link.issubset(allLinks))
            else:
                # Set DimensionGraph to the full set of dependencies for the
                # single Dimension that was provided.
                dimensions = dimension.graph()
        elif dimension is not None and dimension.graph() != dimensions:
            # Both 'dimensions' and 'dimension' were provided but they
            # disagree.
            raise ValueError(f"Dimension conflict: {dimension.graph()} != {dimensions}")

        assert dimensions is not None, "should be set by earlier logic"

        # One more attempt to shortcut by returning the original object: if
        # caller provided a true DataId and explicit dimensions, but they
        # already agree.  As above, __init__ will still fire.
        if isinstance(dataId, DataId) and dataId.dimensions == dimensions:
            return dataId

        if extra is None:
            extra = {}

        # Make a new instance with the dimensions and links we've identified.
        # Keys that are not links of the resolved dimensions are dropped here;
        # __init__ decides what (if anything) to do with them.
        self = super().__new__(cls)
        self._dimensions = dimensions
        self._links = {
            linkName: linkValue
            for linkName, linkValue in itertools.chain(dataId.items(), extra.items(), kwds.items())
            if linkName in self._dimensions.links
        }

        # Transfer more stuff if we're starting from a real DataId
        if isinstance(dataId, DataId):
            # Transfer the region if it's the right one.
            if self._dimensions.getRegionHolder() == dataId.dimensions.getRegionHolder():
                self.region = dataId.region
            else:
                self.region = None

            # Transfer entries for the dimensions, making new dicts where
            # necessary.  We let the new DataId and the old share the same
            # second-level dictionaries, because these correspond to the same
            # rows in the Registry and updates to those rows are rare, so it
            # doesn't make sense to worry about conflicts here.
            self._entries = {element: dataId.entries.get(element, {})
                             for element in self._dimensions.elements}
        else:
            # Create appropriately empty regions and entries if we're not
            # starting from a real DataId.
            self.region = None
            self._entries = {element: {} for element in self._dimensions.elements}

        # Return the new instance, invoking __init__ to do further updates.
        return self

    def __init__(self, dataId=None, *, dimensions=None, dimension=None, universe=None, region=None,
                 entries=None, extra=None, **kwds):
        """Fill in entries, missing links and the region on the object
        produced by `__new__`.

        This also runs when `__new__` returned an existing `DataId`, in which
        case that object is updated in place.

        Raises
        ------
        LookupError
            Raised if a link required by the dimensions has no value in the
            links or in any associated element's entries.
        KeyError
            Raised if a key in ``dataId``, ``extra`` or ``kwds`` is not a link
            and no explicit ``dimension`` was given to attach it to.
        """
        if dataId is None:
            dataId = {}

        if dimension is not None:
            # If a single dimension was explicitly provided, it must be the
            # only leaf dimension in the graph; extract that to ensure that
            # the 'dimension' is in fact a `Dimension`, not a `str` name.
            dimension, = self.dimensions.leaves

        if entries is not None:
            for element, subdict in entries.items():
                self.entries[element].update(subdict)

        missing = self.dimensions.links - self._links.keys()
        for linkName in missing:
            # Didn't get enough key-value pairs to identify all dimensions
            # from the links; look in entries for those.
            for element in self.dimensions.withLink(linkName):
                try:
                    self._links[linkName] = self.entries[element][linkName]
                    break
                except KeyError:
                    pass
            else:
                # No associated element had a value for this link.
                raise LookupError(f"No value found for link '{linkName}'")

        # If we got an explicit region argument, use it.
        if region is not None:
            self.region = region
            # NOTE(review): if getRegionHolder() returns None (a case the
            # lookup at the end of this method allows for), this indexing
            # raises KeyError -- confirm whether that is intended.
            self.entries[self.dimensions.getRegionHolder()]["region"] = region

        # Entries should contain link fields as well, so transfer them from
        # 'extra' + 'kwds'.  Also transfer from 'links' iff it's not a DataId;
        # if it is, we can safely assume the transfer has already been done.

        def addLinksToEntries(items):
            """Copy each key-value pair into the entries of the elements it
            belongs to.
            """
            for linkName, linkValue in items:
                try:
                    associated = self.dimensions.withLink(linkName)
                except KeyError:
                    # This isn't a link.  If an explicit dimension was
                    # provided, assume these fields are metadata for that
                    # dimension.
                    if dimension is None:
                        raise
                    self.entries[dimension][linkName] = linkValue
                else:
                    # Only reached for real links, so 'associated' is always
                    # the lookup result for *this* key (never unbound or left
                    # over from a previous iteration).
                    for element in associated:
                        if element in self.dimensions:
                            self.entries[element][linkName] = linkValue

        if extra is not None:
            addLinksToEntries(extra.items())
        addLinksToEntries(kwds.items())
        if not isinstance(dataId, DataId):
            addLinksToEntries(dataId.items())

        # If we still haven't got a region, look for one in entries.
        if self.region is None:
            holder = self.dimensions.getRegionHolder()
            if holder is not None:
                self.region = self.entries[holder].get("region")

    @property
    def dimensions(self):
        """The dimensions this `DataId` identifies (`DimensionGraph`).
        """
        return self._dimensions

    @property
    def entries(self):
        r"""A nested dictionary of additional values associated with the
        identified dimension entries (`dict`).

        The outer dictionary maps `DimensionElement` objects to dictionaries
        of field names and values.

        Entry values are not in general guaranteed to have been validated
        against any actual `Registry` schema.
        """
        return self._entries

    def __str__(self):
        """Format the link key-value pairs as ``{key: value, ...}``."""
        return "{{{}}}".format(", ".join(f"{k}: {v}" for k, v in self.items()))

    def __repr__(self):
        return f"DataId({self}, dimensions={self.dimensions})"

    # The Mapping interface is implemented entirely in terms of the link
    # dictionary; entries and region are not part of it.

    def __iter__(self):
        return iter(self._links)

    def __contains__(self, key):
        return key in self._links

    def __len__(self):
        return len(self._links)

    def __getitem__(self, key):
        return self._links[key]

    def keys(self):
        """Return a view of the link names."""
        return self._links.keys()

    def values(self):
        """Return a view of the link values."""
        return self._links.values()

    def items(self):
        """Return a view of the link (name, value) pairs."""
        return self._links.items()

    def __eq__(self, other):
        """Compare link key-value pairs only; dimensions, entries and region
        do not take part in the comparison.
        """
        try:
            return self._links == other._links
        except AttributeError:
            # also compare equal to regular dicts with the same keys and values
            return self._links == other

    def __hash__(self):
        # Hash the same data __eq__ compares, independent of key order.
        return hash(frozenset(self._links.items()))

    def fields(self, element, region=True, metadata=True):
        """Return the entries for a particular `DimensionElement`.

        Parameters
        ----------
        element : `DimensionElement` or `str`
            The `Dimension` or `DimensionJoin` for which fields should be
            returned.
        region : `bool`
            Whether to include the region in the result.  Ignored if this
            `DataId` has no region or the given `Dimension` is not the
            region holder for it.
        metadata : `bool`
            Whether to include metadata (non-link, non-region columns) in the
            result.  Ignored if this `DataId` has no metadata for the given
            `Dimension`.

        Returns
        -------
        fields : `dict`
            A dictionary of column name-value pairs.  When both ``region``
            and ``metadata`` are `True` this is the internal entries
            dictionary itself, not a copy.
        """
        element = self.dimensions.universe.elements[element]
        entries = self.entries[element]
        if region and metadata:
            return entries
        # NOTE(review): with metadata=False the "region" key is dropped even
        # when region=True, because "region" is not one of this DataId's link
        # keys.  That differs from the parameter description above; behavior
        # is kept as-is here -- confirm with callers before changing it.
        return {k: v for k, v in entries.items()
                if (metadata or k in self) and (region or k != "region")}