# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("DataCoordinate", "ExpandedDataCoordinate", "DataId")

from typing import Any, Tuple, Mapping, Optional, Dict, Union, TYPE_CHECKING

from lsst.sphgeom import Region
from ..utils import IndexedTupleDict, immutable
from ..timespan import Timespan
from .graph import DimensionGraph

if TYPE_CHECKING:  # Imports needed only for type annotations; may be circular.
    from .elements import DimensionElement, Dimension
    from .universe import DimensionUniverse
    from .records import DimensionRecord


@immutable
class DataCoordinate(IndexedTupleDict):
    """An immutable data ID dictionary that guarantees that its key-value
    pairs identify all required dimensions in a `DimensionGraph`.

    `DataCoordinate` instances should usually be constructed via the
    `standardize` class method; the constructor is reserved for callers that
    can guarantee that the ``values`` tuple has exactly the right elements.

    Parameters
    ----------
    graph : `DimensionGraph`
        The dimensions identified by this instance.
    values : `tuple`
        Tuple of primary key values for the given dimensions.

    Notes
    -----
    Like any data ID class, `DataCoordinate` behaves like a dictionary,
    mostly via methods inherited from `IndexedTupleDict`. Like `NamedKeyDict`,
    both `Dimension` instances and `str` names thereof may be used as keys in
    lookup operations.

    Subclasses are permitted to support lookup for any dimension in
    ``self.graph.dimensions``, but the base class only supports lookup for
    those in ``self.graph.required``, which is the minimal set needed to
    identify all others in a `Registry`. Both the base class and subclasses
    define comparisons, iterators, and the `keys`, `values`, and `items` views
    to just the ``self.graph.required`` subset in order to guarantee true
    (i.e. Liskov) substitutability.
    """

    __slots__ = ("graph",)

    def __new__(cls, graph: DimensionGraph, values: Tuple[Any, ...]):
        self = super().__new__(cls, graph._requiredIndices, values)
        self.graph = graph
        return self

    @staticmethod
    def standardize(mapping: Optional[Mapping[str, Any]] = None, *,
                    graph: Optional[DimensionGraph] = None,
                    universe: Optional[DimensionUniverse] = None,
                    **kwds) -> DataCoordinate:
        """Adapt an arbitrary mapping and/or additional arguments into a true
        `DataCoordinate`, or augment an existing one.

        Parameters
        ----------
        mapping : `~collections.abc.Mapping`, optional
            An informal data ID that maps dimension names to their primary key
            values (may also be a true `DataCoordinate`).
        graph : `DimensionGraph`
            The dimensions to be identified by the new `DataCoordinate`.
            If not provided, will be inferred from the keys of ``mapping``,
            and ``universe`` must be provided unless ``mapping`` is already a
            `DataCoordinate`.
        universe : `DimensionUniverse`
            All known dimensions and their relationships; used to expand
            and validate dependencies when ``graph`` is not provided.
        kwds
            Additional keyword arguments are treated like additional key-value
            pairs in ``mapping``.

        Returns
        -------
        coordinate : `DataCoordinate`
            A validated `DataCoordinate` instance. May be a subclass instance
            if and only if ``mapping`` is a subclass instance and ``graph``
            is a subset of ``mapping.graph``.

        Raises
        ------
        TypeError
            Raised if the set of optional arguments provided is not supported.
        KeyError
            Raised if a key-value pair for a required dimension is missing.

        Notes
        -----
        Because `DataCoordinate` stores only values for required dimensions,
        key-value pairs for other related dimensions will be ignored and
        excluded from the result. This means that a `DataCoordinate` may
        contain *fewer* key-value pairs than the informal data ID dictionary
        it was constructed from.
        """
        if isinstance(mapping, DataCoordinate):
            if graph is None:
                if not kwds:
                    # Already standardized to exactly what we want.
                    return mapping
            elif mapping.graph.issuperset(graph):
                # Already standardized; just return the relevant subset.
                return mapping.subset(graph)
            assert universe is None or universe == mapping.universe
            universe = mapping.universe
        if kwds:
            if mapping:
                try:
                    d = dict(mapping.byName(), **kwds)
                except AttributeError:
                    d = dict(mapping, **kwds)
            else:
                d = kwds
        elif mapping:
            try:
                d = mapping.byName()
            except AttributeError:
                d = mapping
        else:
            d = {}
        if graph is None:
            if universe is None:
                raise TypeError("universe must be provided if graph is not.")
            graph = DimensionGraph(universe, names=d.keys())
        try:
            values = tuple(d[name] for name in graph.required.names)
        except KeyError as err:
            raise KeyError(f"No value in data ID for required dimension {err}.") from err
        return DataCoordinate(graph, values)
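
    # Illustrative usage sketch (not part of the original module): the
    # dimension names "instrument" and "visit" and the ``universe`` object
    # are assumed to come from a configured `DimensionUniverse`; they are
    # not defined here.
    #
    #     dataId = DataCoordinate.standardize(
    #         {"instrument": "HSC"}, visit=12345, universe=universe
    #     )
    #     dataId.byName()   # e.g. {"instrument": "HSC", "visit": 12345}
    #
    # Passing an existing `DataCoordinate` together with a subset ``graph``
    # instead returns ``mapping.subset(graph)``, as in the branch above.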

 

    def byName(self) -> Dict[str, Any]:
        """Return a true `dict` keyed by `str` dimension name and the same
        values as ``self``.
        """
        return {k.name: v for k, v in self.items()}

    def __getnewargs__(self) -> tuple:
        # Implements pickle support (in addition to methods provided by
        # @immutable decorator).
        return (self.graph, self.values())

    def __hash__(self) -> int:
        return hash((self.graph, self.values()))

    def __eq__(self, other: DataCoordinate) -> bool:
        try:
            # Optimized code path for DataCoordinate comparisons.
            return self.graph == other.graph and self.values() == other.values()
        except AttributeError:
            # Also support comparison with informal data ID dictionaries that
            # map dimension name to value.
            return self.byName() == other

    def __str__(self):
        return f"{self.byName()}"

    def __repr__(self):
        return f"DataCoordinate({self.graph}, {self.values()})"

    def fingerprint(self, update):
        """Update a secure hash function with the values in this data ID.

        Parameters
        ----------
        update : `~collections.abc.Callable`
            Callable that accepts a single `bytes` argument to update
            the hash; usually the ``update`` method of an instance from
            the ``hashlib`` module.
        """
        for k, v in self.items():
            update(k.name.encode("utf8"))
            if isinstance(v, int):
                update(v.to_bytes(64, "big", signed=False))
            elif isinstance(v, str):
                update(v.encode("utf8"))
            else:
                raise TypeError(f"Only `int` and `str` are allowed as dimension keys, not {v} ({type(v)}).")
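
    # Illustrative sketch (an assumption, not taken from the original
    # source): given a `DataCoordinate` named ``dataId``, feed it into a
    # hashlib object through that object's bound ``update`` method.
    #
    #     import hashlib
    #     h = hashlib.sha256()
    #     dataId.fingerprint(h.update)
    #     digest = h.hexdigest()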

 

    def matches(self, other: DataCoordinate) -> bool:
        """Test whether the values of all keys in both coordinates are equal.

        Parameters
        ----------
        other : `DataCoordinate`
            The other coordinate to compare to.

        Returns
        -------
        consistent : `bool`
            `True` if all keys that are in both ``other`` and ``self``
            are associated with the same values, and `False` otherwise.
            `True` if there are no keys in common.
        """
        d = getattr(other, "full", other)
        return all(self[k] == d[k] for k in (self.keys() & d.keys()))
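
    # Illustrative sketch (hypothetical values; the dimension names and
    # ``universe`` are assumptions): ``matches`` compares only the keys the
    # two data IDs have in common, so a partially-overlapping ID can match.
    #
    #     a = DataCoordinate.standardize(instrument="HSC", universe=universe)
    #     b = DataCoordinate.standardize(instrument="HSC", visit=12345,
    #                                    universe=universe)
    #     a.matches(b)   # True: the shared "instrument" key agrees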

 

    def subset(self, graph: DimensionGraph) -> DataCoordinate:
        """Return a new `DataCoordinate` whose graph is a subset of
        ``self.graph``.

        Subclasses may override this method to return a subclass instance.

        Parameters
        ----------
        graph : `DimensionGraph`
            The dimensions identified by the returned `DataCoordinate`.

        Returns
        -------
        coordinate : `DataCoordinate`
            A `DataCoordinate` instance that identifies only the given
            dimensions.

        Raises
        ------
        KeyError
            Raised if ``graph`` is not a subset of ``self.graph``, and hence
            one or more dimensions has no associated primary key value.
        """
        return DataCoordinate(graph, tuple(self[dimension] for dimension in graph.required))

    @property
    def universe(self) -> DimensionUniverse:
        """The universe that defines all known dimensions compatible with
        this coordinate (`DimensionUniverse`).
        """
        return self.graph.universe

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    graph: DimensionGraph
    """The dimensions identified by this data ID (`DimensionGraph`).

    Note that values are only required to be present for dimensions in
    ``self.graph.required``; all others may be retrieved (from a `Registry`)
    given these.
    """


DataId = Union[DataCoordinate, Mapping[str, Any]]
"""A type-annotation alias for signatures that accept both informal data ID
dictionaries and validated `DataCoordinate` instances.
"""

 

 

def _intersectRegions(*args: Region) -> Optional[Region]:
    """Return the intersection of several regions.

    For internal use by `ExpandedDataCoordinate` only.

    If no regions are provided, returns `None`.

    This is currently a placeholder; it actually returns `NotImplemented`
    (it does *not* raise an exception) when multiple regions are given, which
    propagates to `ExpandedDataCoordinate`. This reflects the fact that we
    don't want to fail to construct an `ExpandedDataCoordinate` entirely when
    we can't compute its region, and at present we don't have a high-level use
    case for the regions of these particular data IDs.
    """
    if len(args) == 0:
        return None
    elif len(args) == 1:
        return args[0]
    else:
        return NotImplemented

 

 

@immutable
class ExpandedDataCoordinate(DataCoordinate):
    """A data ID that has been expanded to include all relevant metadata.

    Instances should usually be obtained by calling `Registry.expandDataId`.

    Parameters
    ----------
    graph : `DimensionGraph`
        The dimensions identified by this instance.
    values : `tuple`
        Tuple of primary key values for the given dimensions.
    records : `~collections.abc.Mapping`
        Dictionary mapping `DimensionElement` to `DimensionRecord`.
    full : `~collections.abc.Mapping`
        Dictionary mapping dimensions to their primary key values for all
        dimensions in the graph, not just required ones. Ignored unless
        ``conform`` is `False`.
    region : `sphgeom.Region`, optional
        Region on the sky associated with this data ID, or `None` if there
        are no spatial dimensions. At present, this may be the special value
        `NotImplemented` if there are multiple spatial dimensions identified;
        in the future this will be replaced with the intersection. Ignored
        unless ``conform`` is `False`.
    timespan : `Timespan`, optional
        Timespan associated with this data ID, or `None` if there are no
        temporal dimensions.
        Ignored unless ``conform`` is `False`.
    conform : `bool`, optional
        If `True` (default), adapt arguments from arbitrary mappings to the
        custom dictionary types and check that all expected key-value pairs
        are present. `False` is only for internal use.

    Notes
    -----
    To maintain Liskov substitutability with `DataCoordinate`,
    `ExpandedDataCoordinate` mostly acts like a mapping that contains only
    values for its graph's required dimensions, even though it also contains
    values for all implied dimensions; its length, iteration, and
    keys/values/items views reflect only required dimensions. Values for
    the primary keys of implied dimensions can be obtained from the `full`
    attribute, and are also accessible in dict lookups and the ``in`` operator.
    """

__slots__ = ("records", "full", "region", "timespan") 

 

def __new__(cls, graph: DimensionGraph, values: Tuple[Any, ...], *, 

records: Mapping[DimensionElement, DimensionRecord], 

full: Optional[Mapping[Dimension, Any]] = None, 

region: Optional[Region] = None, 

timespan: Optional[Timespan] = None, 

conform: bool = True): 

self = super().__new__(cls, graph, values) 

if conform: 

self.records = IndexedTupleDict( 

indices=graph._elementIndices, 

values=tuple(records[element] for element in graph.elements) 

) 

self.full = IndexedTupleDict( 

indices=graph._dimensionIndices, 

values=tuple(getattr(self.records[dimension], dimension.primaryKey.name, None) 

for dimension in graph.dimensions) 

) 

regions = [] 

for element in self.graph.spatial: 

record = self.records[element.name] 

if record is None or record.region is None: 

self.region = None 

break 

else: 

regions.append(record.region) 

else: 

self.region = _intersectRegions(*regions) 

timespans = [] 

for element in self.graph.temporal: 

record = self.records[element.name] 

if record is None or record.timespan is None: 

self.timespan = None 

break 

else: 

timespans.append(record.timespan) 

else: 

self.timespan = Timespan.intersection(*timespans) 

else: 

self.records = records 

self.full = full 

self.region = region 

self.timespan = timespan 

return self 

 

    def __contains__(self, key: Union[DimensionElement, str]) -> bool:
        return key in self.full

    def __getitem__(self, key: Union[DimensionElement, str]) -> Any:
        return self.full[key]

    def __repr__(self):
        return f"ExpandedDataCoordinate({self.graph}, {self.values()})"

    def pack(self, name: str, *, returnMaxBits: bool = False) -> int:
        """Pack this data ID into an integer.

        Parameters
        ----------
        name : `str`
            Name of the `DimensionPacker` algorithm (as defined in the
            dimension configuration).
        returnMaxBits : `bool`, optional
            If `True` (`False` is default), return the maximum number of
            nonzero bits in the returned integer across all data IDs.

        Returns
        -------
        packed : `int`
            Integer ID. This ID is unique only across data IDs that have
            the same values for the packer's "fixed" dimensions.
        maxBits : `int`, optional
            Maximum number of nonzero bits in ``packed``. Not returned unless
            ``returnMaxBits`` is `True`.
        """
        return self.universe.makePacker(name, self).pack(self, returnMaxBits=returnMaxBits)
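
    # Illustrative sketch (the packer name "visit_detector" is a
    # hypothetical entry in the dimension configuration, not something this
    # module defines):
    #
    #     packed = expandedDataId.pack("visit_detector")
    #     packed, maxBits = expandedDataId.pack("visit_detector",
    #                                           returnMaxBits=True)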

 

    def matches(self, other) -> bool:
        # Docstring inherited from DataCoordinate.matches.
        d = getattr(other, "full", other)
        return all(self[k] == d[k] for k in (self.full.keys() & d.keys()))

    def subset(self, graph: DimensionGraph) -> ExpandedDataCoordinate:
        # Docstring inherited from DataCoordinate.subset.
        return ExpandedDataCoordinate(
            graph,
            tuple(self[dimension] for dimension in graph.required),
            records=self.records,
            conform=True
        )

    def __getnewargs_ex__(self) -> Tuple[tuple, dict]:
        return (
            (self.graph, self.values()),
            dict(
                records=self.records,
                full=self.full,
                region=self.region,
                timespan=self.timespan,
                conform=False,
            )
        )
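
    # Illustrative sketch (hypothetical names; ``registry`` and the
    # "physical_filter" dimension come from elsewhere in daf_butler and the
    # configured universe, not from this module):
    #
    #     expanded = registry.expandDataId(instrument="HSC", visit=12345)
    #     list(expanded.keys())             # required dimensions only
    #     expanded.full["physical_filter"]  # implied dimensions via ``full``
    #     expanded.records["visit"]         # full `DimensionRecord` metadata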

 

    # Class attributes below are shadowed by instance attributes, and are
    # present just to hold the docstrings for those instance attributes.

    full: IndexedTupleDict[Dimension, Any]
    """Dictionary mapping dimensions to their primary key values for all
    dimensions in the graph, not just required ones (`IndexedTupleDict`).

    Like `DataCoordinate` itself, this dictionary can be indexed by `str` name
    as well as `Dimension` instance.
    """

    records: IndexedTupleDict[DimensionElement, DimensionRecord]
    """Dictionary mapping `DimensionElement` to the associated
    `DimensionRecord` (`IndexedTupleDict`).

    Like `DataCoordinate` itself, this dictionary can be indexed by `str` name
    as well as `DimensionElement` instance.
    """

    region: Optional[Region]
    """Region on the sky associated with this data ID, or `None` if there
    are no spatial dimensions (`sphgeom.Region`).

    At present, this may be the special value `NotImplemented` if there are
    multiple spatial dimensions identified; in the future this will be
    replaced with the intersection.
    """

    timespan: Optional[Timespan]
    """Timespan associated with this data ID, or `None` if there are no
    temporal dimensions (`Timespan`).
    """