# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__all__ = ["makeTestRepo", "makeTestCollection", "addDatasetType", "expandUniqueId"]

import numpy as np

from lsst.daf.butler import Butler, Config, DatasetType


def makeTestRepo(root, dataIds, *, config=None, **kwargs):
    """Create an empty repository with dummy data IDs.

    Parameters
    ----------
    root : `str`
        The location of the root directory for the repository.
    dataIds : `~collections.abc.Mapping` [`str`, `iterable`]
        A mapping keyed by the dimensions used in the test. Each value
        is an iterable of names for that dimension (e.g., detector IDs for
        `"detector"`). Related dimensions (e.g., instruments and detectors)
        are linked arbitrarily.
    config : `lsst.daf.butler.Config`, optional
        A configuration for the repository (for details, see
        `lsst.daf.butler.Butler.makeRepo`). If omitted, creates a repository
        with default dataset and storage types, but optimized for speed.
    **kwargs
        Extra arguments to `lsst.daf.butler.Butler.makeRepo`.

    Returns
    -------
    butler : `lsst.daf.butler.Butler`
        A Butler referring to the new repository. This Butler is provided only
        for additional setup; to keep test cases isolated, it is highly
        recommended that each test create its own Butler with a
        unique run/collection. See `makeTestCollection`.

    Notes
    -----
    This function provides a "quick and dirty" repository for simple unit
    tests that don't depend on complex data relationships. Because it assigns
    dimension relationships and other metadata arbitrarily, it is ill-suited
    for tests where the structure of the data matters. If you need such a
    dataset, create it directly or use a saved test dataset.

    Since the values in ``dataIds`` uniquely determine the repository's
    data IDs, the fully linked IDs can be recovered by calling
    `expandUniqueId`, so long as no other code has inserted dimensions into
    the repository registry.
    """
    if not config:
        config = Config()
        config["datastore", "cls"] = "lsst.daf.butler.datastores.inMemoryDatastore.InMemoryDatastore"
        config["datastore", "checksum"] = False  # In case of future changes
        config["registry", "db"] = "sqlite:///:memory:"
    # newConfig guards against location-related keywords like outfile
    newConfig = Butler.makeRepo(root, config=config, **kwargs)
    butler = Butler(newConfig, writeable=True)
    dimensionRecords = _makeRecords(dataIds, butler.registry.dimensions)
    for dimension, records in dimensionRecords.items():
        butler.registry.insertDimensionData(dimension, *records)
    return butler
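
# A minimal usage sketch for `makeTestRepo` (the path, dimension values, and
# dataset type below are illustrative, not fixtures shipped with this module):
#
#     butler = makeTestRepo(
#         "testdir", {"instrument": ["notACam"], "detector": [1, 2]})
#     addDatasetType(butler, "calexp", {"instrument", "detector"}, "ExposureF")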


def makeTestCollection(repo):
    """Create a read/write Butler to a fresh collection.

    Parameters
    ----------
    repo : `lsst.daf.butler.Butler`
        A previously existing Butler to a repository, such as that returned by
        `~lsst.daf.butler.Butler.makeRepo` or `makeTestRepo`.

    Returns
    -------
    butler : `lsst.daf.butler.Butler`
        A Butler referring to a new collection in the given repository.
        The collection is (almost) guaranteed to be new.
    """
    # Create a "random" collection name
    # Speed matters more than cryptographic guarantees
    collection = "test" + "".join(str(i) for i in np.random.randint(0, 10, size=8))
    return Butler(butler=repo, run=collection)
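
# A minimal per-test isolation sketch (assumes a repository created once for
# the whole test class; `DATA_IDS` and the class names are illustrative):
#
#     class ButlerTestCase(unittest.TestCase):
#         @classmethod
#         def setUpClass(cls):
#             cls.repo = makeTestRepo(cls.root, DATA_IDS)
#
#         def setUp(self):
#             # Each test writes to its own fresh run/collection.
#             self.butler = makeTestCollection(self.repo)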


def _makeRecords(dataIds, universe):
    """Create cross-linked dimension records from a collection of
    data ID values.

    Parameters
    ----------
    dataIds : `~collections.abc.Mapping` [`str`, `iterable`]
        A mapping keyed by the dimensions of interest. Each value is an
        iterable of names for that dimension (e.g., detector IDs for
        `"detector"`).
    universe : `lsst.daf.butler.DimensionUniverse`
        Set of all known dimensions and their relationships.

    Returns
    -------
    records : `~collections.abc.Mapping` [`str`, `iterable`]
        A mapping keyed by the dimensions of interest, giving one
        `~lsst.daf.butler.DimensionRecord` for each input name. Related
        dimensions (e.g., instruments and detectors) are linked arbitrarily.
    """
    expandedIds = {}
    # Provide alternate keys like detector names
    for name, values in dataIds.items():
        expandedIds[name] = []
        dimension = universe[name]
        for value in values:
            expandedValue = {}
            for key in dimension.uniqueKeys:
                if key.nbytes:
                    castType = bytes
                else:
                    castType = key.dtype().python_type
                try:
                    castValue = castType(value)
                except TypeError:
                    castValue = castType()
                expandedValue[key.name] = castValue
            for key in dimension.metadata:
                if not key.nullable:
                    expandedValue[key.name] = key.dtype().python_type(value)
            expandedIds[name].append(expandedValue)

    # Pick cross-relationships arbitrarily
    for name, values in expandedIds.items():
        dimension = universe[name]
        for value in values:
            for other in dimension.graph.required:
                if other != dimension:
                    relation = expandedIds[other.name][0]
                    value[other.name] = relation[other.primaryKey.name]
            # Do not recurse, to keep the user from having to provide
            # irrelevant dimensions
            for other in dimension.implied:
                if other != dimension and other.name in expandedIds and other.viewOf is None:
                    relation = expandedIds[other.name][0]
                    value[other.name] = relation[other.primaryKey.name]

    return {dimension: [universe[dimension].RecordClass.fromDict(value) for value in values]
            for dimension, values in expandedIds.items()}
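
# Illustrative sketch of `_makeRecords` behavior (hypothetical values; the
# exact record fields depend on the dimension universe in use):
#
#     records = _makeRecords({"instrument": ["notACam"], "detector": [1]},
#                            butler.registry.dimensions)
#     # records["detector"][0] is a DimensionRecord whose "instrument" field
#     # was arbitrarily linked to "notACam", the first instrument provided.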


def expandUniqueId(butler, partialId):
    """Return a complete data ID matching some criterion.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The repository to query.
    partialId : `~collections.abc.Mapping` [`str`, any]
        A mapping of known dimensions and values.

    Returns
    -------
    dataId : `lsst.daf.butler.DataCoordinate`
        The unique data ID that matches ``partialId``.

    Raises
    ------
    ValueError
        Raised if ``partialId`` does not uniquely identify a data ID.

    Notes
    -----
    This method will only work correctly if all dimensions attached to the
    target dimension (e.g., "physical_filter" for "visit") are known to the
    repository, even if they're not needed to identify a dataset.

    Examples
    --------
    .. code-block:: py

       >>> butler = makeTestRepo(
               "testdir", {"instrument": ["notACam"], "detector": [1]})
       >>> expandUniqueId(butler, {"detector": 1})
       DataCoordinate({instrument, detector}, ('notACam', 1))
    """
    # The example is *not* a doctest because it requires dangerous I/O
    registry = butler.registry
    dimensions = registry.dimensions.extract(partialId.keys()).required

    query = " AND ".join(f"{dimension} = {value!r}" for dimension, value in partialId.items())

    dataIds = list(registry.queryDimensions(dimensions, where=query, expand=False))
    if len(dataIds) == 1:
        return dataIds[0]
    else:
        raise ValueError(f"Found {len(dataIds)} matches for {partialId}, expected 1.")


def addDatasetType(butler, name, dimensions, storageClass):
    """Add a new dataset type to a repository.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The repository to update.
    name : `str`
        The name of the dataset type.
    dimensions : `set` [`str`]
        The dimensions of the new dataset type.
    storageClass : `str`
        The storage class the dataset will use.

    Returns
    -------
    datasetType : `lsst.daf.butler.DatasetType`
        The new type.

    Raises
    ------
    ValueError
        Raised if the dimensions or storage class is invalid.

    Notes
    -----
    Dataset types are shared across all collections in a repository, so this
    function does not need to be run for each collection.
    """
    try:
        datasetType = DatasetType(name, dimensions, storageClass,
                                  universe=butler.registry.dimensions)
        butler.registry.registerDatasetType(datasetType)
        return datasetType
    except KeyError as e:
        raise ValueError(f"Invalid dimensions or storage class for dataset type {name}.") from e
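
# A minimal usage sketch for `addDatasetType` (the dataset type name, storage
# class, and data ID values are illustrative):
#
#     addDatasetType(butler, "mean", {"instrument", "detector"}, "NumpyArray")
#     butler.put(np.zeros(10), "mean", instrument="notACam", detector=1)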