#
# LSST Data Management System
#
# Copyright 2008-2017 AURA/LSST.
#
# This product includes software developed by the
# LSST Project (http://www.lsst.org/).
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the LSST License Statement and
# the GNU General Public License along with this program. If not,
# see <https://www.lsstcorp.org/LegalNotices/>.
#

__all__ = ["IngestIndexedReferenceConfig", "IngestIndexedReferenceTask", "DatasetConfig"]

import numpy as np

import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
import lsst.geom
import lsst.afw.table as afwTable
from lsst.afw.image import fluxFromABMag, fluxErrFromABMagErr
from .indexerRegistry import IndexerRegistry
from .readTextCatalogTask import ReadTextCatalogTask


class IngestReferenceRunner(pipeBase.TaskRunner):
    """!Task runner for the reference catalog ingester

    Data IDs are ignored, so the runner should just run the task on the parsed command.
    """

    def run(self, parsedCmd):
        """!Run the task.

        Several arguments need to be collected to send on to the task methods.

        @param[in] parsedCmd  Parsed command including command line arguments.
        @returns Struct containing the result of the indexing.
        """
        files = parsedCmd.files
        butler = parsedCmd.butler
        task = self.TaskClass(config=self.config, log=self.log, butler=butler)
        task.writeConfig(parsedCmd.butler, clobber=self.clobberConfig, doBackup=self.doBackup)

        result = task.create_indexed_catalog(files)
        if self.doReturnResults:
            return pipeBase.Struct(
                result=result,
            )


class DatasetConfig(pexConfig.Config):
    ref_dataset_name = pexConfig.Field(
        dtype=str,
        default='cal_ref_cat',
        doc='String to pass to the butler to retrieve persisted files.',
    )
    indexer = IndexerRegistry.makeField(
        default='HTM',
        doc='Name of indexer algorithm to use. Default is HTM.',
    )


class IngestIndexedReferenceConfig(pexConfig.Config):
    dataset_config = pexConfig.ConfigField(
        dtype=DatasetConfig,
        doc="Configuration for reading the ingested data",
    )
    file_reader = pexConfig.ConfigurableField(
        target=ReadTextCatalogTask,
        doc='Task to use to read the files. Default is to expect text files.'
    )
    ra_name = pexConfig.Field(
        dtype=str,
        doc="Name of RA column",
    )
    dec_name = pexConfig.Field(
        dtype=str,
        doc="Name of Dec column",
    )
    mag_column_list = pexConfig.ListField(
        dtype=str,
        doc="List of column names to use for photometric information; at least one entry is required. "
            "The values in the reference catalog are assumed to be in AB magnitudes.",
    )
    mag_err_column_map = pexConfig.DictField(
        keytype=str,
        itemtype=str,
        default={},
        doc="A map of magnitude column name (key) to magnitude error column name (value)."
    )
    is_photometric_name = pexConfig.Field(
        dtype=str,
        optional=True,
        doc='Name of column stating whether the object is satisfactory for photometric calibration (optional).'
    )
    is_resolved_name = pexConfig.Field(
        dtype=str,
        optional=True,
        doc='Name of column stating whether the object is resolved (optional).'
    )
    is_variable_name = pexConfig.Field(
        dtype=str,
        optional=True,
        doc='Name of column stating whether the object is measured to be variable (optional).'
    )
    id_name = pexConfig.Field(
        dtype=str,
        optional=True,
        doc='Name of column to use as an identifier (optional).'
    )
    extra_col_names = pexConfig.ListField(
        dtype=str,
        default=[],
        doc='Extra columns to add to the reference catalog.'
    )

    def validate(self):
        pexConfig.Config.validate(self)
        if not (self.ra_name and self.dec_name and self.mag_column_list):
            raise ValueError("ra_name, dec_name, and at least one entry in mag_column_list must be"
                             " supplied.")
        if len(self.mag_err_column_map) > 0 and not len(self.mag_column_list) == len(self.mag_err_column_map):
            raise ValueError("If magnitude errors are provided, all magnitudes must have an error column")
class IngestIndexedReferenceTask(pipeBase.CmdLineTask):
    """!Class for both producing indexed reference catalogs and for loading them.

    This implements an indexing scheme based on the hierarchical triangular mesh (HTM).
    Here "indexing" means breaking the catalog into localized chunks called shards;
    each shard contains the catalog entries that fall in a single HTM trixel.
    """
    canMultiprocess = False
    ConfigClass = IngestIndexedReferenceConfig
    RunnerClass = IngestReferenceRunner
    _DefaultName = 'IngestIndexedReferenceTask'

    _flags = ['photometric', 'resolved', 'variable']

    @classmethod
    def _makeArgumentParser(cls):
        """Create an argument parser.

        This overrides the base-class parser because we need the file arguments.
        """
        parser = pipeBase.InputOnlyArgumentParser(name=cls._DefaultName)
        parser.add_argument("files", nargs="+", help="Names of files to index")
        return parser

    def __init__(self, *args, **kwargs):
        """!Constructor for the HTM indexing engine

        @param[in] butler  dafPersistence.Butler object for reading and writing catalogs
        """
        self.butler = kwargs.pop('butler')
        pipeBase.Task.__init__(self, *args, **kwargs)
        self.indexer = IndexerRegistry[self.config.dataset_config.indexer.name](
            self.config.dataset_config.indexer.active)
        self.makeSubtask('file_reader')

    def create_indexed_catalog(self, files):
        """!Index a set of files comprising a reference catalog. Outputs are persisted in the
        data repository.

        @param[in] files  A list of file names to read.
        """
        rec_num = 0
        first = True
        for filename in files:
            arr = self.file_reader.run(filename)
            index_list = self.indexer.index_points(arr[self.config.ra_name], arr[self.config.dec_name])
            if first:
                schema, key_map = self.make_schema(arr.dtype)
                # persist empty catalog to hold the master schema
                dataId = self.indexer.make_data_id('master_schema',
                                                   self.config.dataset_config.ref_dataset_name)
                self.butler.put(self.get_catalog(dataId, schema), 'ref_cat',
                                dataId=dataId)
                first = False
            pixel_ids = set(index_list)
            for pixel_id in pixel_ids:
                dataId = self.indexer.make_data_id(pixel_id, self.config.dataset_config.ref_dataset_name)
                catalog = self.get_catalog(dataId, schema)
                els = np.where(index_list == pixel_id)
                for row in arr[els]:
                    record = catalog.addNew()
                    rec_num = self._fill_record(record, row, rec_num, key_map)
                self.butler.put(catalog, 'ref_cat', dataId=dataId)
        # persist the dataset config so the ingested catalog can be located and loaded later
        dataId = self.indexer.make_data_id(None, self.config.dataset_config.ref_dataset_name)
        self.butler.put(self.config.dataset_config, 'ref_cat_config', dataId=dataId)
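
    # Sketch of the sharding flow for one input file (values illustrative):
    # if index_points returns trixel ids [7, 7, 9] for three rows, rows 0-1
    # are appended to the 'ref_cat' shard with pixel_id 7 and row 2 to the
    # shard with pixel_id 9; each shard is persisted separately.
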
    @staticmethod
    def compute_coord(row, ra_name, dec_name):
        """!Create an ICRS SpherePoint from a np.array row

        @param[in] row       dict-like object with RA/Dec info in degrees
        @param[in] ra_name   name of RA key
        @param[in] dec_name  name of Dec key
        @returns ICRS SpherePoint constructed from the RA/Dec values
        """
        return lsst.geom.SpherePoint(row[ra_name], row[dec_name], lsst.geom.degrees)
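
    # A minimal sketch of compute_coord (the key names are hypothetical):
    #
    #     row = {"RA": 10.0, "DEC": -5.0}
    #     coord = IngestIndexedReferenceTask.compute_coord(row, "RA", "DEC")
    #     coord.getRa().asDegrees()  # -> 10.0
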
    def _set_flags(self, record, row, key_map):
        """!Set the flags for a record. Relies on the _flags class attribute

        @param[in,out] record   SourceCatalog record to modify
        @param[in] row          dict-like object containing flag info
        @param[in] key_map      Map of catalog keys to use in filling the record
        """
        names = record.schema.getNames()
        for flag in self._flags:
            if flag in names:
                attr_name = 'is_{}_name'.format(flag)
                record.set(key_map[flag], bool(row[getattr(self.config, attr_name)]))

    def _set_mags(self, record, row, key_map):
        """!Set the flux records from the input magnitudes

        @param[in,out] record   SourceCatalog record to modify
        @param[in] row          dict-like object containing magnitude values
        @param[in] key_map      Map of catalog keys to use in filling the record
        """
        for item in self.config.mag_column_list:
            record.set(key_map[item+'_flux'], fluxFromABMag(row[item]))
        if len(self.config.mag_err_column_map) > 0:
            for err_key in self.config.mag_err_column_map.keys():
                error_col_name = self.config.mag_err_column_map[err_key]
                record.set(key_map[err_key+'_fluxSigma'],
                           fluxErrFromABMagErr(row[error_col_name], row[err_key]))
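
    # The AB system defines flux = c * 10**(-0.4 * mag) for a fixed scale c,
    # so first-order error propagation gives
    # fluxErr = 0.4 * ln(10) * flux * magErr, which is what
    # fluxErrFromABMagErr computes from its (magErr, mag) arguments.
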
    def _set_extra(self, record, row, key_map):
        """!Copy the extra column information to the record

        @param[in,out] record   SourceCatalog record to modify
        @param[in] row          dict-like object containing the column values
        @param[in] key_map      Map of catalog keys to use in filling the record
        """
        for extra_col in self.config.extra_col_names:
            value = row[extra_col]
            # If data read from a text file contains string-like entries,
            # numpy stores them as its own internal type, a numpy.str_
            # object, a consequence of how numpy stores string-like objects
            # in fixed-width column arrays. This checks whether any of the
            # values to be added to the catalog are numpy string types and,
            # if they are, casts them to a Python string, which is what the
            # C++-backed record objects expect.
            if isinstance(value, np.str_):
                value = str(value)
            record.set(key_map[extra_col], value)

    def _fill_record(self, record, row, rec_num, key_map):
        """!Fill a record to put in the persisted indexed catalogs

        @param[in,out] record   afwTable.SourceRecord in a reference catalog to fill.
        @param[in] row          A row from a numpy array constructed from the input catalogs.
        @param[in] rec_num      Starting integer to increment for the unique id
        @param[in] key_map      Map of catalog keys to use in filling the record
        """
        record.setCoord(self.compute_coord(row, self.config.ra_name, self.config.dec_name))
        if self.config.id_name:
            record.setId(row[self.config.id_name])
        else:
            rec_num += 1
            record.setId(rec_num)
        # No parents
        record.setParent(-1)

        self._set_flags(record, row, key_map)
        self._set_mags(record, row, key_map)
        self._set_extra(record, row, key_map)
        return rec_num

    def get_catalog(self, dataId, schema):
        """!Get a catalog from the butler or create it if it doesn't exist.

        @param[in] dataId   Identifier for the catalog to retrieve
        @param[in] schema   Schema to use in catalog creation if the butler can't get it
        @returns catalog (an lsst.afw.table.SourceCatalog) for the specified identifier
        """
        if self.butler.datasetExists('ref_cat', dataId=dataId):
            return self.butler.get('ref_cat', dataId=dataId)
        return afwTable.SourceCatalog(schema)
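
    # Note: this is a read-modify-write pattern. An existing shard is loaded
    # and extended, so records accumulate across input files; re-running the
    # ingest over the same files into the same repository would append
    # duplicate records rather than replace them.
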
    def make_schema(self, dtype):
        """!Make the schema to use in constructing the persisted catalogs.

        @param[in] dtype  A np.dtype to use in constructing the schema
        @returns a pair of items:
        - The schema for the output source catalog.
        - A map of catalog keys to use in filling the record
        """
        key_map = {}
        mag_column_list = self.config.mag_column_list
        mag_err_column_map = self.config.mag_err_column_map
        if len(mag_err_column_map) > 0 and (
                not len(mag_column_list) == len(mag_err_column_map) or
                not sorted(mag_column_list) == sorted(mag_err_column_map.keys())):
            raise ValueError("Every magnitude column must have a corresponding error column")
        # makes a schema with a coord, id and parent_id
        schema = afwTable.SourceTable.makeMinimalSchema()

        def add_field(name):
            if dtype[name].kind == 'U':
                # String-like field: we need both the type and the size.
                at_type = str
                at_size = dtype[name].itemsize
                return schema.addField(name, type=at_type, size=at_size)
            else:
                at_type = dtype[name].type
                return schema.addField(name, at_type)

        for item in mag_column_list:
            key_map[item+'_flux'] = schema.addField(item+'_flux', float)
        if len(mag_err_column_map) > 0:
            for err_item in mag_err_column_map.keys():
                key_map[err_item+'_fluxSigma'] = schema.addField(err_item+'_fluxSigma', float)
        for flag in self._flags:
            attr_name = 'is_{}_name'.format(flag)
            if getattr(self.config, attr_name):
                key_map[flag] = schema.addField(flag, 'Flag')
        for col in self.config.extra_col_names:
            key_map[col] = add_field(col)
        return schema, key_map
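

# A minimal end-to-end sketch of driving the task programmatically
# (illustrative assumptions: a gen2 data repository at "REPO" and input text
# catalogs with "RA", "DEC", and "g" columns; the Butler import is not part
# of this module):
#
#     import lsst.daf.persistence as dafPersistence
#
#     butler = dafPersistence.Butler("REPO")
#     config = IngestIndexedReferenceConfig()
#     config.ra_name = "RA"
#     config.dec_name = "DEC"
#     config.mag_column_list = ["g"]
#     task = IngestIndexedReferenceTask(config=config, butler=butler)
#     task.create_indexed_catalog(["cat_part1.txt", "cat_part2.txt"])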