"""Instance Catalog""" 

from __future__ import print_function 

from builtins import zip 

from builtins import str 

from builtins import range 

from builtins import object 

import warnings 

import numpy as np 

import inspect 

import re 

import copy 

from collections import OrderedDict 

from lsst.sims.utils import defaultSpecMap 

from lsst.sims.utils import ObservationMetaData 

from future.utils import with_metaclass 

 

__all__ = ["InstanceCatalog"] 

 

 

class InstanceCatalogMeta(type): 

"""Meta class for registering instance catalogs. 

 

When any new type of instance catalog class is created, this registers it 

in a `registry` class attribute, available to all derived instance 

catalogs. 

""" 

@staticmethod 

def convert_to_underscores(name): 

"""convert, e.g. CatalogName to catalog_name""" 

s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name) 

return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower() 

 

def __new__(cls, name, bases, dct): 

# check if attribute catalog_type is specified. 

# If not, create a default 

        if 'registry' in dct:

warnings.warn("registry class attribute should not be " 

"over-ridden in InstanceCatalog classes. " 

"Proceed with caution") 

if 'catalog_type' not in dct: 

dct['catalog_type'] = cls.convert_to_underscores(name) 

 

dct['_cached_columns'] = {} 

dct['_compound_columns'] = {} 

dct['_compound_column_names'] = {} 

 

return super(InstanceCatalogMeta, cls).__new__(cls, name, bases, dct) 

 

def __init__(cls, name, bases, dct): 

# check if 'registry' is specified. 

# if not, then this is the base class: add the registry 

if not hasattr(cls, 'registry'): 

cls.registry = {} 

 

# add this class to the registry 

        if cls.catalog_type in cls.registry:

raise ValueError("Catalog Type %s is duplicated" 

% cls.catalog_type) 

cls.registry[cls.catalog_type] = cls 

 

# add methods for default columns 

for default in cls.default_columns: 

            setattr(cls, 'default_%s' % (default[0]),

lambda self, value=default[1], type=default[2]: 

np.array([value for i in range(len(self._current_chunk))], dtype=type)) 
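            # Note: `value` and `type` are bound as default arguments so that
            # each generated default_<name> method captures its own column's
            # fill value and dtype (sidestepping the late-binding closure
            # pitfall inside this loop).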

 

# store compound columns and check for collisions 

# 

# We create a forward and backward mapping. 

# The dictionary cls._compound_columns maps the compound column 

# name to the multiple individual columns it represents. 

# The dictionary cls._compound_column_names maps the individual 

# column names to the compound column that contains them 

for key in dir(cls): 

if not key.startswith('get_'): 

continue 

compound_getter = getattr(cls, key) 

if not hasattr(compound_getter, '_compound_column'): 

continue 

 

for col in compound_getter._colnames: 

try: 

getter = 'get_'+col 

except TypeError: 

raise ValueError("column names in compound " 

"decorator must be strings") 

 

                if hasattr(cls, getter):

raise ValueError("column name '%s' in compound getter " 

"'%s' conflicts with getter '%s'" 

% (col, key, getter)) 

 

                elif col in cls._compound_column_names:

raise ValueError("duplicate compound column name: '%s'" 

% col) 

 

else: 

cls._compound_column_names[col] = key 

cls._compound_columns[key] = compound_getter._colnames 

 

return super(InstanceCatalogMeta, cls).__init__(name, bases, dct) 
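# A minimal sketch (hypothetical subclass) of what the metaclass provides:
# merely defining a subclass registers it under a snake_case catalog_type.
#
#     class RefCatalog(InstanceCatalog):
#         column_outputs = ['objId', 'raJ2000', 'decJ2000']
#
#     # InstanceCatalog.registry['ref_catalog'] is now RefCatalog, so
#     # InstanceCatalog.new_catalog('ref_catalog', db_obj) instantiates it.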

 

 

class _MimicRecordArray(object): 

"""An object used for introspection of the database colums. 

 

This mimics a numpy record array, but when a column is referenced, 

it logs the reference and returns zeros. 

""" 

def __init__(self): 

self.referenced_columns = set() 

 

def __getitem__(self, column): 

self.referenced_columns.add(column) 

return np.empty(0) 

 

def __len__(self): 

return 0 
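# The introspection pattern in brief (illustrative column name):
#
#     probe = _MimicRecordArray()
#     _ = probe['raJ2000']        # logs the access, returns np.empty(0)
#     probe.referenced_columns    # -> {'raJ2000'}
#
# db_required_columns() runs the getters against such an object to discover
# which database columns a catalog will actually query.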

 

 

class InstanceCatalog(with_metaclass(InstanceCatalogMeta, object)): 

""" Base class for instance catalogs generated by simulations. 

 

Instance catalogs include a dictionary of numpy arrays which contains 

core data. Additional arrays can be appended as ancillary data. 

 

Catalog types and Object types are defined in the CatalogDescription class 

catalogType = TRIM, SCIENCE, PHOTCAL, DIASOURCE, MISC, INVALID 

objectType = Point, Moving, Sersic, Image, Artefact, MISC 

    catalogTable is the name of the database table queried
    dataArray is a dictionary of numpy arrays of data

""" 

 

# These are the class attributes to be specified in any derived class: 

catalog_type = 'instance_catalog' 

column_outputs = None 

specFileMap = defaultSpecMap 

default_columns = [] 

cannot_be_null = None # will be a list of columns which, if null, cause a row not to be printed by write_catalog() 

# Note: these columns will be filtered on even if they are not included in column_outputs 

 

default_formats = {'S': '%s', 'f': '%.4f', 'i': '%i'} 

override_formats = {} 

transformations = {} 

delimiter = ", " 

comment_char = "#" 

endline = "\n" 

_pre_screen = False # if true, write_catalog() will check database query results against 

# cannot_be_null before calculating getter columns 

 

@classmethod 

def new_catalog(cls, catalog_type, *args, **kwargs): 

"""Return a new catalog of the given catalog type""" 

        if catalog_type in cls.registry:

return cls.registry[catalog_type](*args, **kwargs) 

elif inspect.isclass(catalog_type) and issubclass(catalog_type, InstanceCatalog): 

return catalog_type(*args, **kwargs) 

else: 

raise ValueError("Unrecognized catalog_type: %s" 

% str(catalog_type)) 

 

@classmethod 

def is_compound_column(cls, column_name): 

"""Return true if the given column name is a compound column""" 

getfunc = "get_%s" % column_name 

if hasattr(cls, getfunc): 

            if hasattr(getattr(cls, getfunc), '_compound_column'):

return True 

return False 

 

def iter_column_names(self): 

"""Iterate the column names, expanding any compound columns""" 

 

for column in self._column_outputs: 

            if self.is_compound_column(column):

for col in getattr(getattr(self, "get_" + column), '_colnames'): 

yield col 

else: 

yield column 

 

def __init__(self, db_obj, obs_metadata=None, column_outputs=None, 

constraint=None, specFileMap=None, cannot_be_null=None): 

 

""" 

@param [in] db_obj is an instantiation of the CatalogDBObject class, 

        which provides a connection to a specific database table

 

see sims_catalogs_generation/python/lsst/sims/catalogs/generation/db/dbConnection.py 

 

@param [in] obs_metadata is an instantiation of the ObservationMetaData class 

characterizing a specific telescope observation 

 

see sims_catalogs_generation/python/lsst/sims/catalogs/generation/db/ObservationMetaData.py 

 

@param [in] column_outputs is a list of column names to be output 

in the catalog. This is optional and will be appended to the list 

        of column_outputs defined in the class definition.

 

@param [in] cannot_be_null is a list of column names indicating columns 

which cannot have the values Null, None, or NaN. Rows running afoul 

of this criterion will not be written by the write_catalog() method 

(though they may appear in the iterator returned by iter_catalog()). 

Note: these columns will be filtered on, even if they do not appear in 

column_outputs. 

 

@param [in] constraint is an optional SQL constraint to be applied to the 

database query 

 

@param [in] specFileMap is an instantiation of the SpecMap class 

 

(defined in sims_catalogs_measures/python/sims/catalogs/measures/instance/fileMaps.py) 

 

that maps database entries for SED names to actual file paths. If set to None, 

the class definition of InstanceCatalog ensures that it will be set to 

defaultSpecMap, which is the correct mapping for the LSST sims_sed_library 

""" 

 

self.verbose = db_obj.verbose 

 

self.db_obj = db_obj 

self._current_chunk = None 

 

# this dict will contain information telling the user where the columns in 

# the catalog come from 

self._column_origins = {} 

 

if obs_metadata is not None: 

if not isinstance(obs_metadata, ObservationMetaData): 

raise ValueError("You passed InstanceCatalog something that was not ObservationMetaData") 

 

self.obs_metadata = copy.deepcopy(obs_metadata) 

else: 

self.obs_metadata = ObservationMetaData() 

 

if self.column_outputs is not None: 

self._column_outputs = copy.deepcopy(self.column_outputs) 

 

if column_outputs is not None: 

if self.column_outputs is None: 

self._column_outputs = copy.deepcopy(column_outputs) 

else: 

for col in column_outputs: 

if col not in self._column_outputs: 

self._column_outputs.append(col) 

 

# Because cannot_be_null can both be declared at class definition 

# and at instantiation, we need to be able to combine the two inputs 

# into something the InstanceCatalog will actually use to filter 

# rows. self._cannot_be_null is a member variable that contains 

# the contents both of self.cannot_be_null (set at class definition) 

# and the cannot_be_null kwarg passed to __init__(). self._cannot_be_null 

# is what the catalog actually uses in self._filter_chunk 

self._cannot_be_null = None 

if self.cannot_be_null is not None: 

self._cannot_be_null = copy.deepcopy(self.cannot_be_null) 

 

if cannot_be_null is not None: 

if self.cannot_be_null is None: 

self._cannot_be_null = copy.deepcopy(cannot_be_null) 

else: 

for col in cannot_be_null: 

                    if col not in self._cannot_be_null:

self._cannot_be_null.append(col) 

 

self._actually_calculated_columns = [] # a list of all the columns referenced by self.column_by_name 

self.constraint = constraint 

 

        if specFileMap is not None:

self.specFileMap = specFileMap 

 

self.refIdCol = self.db_obj.getIdColKey() 

 

self._column_cache = {} 

 

# self._column_origins_switch tells column_by_name to log where it is getting 

# the columns in self._column_origins (we only want to do that once) 

self._column_origins_switch = True 

 

# now we will create and populate a list containing the names of 

# all of the columns which this InstanceCatalog can return. 

# Note: this needs to happen before self._check_requirements() 

# is called in case any getters depend on the contents of 

# _all_available_columns. That way, self._check_requirements() 

# can verify that the getter will run the way it is actually 

# being called. 

self._all_available_columns = [] 

 

for name in self.db_obj.columnMap.keys(): 

            if name not in self._all_available_columns:

self._all_available_columns.append(name) 

 

for name in self._compound_column_names: 

            if name not in self._all_available_columns:

self._all_available_columns.append(name) 

 

for name in self._compound_columns: 

            if name not in self._all_available_columns:

self._all_available_columns.append(name) 

 

for name in dir(self): 

if name[:4] == 'get_': 

columnName = name[4:] 

if columnName not in self._all_available_columns: 

self._all_available_columns.append(columnName) 

elif name[:8] == 'default_': 

columnName = name[8:] 

if columnName not in self._all_available_columns: 

self._all_available_columns.append(columnName) 

 

        if not hasattr(self, '_column_outputs'):

self._column_outputs = [] 

 

        # Exclude compound columns here: asking for a compound column means
        # asking for its individual sub-columns, so those columns would
        # otherwise be listed twice in the catalog.

for name in self._all_available_columns: 

if name not in self._compound_columns: 

self._column_outputs.append(name) 

 

self._check_requirements() 

 

def _set_current_chunk(self, chunk, column_cache=None): 

"""Set the current chunk and clear the column cache""" 

self._current_chunk = chunk 

if column_cache is None: 

self._column_cache = {} 

else: 

self._column_cache = column_cache 

 

def db_required_columns(self): 

"""Get the list of columns required to be in the database object.""" 

saved_cache = self._cached_columns 

saved_chunk = self._current_chunk 

self._set_current_chunk(_MimicRecordArray()) 

 

for col_name in self.iter_column_names(): 

# just call the column: this will log queries to the database. 

self.column_by_name(col_name) 

 

# now do the same thing for columns specified in _cannot_be_null 

# (in case the catalog is filtered on columns that are not meant 

# to be written to the catalog) 

if self._cannot_be_null is not None: 

for col_name in self._cannot_be_null: 

self.column_by_name(col_name) 

 

db_required_columns = list(self._current_chunk.referenced_columns) 

 

default_columns_set = set(el[0] for el in self.default_columns) 

required_columns_set = set(db_required_columns) 

required_columns_with_defaults = default_columns_set & required_columns_set 

 

self._set_current_chunk(saved_chunk, saved_cache) 

 

return db_required_columns, list(required_columns_with_defaults) 
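    # For illustration (hypothetical columns): db_required_columns() might
    # return (['raJ2000', 'decJ2000', 'sedFilename'], ['sedFilename']),
    # i.e. three columns must come from the database, and sedFilename also
    # has a fallback defined in default_columns.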

 

def column_by_name(self, column_name, *args, **kwargs): 

"""Given a column name, return the column data""" 

 

if (isinstance(self._current_chunk, _MimicRecordArray) and 

column_name not in self._actually_calculated_columns): 

 

self._actually_calculated_columns.append(column_name) 

 

getfunc = "get_%s" % column_name 

if hasattr(self, getfunc): 

function = getattr(self, getfunc) 

 

if self._column_origins_switch: 

self._column_origins[column_name] = self._get_class_that_defined_method(function) 

 

return function(*args, **kwargs) 

elif column_name in self._compound_column_names: 

getfunc = self._compound_column_names[column_name] 

function = getattr(self, getfunc) 

 

if self._column_origins_switch and column_name: 

self._column_origins[column_name] = self._get_class_that_defined_method(function) 

 

compound_column = function(*args, **kwargs) 

return compound_column[column_name] 

        elif (isinstance(self._current_chunk, _MimicRecordArray) or

column_name in self._current_chunk.dtype.names): 

 

if self._column_origins_switch: 

self._column_origins[column_name] = 'the database' 

 

return self._current_chunk[column_name] 

else: 

 

if self._column_origins_switch: 

self._column_origins[column_name] = 'default column' 

 

return getattr(self, "default_%s"%column_name)(*args, **kwargs) 

 

def _check_requirements(self): 

"""Check whether the supplied db_obj has the necessary column names""" 

 

missing_cols = [] 

self._active_columns = [] 

cols, defaults = self.db_required_columns() 

 

for col in cols: 

if col not in self.db_obj.columnMap: 

missing_cols.append(col) 

else: 

self._active_columns.append(col) 

 

self._column_origins_switch = False # do not want to log column origins any more 

 

if len(missing_cols) > 0: 

nodefault = [] 

for col in missing_cols: 

if col not in defaults: 

nodefault.append(col) 

else: 

# Because some earlier part of the code copies default columns 

# into the same place as columns that exist natively in the 

# database, this is where we have to mark columns that are 

# set by default 

self._column_origins[col] = 'default column' 

 

if len(nodefault) > 0: 

raise ValueError("Required columns missing from database: " 

"({0})".format(', '.join(nodefault))) 

 

        if self.verbose:

self.print_column_origins() 

 

def _make_line_template(self, chunk_cols): 

templ_list = [] 

for i, col in enumerate(self.iter_column_names()): 

templ = self.override_formats.get(col, None) 

 

            if templ is None:

typ = chunk_cols[i].dtype.kind 

templ = self.default_formats.get(typ) 

 

if templ is None: 

                if self.verbose:

warnings.warn("Using raw formatting for column '%s' " 

"with type %s" % (col, chunk_cols[i].dtype)) 

templ = "%s" 

templ_list.append(templ) 

 

return self.delimiter.join(templ_list) + self.endline 

 

    def write_header(self, file_handle):
        """Write a comment-prefixed header line listing the column names."""
        column_names = list(self.iter_column_names())
        file_handle.write("{0}".format(self.comment_char + self.delimiter.join(column_names)) +
                          self.endline)

 

def write_catalog(self, filename, chunk_size=None, 

write_header=True, write_mode='w'): 

""" 

        Query self.db_obj and write the resulting InstanceCatalog to

an ASCII output file 

 

@param [in] filename is the name of the ASCII file to be written 

 

        @param [in] chunk_size is an optional parameter telling the InstanceCatalog

to query the database in manageable chunks (in case returning the whole catalog 

takes too much memory) 

 

@param [in] write_header a boolean specifying whether or not to add a header 

to the output catalog (default True) 

 

@param [in] write_mode is 'w' if you want to overwrite the output file or 

'a' if you want to append to an existing output file (default: 'w') 

""" 

 

self._write_pre_process() 

 

self._query_and_write(filename, chunk_size=chunk_size, 

write_header=write_header, 

write_mode=write_mode, 

obs_metadata=self.obs_metadata, 

constraint=self.constraint) 
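    # A usage sketch (hypothetical catalog class and database connection):
    #
    #     cat = MyCatalog(db_obj, obs_metadata=obs, cannot_be_null=['sedFilename'])
    #     cat.write_catalog('catalog.txt', chunk_size=10000)
    #
    # queries the database in 10000-row chunks and drops any row whose
    # sedFilename renders as None/NaN/null.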

 

def _query_and_write(self, filename, chunk_size=None, write_header=True, 

write_mode='w', obs_metadata=None, constraint=None): 

""" 

This method queries db_obj, and then writes the resulting recarray 

to the specified ASCII output file. 

 

@param [in] filename is the name of the ASCII file to be written 

 

@param [in] obs_metadata is an ObservationMetaData instantiation 

characterizing the telescope pointing (optional) 

 

@param [in] constraint is an optional SQL constraint applied to the database query. 

 

        @param [in] chunk_size is an optional parameter telling the InstanceCatalog

to query the database in manageable chunks (in case returning the whole catalog 

takes too much memory) 

 

@param [in] write_header a boolean specifying whether or not to add a header 

to the output catalog (default True) 

 

@param [in] write_mode is 'w' if you want to overwrite the output file or 

'a' if you want to append to an existing output file (default: 'w') 

""" 

 

with open(filename, write_mode) as file_handle: 

            if write_header:

self.write_header(file_handle) 

 

query_result = self.db_obj.query_columns(colnames=self._active_columns, 

obs_metadata=obs_metadata, 

constraint=constraint, 

chunk_size=chunk_size) 

 

for chunk in query_result: 

self._write_recarray(chunk, file_handle) 

 

def _write_pre_process(self): 

""" 

        This method verifies the catalog's required columns and initializes
        some member variables that are required for the catalog-writing process.

""" 

db_required_columns, required_columns_with_defaults = self.db_required_columns() 

self._template = None 

 

def _update_current_chunk(self, good_dexes): 

""" 

Update self._current_chunk and self._column_cache to only include the rows 

specified by good_dexes (which will be a list of indexes). 

""" 

# In the event that self._column_cache has already been created, 

# update the cache so that only valid rows remain therein 

new_cache = {} 

if len(self._column_cache) > 0: 

for col_name in self._column_cache: 

                if col_name in self._compound_column_names:

# this is a sub-column of a compound column; 

# ignore it, we will update the cache when we come 

# to the compound column 

continue 

elif 'get_'+col_name in self._compound_columns: 

super_col = self._column_cache[col_name] 

new_cache[col_name] = OrderedDict([(key, super_col[key][good_dexes]) for key in super_col]) 

else: 

new_cache[col_name] = self._column_cache[col_name][good_dexes] 

 

self._set_current_chunk(self._current_chunk[good_dexes], column_cache=new_cache) 

 

def _filter_chunk(self, chunk): 

""" 

Take a chunk of database rows and select only those that match the criteria 

set by self._cannot_be_null. Set self._current_chunk to be the rows that pass 

this test. Return a numpy array of the indices of those rows relative to 

the original chunk. 

""" 

final_dexes = np.arange(len(chunk), dtype=int) 

 

if self._pre_screen and self._cannot_be_null is not None: 

# go through the database query results and remove all of those 

# rows that have already run afoul of self._cannot_be_null 

for col_name in self._cannot_be_null: 

if col_name in chunk.dtype.names: 

str_vec = np.char.lower(chunk[col_name].astype('str')) 

good_dexes = np.where(np.logical_and(str_vec != 'none', 

np.logical_and(str_vec != 'nan', str_vec != 'null'))) 

chunk = chunk[good_dexes] 

final_dexes = final_dexes[good_dexes] 

 

self._set_current_chunk(chunk) 

 

# If some columns are specified as cannot_be_null, loop over those columns, 

# removing rows that run afoul of that criterion from the chunk. 

if self._cannot_be_null is not None: 

for filter_col in self._cannot_be_null: 

filter_vals = np.char.lower(self.column_by_name(filter_col).astype('str')) 

 

good_dexes = np.where(np.logical_and(filter_vals != 'none', 

np.logical_and(filter_vals != 'nan', filter_vals != 'null'))) 

 

final_dexes = final_dexes[good_dexes] 

 

if len(good_dexes[0]) < len(chunk): 

self._update_current_chunk(good_dexes) 

 

return final_dexes 
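    # Filtering detail: candidate values are compared as lowercased strings,
    # so the rows removed are those whose filtered column renders as 'none',
    # 'nan', or 'null' (e.g. np.NaN -> 'nan', Python None -> 'none').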

 

def _write_current_chunk(self, file_handle): 

""" 

write self._current_chunk to the file specified by file_handle 

""" 

        if len(self._current_chunk) == 0:

return 

 

list_of_transform_keys = list(self.transformations.keys()) 

 

chunk_cols = [self.transformations[col](self.column_by_name(col)) 

if col in list_of_transform_keys else 

self.column_by_name(col) 

for col in self.iter_column_names()] 

 

# Create the template with the first chunk 

if self._template is None: 

self._template = self._make_line_template(chunk_cols) 

 

# use a generator expression for lines rather than a list 

# for memory efficiency 

file_handle.writelines(self._template % line for line in zip(*chunk_cols)) 

 

def _write_recarray(self, chunk, file_handle): 

""" 

This method takes a recarray (usually returned by querying db_obj), 

and writes it to the catalog. This method also handles any transformation 

of columns that needs to happen before they are written to the catalog. 

 

@param [in] chunk is the recarray of queried columns to be formatted 

and written to the catalog. 

 

@param [in] file_handle is a file handle pointing to the file where 

the catalog is being written. 

""" 

self._filter_chunk(chunk) 

self._write_current_chunk(file_handle) 

 

def iter_catalog(self, chunk_size=None): 

""" 

Iterate over the lines of a catalog. 

 

chunk_size controls the number of rows returned at a 

time from the database (smaller chunk_size will result 

in less memory usage but slower performance). 

 

Catalog rows will be returned as lists. 

""" 

self.db_required_columns() 

 

query_result = self.db_obj.query_columns(colnames=self._active_columns, 

obs_metadata=self.obs_metadata, 

constraint=self.constraint, 

chunk_size=chunk_size) 

 

list_of_transform_keys = list(self.transformations.keys()) 

 

for chunk in query_result: 

self._filter_chunk(chunk) 

chunk_cols = [self.transformations[col](self.column_by_name(col)) 

if col in list_of_transform_keys else 

self.column_by_name(col) 

for col in self.iter_column_names()] 

for line in zip(*chunk_cols): 

yield line 
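    # Usage sketch (assumes an already-constructed catalog `cat`):
    #
    #     for row in cat.iter_catalog(chunk_size=1000):
    #         first_col = row[0]  # column order follows iter_column_names()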

 

def iter_catalog_chunks(self, chunk_size=None): 

""" 

Iterate over catalog contents one chunk at a time. 

 

chunk_size controls the number of catalog rows contained 

in each chunk. 

 

The iterator will return a chunk of the database (a list of lists 

        containing the contents of the database chunk). The first dimension

of the chunk corresponds to the columns of the catalog, i.e. chunk[0] 

is a list containing the 0th column of the catalog. 

 

The iterator will also yield a colMap, which is a dict mapping the 

names of the columns to their index value in the chunk. 

 

Usage: 

 

for chunk, colMap in cat.iter_catalog_chunks(chunk_size=1000): 

for ix in range(len(chunk[0])): 

                print(chunk[0][ix], chunk[1][ix], chunk[2][ix])

 

will print out the first three columns of the catalog, row by row 

""" 

self.db_required_columns() 

 

query_result = self.db_obj.query_columns(colnames=self._active_columns, 

obs_metadata=self.obs_metadata, 

constraint=self.constraint, 

chunk_size=chunk_size) 

 

list_of_transform_keys = list(self.transformations.keys()) 

 

for chunk in query_result: 

self._filter_chunk(chunk) 

chunk_cols = [self.transformations[col](self.column_by_name(col)) 

if col in list_of_transform_keys else 

self.column_by_name(col) 

for col in self.iter_column_names()] 

chunkColMap = dict([(col, i) for i, col in enumerate(self.iter_column_names())]) 

yield chunk_cols, chunkColMap 

 

def get_objId(self): 

return self.column_by_name(self.refIdCol) 

 

def get_uniqueId(self, nShift=10): 

arr = self.column_by_name(self.refIdCol) 

if len(arr) > 0: 

return np.left_shift(self.column_by_name(self.refIdCol), nShift) + \ 

self.db_obj.getObjectTypeId() 

else: 

return arr 
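    # Worked example with illustrative numbers: for nShift=10, a database id
    # of 17 and an object type id of 3 give uniqueId = (17 << 10) + 3 = 17411;
    # the low 10 bits carry the object type, the remaining bits the database id.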

 

def _get_class_that_defined_method(self, meth): 

""" 

This method will return the name of the class that first defined the 

input method. 

 

This is taken verbatim from 

http://stackoverflow.com/questions/961048/get-class-that-defined-method 

""" 

 

        for cls in inspect.getmro(meth.__self__.__class__):

if meth.__name__ in cls.__dict__: 

return cls 

 

return None 

 

def print_column_origins(self): 

""" 

Print the origins of the columns in this catalog 

""" 

 

print('\nwhere the columns in ', self.__class__, ' come from') 

for column_name in self._column_origins: 

print(column_name, self._column_origins[column_name]) 

 

print('\n')