1"""Instance Catalog"""
2from __future__ import print_function
3from builtins import zip
4from builtins import str
5from builtins import range
6from builtins import object
7import warnings
8import numpy as np
9import inspect
10import re
11import copy
12from collections import OrderedDict
13from lsst.sims.utils import defaultSpecMap
14from lsst.sims.utils import ObservationMetaData
15from future.utils import with_metaclass
17__all__ = ["InstanceCatalog"]
20class InstanceCatalogMeta(type):
21 """Meta class for registering instance catalogs.
23 When any new type of instance catalog class is created, this registers it
24 in a `registry` class attribute, available to all derived instance
25 catalogs.
26 """
27 @staticmethod
28 def convert_to_underscores(name):
29 """convert, e.g. CatalogName to catalog_name"""
30 s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
31 return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower()
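
    # Illustrative sketch (not in the original source; 'GalaxyTileCatalog' is
    # a hypothetical class name): the two substitutions split on case
    # boundaries before lowercasing, e.g.
    #     >>> InstanceCatalogMeta.convert_to_underscores('GalaxyTileCatalog')
    #     'galaxy_tile_catalog'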

    def __new__(cls, name, bases, dct):
        # check if attribute catalog_type is specified.
        # If not, create a default
        if 'registry' in dct:
            warnings.warn("registry class attribute should not be "
                          "over-ridden in InstanceCatalog classes. "
                          "Proceed with caution")
        if 'catalog_type' not in dct:
            dct['catalog_type'] = cls.convert_to_underscores(name)

        dct['_cached_columns'] = {}
        dct['_compound_columns'] = {}
        dct['_compound_column_names'] = {}

        return super(InstanceCatalogMeta, cls).__new__(cls, name, bases, dct)

    def __init__(cls, name, bases, dct):
        # check if 'registry' is specified.
        # if not, then this is the base class: add the registry
        if not hasattr(cls, 'registry'):
            cls.registry = {}

        # add this class to the registry
        if cls.catalog_type in cls.registry:
            raise ValueError("Catalog Type %s is duplicated"
                             % cls.catalog_type)
        cls.registry[cls.catalog_type] = cls

        # add methods for default columns
        for default in cls.default_columns:
            setattr(cls, 'default_%s' % (default[0]),
                    lambda self, value=default[1], type=default[2]:
                    np.array([value for i in range(len(self._current_chunk))], dtype=type))
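
        # Sketch of the effect (the entry below is a hypothetical example):
        # with
        #     default_columns = [('raJ2000', 0.0, float)]
        # the setattr above attaches a method equivalent to
        #     def default_raJ2000(self):
        #         return np.array([0.0] * len(self._current_chunk), dtype=float)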

        # store compound columns and check for collisions
        #
        # We create a forward and backward mapping.
        # The dictionary cls._compound_columns maps the compound column
        # name to the multiple individual columns it represents.
        # The dictionary cls._compound_column_names maps the individual
        # column names to the compound column that contains them
        for key in dir(cls):
            if not key.startswith('get_'):
                continue
            compound_getter = getattr(cls, key)
            if not hasattr(compound_getter, '_compound_column'):
                continue

            for col in compound_getter._colnames:
                try:
                    getter = 'get_' + col
                except TypeError:
                    raise ValueError("column names in compound "
                                     "decorator must be strings")

                if hasattr(cls, getter):
                    raise ValueError("column name '%s' in compound getter "
                                     "'%s' conflicts with getter '%s'"
                                     % (col, key, getter))
                elif col in cls._compound_column_names:
                    raise ValueError("duplicate compound column name: '%s'"
                                     % col)
                else:
                    cls._compound_column_names[col] = key
            cls._compound_columns[key] = compound_getter._colnames

        return super(InstanceCatalogMeta, cls).__init__(name, bases, dct)


class _MimicRecordArray(object):
    """An object used for introspection of the database columns.

    This mimics a numpy record array, but when a column is referenced,
    it logs the reference and returns zeros.
    """
    def __init__(self):
        self.referenced_columns = set()

    def __getitem__(self, column):
        self.referenced_columns.add(column)
        return np.empty(0)

    def __len__(self):
        return 0
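
# A minimal sketch (illustrative, not part of the original module) of the
# introspection pattern _MimicRecordArray enables; 'raJ2000' is a hypothetical
# column name:
#
#     >>> mimic = _MimicRecordArray()
#     >>> _ = mimic['raJ2000']      # the reference is logged; zeros returned
#     >>> mimic.referenced_columns
#     {'raJ2000'}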


class InstanceCatalog(with_metaclass(InstanceCatalogMeta, object)):
    """ Base class for instance catalogs generated by simulations.

    Instance catalogs include a dictionary of numpy arrays which contain
    core data. Additional arrays can be appended as ancillary data.

    Catalog types and Object types are defined in the CatalogDescription class
    catalogType = TRIM, SCIENCE, PHOTCAL, DIASOURCE, MISC, INVALID
    objectType = Point, Moving, Sersic, Image, Artefact, MISC
    catalogTable is name of the database table queried
    dataArray dictionary of numpy arrays of data
    """

    # These are the class attributes to be specified in any derived class:
    catalog_type = 'instance_catalog'
    column_outputs = None
    specFileMap = defaultSpecMap
    default_columns = []
    cannot_be_null = None  # will be a list of columns which, if null, cause a row not to be printed by write_catalog()
    # Note: these columns will be filtered on even if they are not included in column_outputs

    default_formats = {'S': '%s', 'f': '%.4f', 'i': '%i'}
    override_formats = {}
    transformations = {}
    delimiter = ", "
    comment_char = "#"
    endline = "\n"
    _pre_screen = False  # if True, write_catalog() will check database query results against
    # cannot_be_null before calculating getter columns

    @classmethod
    def new_catalog(cls, catalog_type, *args, **kwargs):
        """Return a new catalog of the given catalog type"""
        if catalog_type in cls.registry:
            return cls.registry[catalog_type](*args, **kwargs)
        elif inspect.isclass(catalog_type) and issubclass(catalog_type, InstanceCatalog):
            return catalog_type(*args, **kwargs)
        else:
            raise ValueError("Unrecognized catalog_type: %s"
                             % str(catalog_type))

    @classmethod
    def is_compound_column(cls, column_name):
        """Return True if the given column name is a compound column"""
        getfunc = "get_%s" % column_name
        if hasattr(cls, getfunc):
            if hasattr(getattr(cls, getfunc), '_compound_column'):
                return True
        return False

    def iter_column_names(self):
        """Iterate over the column names, expanding any compound columns"""

        for column in self._column_outputs:
            if self.is_compound_column(column):
                for col in getattr(getattr(self, "get_" + column), '_colnames'):
                    yield col
            else:
                yield column
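
    # Illustrative sketch (hypothetical getter; assumes this package's
    # `compound` decorator, which sets the _compound_column attribute):
    # if a subclass defines
    #     @compound('ra_corr', 'dec_corr')
    #     def get_corrected_coords(self): ...
    # then iter_column_names() yields 'ra_corr' and 'dec_corr' in place of
    # the compound name 'corrected_coords'.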

    def __init__(self, db_obj, obs_metadata=None, column_outputs=None,
                 constraint=None, specFileMap=None, cannot_be_null=None):
        """
        @param [in] db_obj is an instantiation of the CatalogDBObject class,
        which provides a connection to a specific database table

        see sims_catalogs_generation/python/lsst/sims/catalogs/generation/db/dbConnection.py

        @param [in] obs_metadata is an instantiation of the ObservationMetaData class
        characterizing a specific telescope observation

        see sims_catalogs_generation/python/lsst/sims/catalogs/generation/db/ObservationMetaData.py

        @param [in] column_outputs is a list of column names to be output
        in the catalog. This is optional and will be appended to the list
        of column_outputs defined in the class definition.

        @param [in] cannot_be_null is a list of column names indicating columns
        which cannot have the values Null, None, or NaN. Rows running afoul
        of this criterion will not be written by the write_catalog() method
        (though they may appear in the iterator returned by iter_catalog()).
        Note: these columns will be filtered on, even if they do not appear in
        column_outputs.

        @param [in] constraint is an optional SQL constraint to be applied to the
        database query

        @param [in] specFileMap is an instantiation of the SpecMap class

        (defined in sims_catalogs_measures/python/sims/catalogs/measures/instance/fileMaps.py)

        that maps database entries for SED names to actual file paths. If set to None,
        the class definition of InstanceCatalog ensures that it will be set to
        defaultSpecMap, which is the correct mapping for the LSST sims_sed_library
        """

        self.verbose = db_obj.verbose

        self.db_obj = db_obj
        self._current_chunk = None

        # this dict will contain information telling the user where the columns in
        # the catalog come from
        self._column_origins = {}

        if obs_metadata is not None:
            if not isinstance(obs_metadata, ObservationMetaData):
                raise ValueError("You passed InstanceCatalog something that was not ObservationMetaData")

            self.obs_metadata = copy.deepcopy(obs_metadata)
        else:
            self.obs_metadata = ObservationMetaData()

        if self.column_outputs is not None:
            self._column_outputs = copy.deepcopy(self.column_outputs)

        if column_outputs is not None:
            if self.column_outputs is None:
                self._column_outputs = copy.deepcopy(column_outputs)
            else:
                for col in column_outputs:
                    if col not in self._column_outputs:
                        self._column_outputs.append(col)

        # Because cannot_be_null can be declared both at class definition
        # and at instantiation, we need to be able to combine the two inputs
        # into something the InstanceCatalog will actually use to filter
        # rows. self._cannot_be_null is a member variable that contains
        # the contents both of self.cannot_be_null (set at class definition)
        # and the cannot_be_null kwarg passed to __init__(). self._cannot_be_null
        # is what the catalog actually uses in self._filter_chunk
        self._cannot_be_null = None
        if self.cannot_be_null is not None:
            self._cannot_be_null = copy.deepcopy(self.cannot_be_null)

        if cannot_be_null is not None:
            if self.cannot_be_null is None:
                self._cannot_be_null = copy.deepcopy(cannot_be_null)
            else:
                for col in cannot_be_null:
                    if col not in self._cannot_be_null:
                        self._cannot_be_null.append(col)

        self._actually_calculated_columns = []  # a list of all the columns referenced by self.column_by_name
        self.constraint = constraint

        if specFileMap is not None:
            self.specFileMap = specFileMap

        self.refIdCol = self.db_obj.getIdColKey()

        self._column_cache = {}

        # self._column_origins_switch tells column_by_name to log where it is getting
        # the columns in self._column_origins (we only want to do that once)
        self._column_origins_switch = True

        # now we will create and populate a list containing the names of
        # all of the columns which this InstanceCatalog can return.
        # Note: this needs to happen before self._check_requirements()
        # is called in case any getters depend on the contents of
        # _all_available_columns. That way, self._check_requirements()
        # can verify that the getter will run the way it is actually
        # being called.
        self._all_available_columns = []

        for name in self.db_obj.columnMap.keys():
            if name not in self._all_available_columns:
                self._all_available_columns.append(name)

        for name in self._compound_column_names:
            if name not in self._all_available_columns:
                self._all_available_columns.append(name)

        for name in self._compound_columns:
            if name not in self._all_available_columns:
                self._all_available_columns.append(name)

        for name in dir(self):
            if name[:4] == 'get_':
                columnName = name[4:]
                if columnName not in self._all_available_columns:
                    self._all_available_columns.append(columnName)
            elif name[:8] == 'default_':
                columnName = name[8:]
                if columnName not in self._all_available_columns:
                    self._all_available_columns.append(columnName)

        if not hasattr(self, '_column_outputs'):
            self._column_outputs = []

            # do not include compound column names themselves: asking for a
            # compound column means asking for its individual sub-columns, so
            # listing both would duplicate those columns in the catalog
            for name in self._all_available_columns:
                if name not in self._compound_columns:
                    self._column_outputs.append(name)

        self._check_requirements()

    def _set_current_chunk(self, chunk, column_cache=None):
        """Set the current chunk and clear the column cache"""
        self._current_chunk = chunk
        if column_cache is None:
            self._column_cache = {}
        else:
            self._column_cache = column_cache

    def _delete_current_chunk(self):
        """
        Set the column cache and _current_chunk to None.
        This is just going to be called by the
        CompoundInstanceCatalog._write_compound method to try to control
        memory bloat as multiple copies of the returned database query
        accumulate in the different InstanceCatalogs being written.
        """
        self._column_cache = {}
        self._current_chunk = None

    def db_required_columns(self):
        """Get the list of columns required to be in the database object."""
        saved_cache = self._cached_columns
        saved_chunk = self._current_chunk
        self._set_current_chunk(_MimicRecordArray())

        for col_name in self.iter_column_names():
            # just call the column: this will log queries to the database.
            self.column_by_name(col_name)

        # now do the same thing for columns specified in _cannot_be_null
        # (in case the catalog is filtered on columns that are not meant
        # to be written to the catalog)
        if self._cannot_be_null is not None:
            for col_name in self._cannot_be_null:
                self.column_by_name(col_name)

        db_required_columns = list(self._current_chunk.referenced_columns)

        default_columns_set = set(el[0] for el in self.default_columns)
        required_columns_set = set(db_required_columns)
        required_columns_with_defaults = default_columns_set & required_columns_set

        self._set_current_chunk(saved_chunk, saved_cache)

        return db_required_columns, list(required_columns_with_defaults)
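
    # Illustrative trace (hypothetical column names): if 'magNorm' is produced
    # by a getter that internally calls self.column_by_name('flux'), then
    # iterating the columns against a _MimicRecordArray records 'flux' as a
    # referenced database column, so db_required_columns() reports it even
    # though only 'magNorm' is written to the catalog.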

    def column_by_name(self, column_name, *args, **kwargs):
        """Given a column name, return the column data"""

        if (isinstance(self._current_chunk, _MimicRecordArray) and
                column_name not in self._actually_calculated_columns):

            self._actually_calculated_columns.append(column_name)

        getfunc = "get_%s" % column_name
        if hasattr(self, getfunc):
            function = getattr(self, getfunc)

            if self._column_origins_switch:
                self._column_origins[column_name] = self._get_class_that_defined_method(function)

            return function(*args, **kwargs)
        elif column_name in self._compound_column_names:
            getfunc = self._compound_column_names[column_name]
            function = getattr(self, getfunc)

            if self._column_origins_switch and column_name:
                self._column_origins[column_name] = self._get_class_that_defined_method(function)

            compound_column = function(*args, **kwargs)
            return compound_column[column_name]
        elif (isinstance(self._current_chunk, _MimicRecordArray) or
              column_name in self._current_chunk.dtype.names):

            if self._column_origins_switch:
                self._column_origins[column_name] = 'the database'

            return self._current_chunk[column_name]
        else:

            if self._column_origins_switch:
                self._column_origins[column_name] = 'default column'

            return getattr(self, "default_%s" % column_name)(*args, **kwargs)

    def _check_requirements(self):
        """Check whether the supplied db_obj has the necessary column names"""

        missing_cols = []
        self._active_columns = []
        cols, defaults = self.db_required_columns()

        for col in cols:
            if col not in self.db_obj.columnMap:
                missing_cols.append(col)
            else:
                self._active_columns.append(col)

        self._column_origins_switch = False  # do not want to log column origins any more

        if len(missing_cols) > 0:
            nodefault = []
            for col in missing_cols:
                if col not in defaults:
                    nodefault.append(col)
                else:
                    # Because some earlier part of the code copies default columns
                    # into the same place as columns that exist natively in the
                    # database, this is where we have to mark columns that are
                    # set by default
                    self._column_origins[col] = 'default column'

            if len(nodefault) > 0:
                raise ValueError("Required columns missing from database: "
                                 "({0})".format(', '.join(nodefault)))

        if self.verbose:
            self.print_column_origins()

    def _make_line_template(self, chunk_cols):
        templ_list = []
        for i, col in enumerate(self.iter_column_names()):
            templ = self.override_formats.get(col, None)

            if templ is None:
                typ = chunk_cols[i].dtype.kind
                templ = self.default_formats.get(typ)

            if templ is None:
                if self.verbose:
                    warnings.warn("Using raw formatting for column '%s' "
                                  "with type %s" % (col, chunk_cols[i].dtype))
                templ = "%s"
            templ_list.append(templ)

        return self.delimiter.join(templ_list) + self.endline
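
    # For example (illustrative): three output columns whose dtype kinds are
    # 'f', 'i' and 'S', combined with the class-level default_formats,
    # delimiter, and endline above, yield the template
    #     "%.4f, %i, %s\n"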

    def write_header(self, file_handle):
        column_names = list(self.iter_column_names())
        file_handle.write("{0}".format(self.comment_char + self.delimiter.join(column_names)) +
                          self.endline)

    def write_catalog(self, filename, chunk_size=None,
                      write_header=True, write_mode='w'):
        """
        Query self.db_obj and write the resulting InstanceCatalog to
        an ASCII output file

        @param [in] filename is the name of the ASCII file to be written

        @param [in] chunk_size is an optional parameter telling the InstanceCatalog
        to query the database in manageable chunks (in case returning the whole catalog
        takes too much memory)

        @param [in] write_header is a boolean specifying whether or not to add a header
        to the output catalog (default True)

        @param [in] write_mode is 'w' if you want to overwrite the output file or
        'a' if you want to append to an existing output file (default: 'w')
        """

        self._write_pre_process()

        self._query_and_write(filename, chunk_size=chunk_size,
                              write_header=write_header,
                              write_mode=write_mode,
                              obs_metadata=self.obs_metadata,
                              constraint=self.constraint)

    def _query_and_write(self, filename, chunk_size=None, write_header=True,
                         write_mode='w', obs_metadata=None, constraint=None):
        """
        This method queries db_obj and writes the resulting recarray
        to the specified ASCII output file.

        @param [in] filename is the name of the ASCII file to be written

        @param [in] obs_metadata is an ObservationMetaData instantiation
        characterizing the telescope pointing (optional)

        @param [in] constraint is an optional SQL constraint applied to the database query.

        @param [in] chunk_size is an optional parameter telling the InstanceCatalog
        to query the database in manageable chunks (in case returning the whole catalog
        takes too much memory)

        @param [in] write_header is a boolean specifying whether or not to add a header
        to the output catalog (default True)

        @param [in] write_mode is 'w' if you want to overwrite the output file or
        'a' if you want to append to an existing output file (default: 'w')
        """

        with open(filename, write_mode) as file_handle:
            if write_header:
                self.write_header(file_handle)

            query_result = self.db_obj.query_columns(colnames=self._active_columns,
                                                     obs_metadata=obs_metadata,
                                                     constraint=constraint,
                                                     chunk_size=chunk_size)

            for chunk in query_result:
                self._write_recarray(chunk, file_handle)

    def _write_pre_process(self):
        """
        This method verifies the catalog's required columns and initializes
        some member variables that are required for the catalog-writing process.
        """
        db_required_columns, required_columns_with_defaults = self.db_required_columns()
        self._template = None

    def _update_current_chunk(self, good_dexes):
        """
        Update self._current_chunk and self._column_cache to only include the rows
        specified by good_dexes (which will be a list of indexes).
        """
        # In the event that self._column_cache has already been created,
        # update the cache so that only valid rows remain therein
        new_cache = {}
        if len(self._column_cache) > 0:
            for col_name in self._column_cache:
                if col_name in self._compound_column_names:
                    # this is a sub-column of a compound column;
                    # ignore it, we will update the cache when we come
                    # to the compound column
                    continue
                elif 'get_' + col_name in self._compound_columns:
                    super_col = self._column_cache[col_name]
                    new_cache[col_name] = OrderedDict([(key, super_col[key][good_dexes]) for key in super_col])
                else:
                    new_cache[col_name] = self._column_cache[col_name][good_dexes]

        self._set_current_chunk(self._current_chunk[good_dexes], column_cache=new_cache)

    def _filter_chunk(self, chunk):
        """
        Take a chunk of database rows and select only those that match the criteria
        set by self._cannot_be_null. Set self._current_chunk to be the rows that pass
        this test. Return a numpy array of the indices of those rows relative to
        the original chunk.
        """
        final_dexes = np.arange(len(chunk), dtype=int)

        if self._pre_screen and self._cannot_be_null is not None:
            # go through the database query results and remove all of those
            # rows that have already run afoul of self._cannot_be_null
            for col_name in self._cannot_be_null:
                if col_name in chunk.dtype.names:
                    if chunk[col_name].dtype == float:
                        good_dexes = np.where(np.isfinite(chunk[col_name]))
                    else:
                        str_vec = np.char.lower(chunk[col_name].astype('str'))
                        good_dexes = np.where(np.logical_and(str_vec != 'none',
                                              np.logical_and(str_vec != 'nan', str_vec != 'null')))
                    chunk = chunk[good_dexes]
                    final_dexes = final_dexes[good_dexes]

        self._set_current_chunk(chunk)

        # If some columns are specified as cannot_be_null, loop over those columns,
        # removing rows that run afoul of that criterion from the chunk.
        if self._cannot_be_null is not None:
            filter_switch = None
            for filter_col in self._cannot_be_null:
                filter_vals = self.column_by_name(filter_col)
                if filter_vals.dtype == float:
                    local_switch = np.isfinite(filter_vals)
                else:
                    try:
                        filter_vals = filter_vals.astype(float)
                        local_switch = np.isfinite(filter_vals)
                    except ValueError:
                        filter_vals = np.char.lower(filter_vals.astype('str'))
                        local_switch = np.logical_and(filter_vals != 'none',
                                       np.logical_and(filter_vals != 'nan', filter_vals != 'null'))
                if filter_switch is None:
                    filter_switch = local_switch
                else:
                    filter_switch &= local_switch

            good_dexes = np.where(filter_switch)
            final_dexes = final_dexes[good_dexes]

            if len(good_dexes[0]) < len(chunk):
                self._update_current_chunk(good_dexes)

        return final_dexes
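
    # Illustrative sketch ('sedFilename' is a hypothetical column): with
    # self._cannot_be_null = ['sedFilename'] and chunk values
    # ['a.txt', 'None', 'b.txt'], the string branch above keeps rows 0 and 2,
    # so _filter_chunk returns np.array([0, 2]) and trims self._current_chunk
    # to match.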

    def _write_current_chunk(self, file_handle):
        """
        Write self._current_chunk to the file specified by file_handle
        """
        if len(self._current_chunk) == 0:
            return

        list_of_transform_keys = list(self.transformations.keys())

        chunk_cols = [self.transformations[col](self.column_by_name(col))
                      if col in list_of_transform_keys else
                      self.column_by_name(col)
                      for col in self.iter_column_names()]

        # Create the template with the first chunk
        if self._template is None:
            self._template = self._make_line_template(chunk_cols)

        # use a generator expression for lines rather than a list
        # for memory efficiency
        file_handle.writelines(self._template % line for line in zip(*chunk_cols))

    def _write_recarray(self, chunk, file_handle):
        """
        This method takes a recarray (usually returned by querying db_obj),
        and writes it to the catalog. This method also handles any transformation
        of columns that needs to happen before they are written to the catalog.

        @param [in] chunk is the recarray of queried columns to be formatted
        and written to the catalog.

        @param [in] file_handle is a file handle pointing to the file where
        the catalog is being written.
        """
        self._filter_chunk(chunk)
        self._write_current_chunk(file_handle)

    def iter_catalog(self, chunk_size=None):
        """
        Iterate over the lines of a catalog.

        chunk_size controls the number of rows returned at a
        time from the database (smaller chunk_size will result
        in less memory usage but slower performance).

        Catalog rows will be returned as tuples of column values.
        """
        self.db_required_columns()

        query_result = self.db_obj.query_columns(colnames=self._active_columns,
                                                 obs_metadata=self.obs_metadata,
                                                 constraint=self.constraint,
                                                 chunk_size=chunk_size)

        list_of_transform_keys = list(self.transformations.keys())

        for chunk in query_result:
            self._filter_chunk(chunk)
            chunk_cols = [self.transformations[col](self.column_by_name(col))
                          if col in list_of_transform_keys else
                          self.column_by_name(col)
                          for col in self.iter_column_names()]
            for line in zip(*chunk_cols):
                yield line
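
    # Typical usage sketch (assumes `cat` is an instantiated InstanceCatalog):
    #     for line in cat.iter_catalog(chunk_size=10000):
    #         ...  # each line is one tuple of (possibly transformed) column values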

    def iter_catalog_chunks(self, chunk_size=None):
        """
        Iterate over catalog contents one chunk at a time.

        chunk_size controls the number of catalog rows contained
        in each chunk.

        The iterator will return a chunk of the database (a list of lists
        containing the contents of the database chunk). The first dimension
        of the chunk corresponds to the columns of the catalog, i.e. chunk[0]
        is a list containing the 0th column of the catalog.

        The iterator will also yield a colMap, which is a dict mapping the
        names of the columns to their index value in the chunk.

        Usage:

        for chunk, colMap in cat.iter_catalog_chunks(chunk_size=1000):
            for ix in range(len(chunk[0])):
                print(chunk[0][ix], chunk[1][ix], chunk[2][ix])

        will print out the first three columns of the catalog, row by row
        """
        self.db_required_columns()

        query_result = self.db_obj.query_columns(colnames=self._active_columns,
                                                 obs_metadata=self.obs_metadata,
                                                 constraint=self.constraint,
                                                 chunk_size=chunk_size)

        list_of_transform_keys = list(self.transformations.keys())

        for chunk in query_result:
            self._filter_chunk(chunk)
            chunk_cols = [self.transformations[col](self.column_by_name(col))
                          if col in list_of_transform_keys else
                          self.column_by_name(col)
                          for col in self.iter_column_names()]
            chunkColMap = dict([(col, i) for i, col in enumerate(self.iter_column_names())])
            yield chunk_cols, chunkColMap

    def get_objId(self):
        return self.column_by_name(self.refIdCol)

    def get_uniqueId(self, nShift=10):
        arr = self.column_by_name(self.refIdCol)
        if len(arr) > 0:
            return np.left_shift(self.column_by_name(self.refIdCol), nShift) + \
                   self.db_obj.getObjectTypeId()
        else:
            return arr
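
    # Illustrative arithmetic (hypothetical values): with the default
    # nShift=10 and db_obj.getObjectTypeId() == 25, a database id of 7 maps to
    #     (7 << 10) + 25 == 7193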

    def _get_class_that_defined_method(self, meth):
        """
        This method will return the class that first defined the
        input method.

        This is taken verbatim from
        http://stackoverflow.com/questions/961048/get-class-that-defined-method
        """
        for cls in inspect.getmro(meth.__self__.__class__):
            if meth.__name__ in cls.__dict__:
                return cls
        return None

    def print_column_origins(self):
        """
        Print the origins of the columns in this catalog
        """
        print('\nwhere the columns in ', self.__class__, ' come from')
        for column_name in self._column_origins:
            print(column_name, self._column_origins[column_name])

        print('\n')
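

# ---------------------------------------------------------------------------
# Minimal end-to-end sketch (hypothetical names throughout; assumes a
# CatalogDBObject subclass `StarDBObject` whose table provides the columns
# listed below, and that ObservationMetaData accepts these keyword arguments):
#
#     class StarCatalog(InstanceCatalog):
#         catalog_type = 'star_catalog'
#         column_outputs = ['raJ2000', 'decJ2000', 'magNorm']
#         transformations = {'raJ2000': np.degrees, 'decJ2000': np.degrees}
#
#     db = StarDBObject()
#     obs = ObservationMetaData(pointingRA=0.0, pointingDec=0.0,
#                               boundType='circle', boundLength=0.1)
#     StarCatalog(db, obs_metadata=obs).write_catalog('stars.txt')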