Coverage for python/lsst/sims/catalogs/definitions/ParallelCatalogWriter.py : 6%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1from __future__ import print_function
2import copy
5__all__ = ["parallelCatalogWriter"]
def parallelCatalogWriter(catalog_dict, chunk_size=None, constraint=None,
                          write_mode='w', write_header=True):
    """
    Write several InstanceCatalogs in parallel from a single database query.

    This method will take several InstanceCatalog instances that are meant
    to be based on the same CatalogDBObject and write them out in parallel
    from a single database query.  The imagined use-case is simultaneously
    writing out a PhoSim InstanceCatalog as well as the truth catalog with
    the pre-calculated positions and magnitudes of the sources.

    Parameters
    ----------
    catalog_dict is a dict keyed on the names of the files to be written.
    The values are the InstanceCatalogs to be written.  These are full
    instantiations of InstanceCatalogs, not just InstanceCatalog classes
    as with the CompoundInstanceCatalog.  They cannot be
    CompoundInstanceCatalogs.  If the dict is empty, nothing is queried or
    written.

    chunk_size is an int which optionally specifies the number of rows to be
    returned from db_obj at a time

    constraint is an optional SQL constraint to be applied to the database
    query.  Note: constraints applied to individual catalogs will be ignored.

    write_mode is either 'w' (write) or 'a' (append), determining whether or
    not the writer will overwrite existing catalog files (assuming they exist)

    write_header is a boolean that controls whether or not to write the header
    in the catalogs.

    Output
    ------
    This method does not return anything, it just writes the files that are
    the keys of catalog_dict

    Raises
    ------
    RuntimeError if the catalogs do not all share the same obs_metadata and
    the same db_obj.connection as the first catalog in catalog_dict.
    """
    list_of_file_names = list(catalog_dict.keys())
    if not list_of_file_names:
        # Nothing to write; avoid an opaque IndexError on the lookup below.
        return

    # The first catalog is the reference: it supplies the database object and
    # the ObservationMetaData used for the single shared query.
    ref_cat = catalog_dict[list_of_file_names[0]]
    for file_name in list_of_file_names[1:]:
        _assert_catalogs_compatible(catalog_dict[file_name], ref_cat)

    for file_name in list_of_file_names:
        catalog_dict[file_name]._write_pre_process()

    active_columns = _merge_active_columns(
        [catalog_dict[file_name] for file_name in list_of_file_names])

    query_result = ref_cat.db_obj.query_columns(colnames=active_columns,
                                                obs_metadata=ref_cat.obs_metadata,
                                                constraint=constraint,
                                                chunk_size=chunk_size)

    local_write_mode = write_mode
    if write_header:
        for file_name in list_of_file_names:
            with open(file_name, local_write_mode) as file_handle:
                catalog_dict[file_name].write_header(file_handle)
        # The headers are now on disk; everything that follows must append
        # so that it does not clobber them.
        local_write_mode = 'a'

    for master_chunk in query_result:
        for file_name in list_of_file_names:
            cat = catalog_dict[file_name]
            # The indices returned by _filter_chunk are not needed here.
            # NOTE(review): _write_current_chunk only receives the file
            # handle, so presumably _filter_chunk stores the rows to be
            # written on the catalog itself -- confirm against InstanceCatalog.
            cat._filter_chunk(master_chunk)
            with open(file_name, local_write_mode) as file_handle:
                cat._write_current_chunk(file_handle)

        # Every file has received its first chunk (and been truncated if
        # write_mode was 'w'); later chunks must be appended.
        local_write_mode = 'a'


def _values_match(lhs, rhs):
    """
    Return True only if lhs == rhs evaluates as true.

    A comparison that itself raises is treated as a mismatch, so that callers
    report it through their own error path.
    """
    try:
        return bool(lhs == rhs)
    except Exception:
        return False


def _assert_catalogs_compatible(cat, ref_cat):
    """
    Raise RuntimeError unless cat can share a database query with ref_cat,
    i.e. unless both have equal obs_metadata and equal db_obj.connection.
    """
    # Explicit comparisons rather than assert statements: asserts are removed
    # when Python runs with -O, which would silently disable this validation.
    if not _values_match(cat.obs_metadata, ref_cat.obs_metadata):
        print(cat.obs_metadata)
        print(ref_cat.obs_metadata)
        raise RuntimeError('Catalogs passed to parallelCatalogWriter have different '
                           'ObservationMetaData.  I do not know how to deal with that.')

    if not _values_match(cat.db_obj.connection, ref_cat.db_obj.connection):
        msg = ('Cannot build these catalogs in parallel. '
               'The two databases are different.  Connection info is:\n'
               'database: %s vs. %s\n' % (cat.db_obj.connection.database,
                                          ref_cat.db_obj.connection.database)
               + 'host: %s vs. %s\n' % (cat.db_obj.connection.host, ref_cat.db_obj.connection.host)
               + 'port: %s vs. %s\n' % (cat.db_obj.connection.port, ref_cat.db_obj.connection.port)
               + 'driver: %s vs. %s\n' % (cat.db_obj.connection.driver, ref_cat.db_obj.connection.driver)
               + 'table: %s vs. %s\n' % (cat.db_obj.tableid, ref_cat.db_obj.tableid)
               + 'objid: %s vs. %s\n' % (cat.db_obj.objid, ref_cat.db_obj.objid))

        raise RuntimeError(msg)


def _merge_active_columns(catalogs):
    """
    Return the union of the catalogs' _active_columns, in first-seen order.

    The first catalog's list is deep-copied so that appending the other
    catalogs' columns does not modify that catalog's own list.
    """
    active_columns = None
    for cat in catalogs:
        if active_columns is None:
            active_columns = copy.deepcopy(cat._active_columns)
            continue
        for col_name in cat._active_columns:
            if col_name not in active_columns:
                active_columns.append(col_name)
    return active_columns