Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1from __future__ import print_function 

2import copy 

3 

4 

# Public names exported by a star-import of this module.
__all__ = ["parallelCatalogWriter"]

6 

7 

def parallelCatalogWriter(catalog_dict, chunk_size=None, constraint=None,
                          write_mode='w', write_header=True):
    """
    Write several InstanceCatalogs to disk from a single database query.

    All catalogs are meant to be based on the same CatalogDBObject.  The
    imagined use-case is simultaneously writing out a PhoSim InstanceCatalog
    as well as the truth catalog with the pre-calculated positions and
    magnitudes of the sources.

    Parameters
    ----------
    catalog_dict is a dict keyed on the names of the files to be written.
    The values are the InstanceCatalogs to be written.  These are full
    instantiations of InstanceCatalogs, not just InstanceCatalog classes
    as with the CompoundInstanceCatalog.  They cannot be
    CompoundInstanceCatalogs.  The dict must contain at least one entry;
    the first entry is used as the reference catalog whose db_obj and
    obs_metadata drive the query.

    chunk_size is an int which optionally specifies the number of rows to be
    returned from db_obj at a time

    constraint is an optional SQL constraint to be applied to the database
    query.  Note: constraints applied to individual catalogs will be ignored.

    write_mode is either 'w' (write) or 'a' (append), determining whether or
    not the writer will overwrite existing catalog files (assuming they exist)

    write_header is a boolean that controls whether or not to write the header
    in the catalogs.

    Output
    ------
    This method does not return anything, it just writes the files that are
    the keys of catalog_dict

    Raises
    ------
    RuntimeError if any catalog's obs_metadata or database connection does
    not compare equal to that of the first catalog in catalog_dict.
    """

    list_of_file_names = list(catalog_dict.keys())
    ref_cat = catalog_dict[list_of_file_names[0]]

    # Every catalog after the first must agree with the reference catalog.
    # Explicit comparisons are used (rather than assert) so that the checks
    # still run when Python is started with -O.
    for file_name in list_of_file_names[1:]:
        cat = catalog_dict[file_name]

        if not (cat.obs_metadata == ref_cat.obs_metadata):
            print(cat.obs_metadata)
            print(ref_cat.obs_metadata)
            raise RuntimeError('Catalogs passed to parallelCatalogWriter have different '
                               'ObservationMetaData. I do not know how to deal with that.')

        cat_conn = cat.db_obj.connection
        ref_conn = ref_cat.db_obj.connection
        if not (cat_conn == ref_conn):
            msg = ('Cannot build these catalogs in parallel. '
                   'The two databases are different. Connection info is:\n'
                   'database: %s vs. %s\n'
                   'host: %s vs. %s\n'
                   'port: %s vs. %s\n'
                   'driver: %s vs. %s\n'
                   'table: %s vs. %s\n'
                   'objid: %s vs. %s\n'
                   % (cat_conn.database, ref_conn.database,
                      cat_conn.host, ref_conn.host,
                      cat_conn.port, ref_conn.port,
                      cat_conn.driver, ref_conn.driver,
                      cat.db_obj.tableid, ref_cat.db_obj.tableid,
                      cat.db_obj.objid, ref_cat.db_obj.objid))

            raise RuntimeError(msg)

    for file_name in list_of_file_names:
        catalog_dict[file_name]._write_pre_process()

    # Build the union of the columns requested by all catalogs, keeping the
    # order in which each column name is first seen.
    active_columns = None
    for file_name in list_of_file_names:
        cat = catalog_dict[file_name]
        if active_columns is None:
            active_columns = copy.deepcopy(cat._active_columns)
        else:
            for col_name in cat._active_columns:
                if col_name not in active_columns:
                    active_columns.append(col_name)

    # A single query, issued through the reference catalog, feeds every file.
    query_result = ref_cat.db_obj.query_columns(colnames=active_columns,
                                                obs_metadata=ref_cat.obs_metadata,
                                                constraint=constraint,
                                                chunk_size=chunk_size)

    local_write_mode = write_mode
    if write_header:
        for file_name in list_of_file_names:
            with open(file_name, local_write_mode) as file_handle:
                catalog_dict[file_name].write_header(file_handle)
        # Every file now exists with its header; later writes must append.
        local_write_mode = 'a'

    for master_chunk in query_result:

        for file_name in list_of_file_names:
            cat = catalog_dict[file_name]
            chunk = master_chunk
            good_dexes = cat._filter_chunk(chunk)
            if len(good_dexes) < len(chunk):
                chunk = chunk[good_dexes]
            # NOTE(review): the filtered `chunk` is not passed to the writer
            # below; presumably _filter_chunk stores the rows to be written
            # on the catalog itself -- confirm against InstanceCatalog.

            with open(file_name, local_write_mode) as file_handle:
                cat._write_current_chunk(file_handle)

        # Once the first chunk has been written to every file, only append.
        local_write_mode = 'a'