from __future__ import print_function

import copy

__all__ = ["parallelCatalogWriter"]


def parallelCatalogWriter(catalog_dict, chunk_size=None, constraint=None,
                          write_mode='w', write_header=True):
    """
    Take several instantiated InstanceCatalogs that are based on the same
    CatalogDBObject and write them out in parallel from a single database
    query.  The imagined use case is simultaneously writing out a PhoSim
    InstanceCatalog as well as the truth catalog containing the
    pre-calculated positions and magnitudes of the sources.

    Parameters
    ----------
    catalog_dict is a dict keyed on the names of the files to be written.
    The values are the InstanceCatalogs to be written.  These are full
    instantiations of InstanceCatalogs, not just InstanceCatalog classes
    as with the CompoundInstanceCatalog.  They cannot be
    CompoundInstanceCatalogs.

    chunk_size is an int which optionally specifies the number of rows to
    be returned from db_obj at a time.

    constraint is an optional SQL constraint to be applied to the database
    query.  Note: constraints applied to individual catalogs will be
    ignored.

    write_mode is either 'w' (write) or 'a' (append), determining whether
    the writer will overwrite existing catalog files (if they exist).

    write_header is a boolean that controls whether to write the header
    in the catalogs.

    Output
    ------
    This method does not return anything; it just writes the files that
    are the keys of catalog_dict.
    """

    list_of_file_names = list(catalog_dict.keys())
    ref_cat = catalog_dict[list_of_file_names[0]]

    # Verify that every catalog shares the reference catalog's
    # ObservationMetaData and database connection; otherwise a single
    # shared query cannot serve them all.
    for ix, file_name in enumerate(list_of_file_names):
        if ix > 0:
            cat = catalog_dict[file_name]
            try:
                assert cat.obs_metadata == ref_cat.obs_metadata
            except AssertionError:
                print(cat.obs_metadata)
                print(ref_cat.obs_metadata)
                raise RuntimeError('Catalogs passed to parallelCatalogWriter have different '
                                   'ObservationMetaData. I do not know how to deal with that.')

            try:
                assert cat.db_obj.connection == ref_cat.db_obj.connection
            except AssertionError:
                msg = ('Cannot build these catalogs in parallel. '
                       'The two databases are different. Connection info is:\n'
                       'database: %s vs. %s\n' % (cat.db_obj.connection.database,
                                                  ref_cat.db_obj.connection.database)
                       + 'host: %s vs. %s\n' % (cat.db_obj.connection.host,
                                                ref_cat.db_obj.connection.host)
                       + 'port: %s vs. %s\n' % (cat.db_obj.connection.port,
                                                ref_cat.db_obj.connection.port)
                       + 'driver: %s vs. %s\n' % (cat.db_obj.connection.driver,
                                                  ref_cat.db_obj.connection.driver)
                       + 'table: %s vs. %s\n' % (cat.db_obj.tableid, ref_cat.db_obj.tableid)
                       + 'objid: %s vs. %s\n' % (cat.db_obj.objid, ref_cat.db_obj.objid))

                raise RuntimeError(msg)

    # Let each catalog perform its pre-write setup.
    for file_name in list_of_file_names:
        cat = catalog_dict[file_name]
        cat._write_pre_process()

    # Build the union of every catalog's active columns so that one
    # database query can feed all of the catalogs.
    active_columns = None
    for file_name in catalog_dict:
        cat = catalog_dict[file_name]
        if active_columns is None:
            active_columns = copy.deepcopy(cat._active_columns)
        else:
            for col_name in cat._active_columns:
                if col_name not in active_columns:
                    active_columns.append(col_name)

    query_result = ref_cat.db_obj.query_columns(colnames=active_columns,
                                                obs_metadata=ref_cat.obs_metadata,
                                                constraint=constraint,
                                                chunk_size=chunk_size)

    # Write the headers (if requested), then switch to append mode so
    # that the data chunks land below them.
    local_write_mode = write_mode
    if write_header:
        for file_name in catalog_dict:
            with open(file_name, local_write_mode) as file_handle:
                catalog_dict[file_name].write_header(file_handle)
        local_write_mode = 'a'

    # Stream the query results, letting each catalog filter the shared
    # chunk down to the rows it wants.  _filter_chunk caches the filtered
    # rows on the catalog, which _write_current_chunk then writes out.
    for master_chunk in query_result:

        for i_file, file_name in enumerate(list_of_file_names):
            chunk = master_chunk
            cat = catalog_dict[file_name]
            good_dexes = cat._filter_chunk(chunk)
            if len(good_dexes) < len(chunk):
                chunk = chunk[good_dexes]

            with open(file_name, local_write_mode) as file_handle:
                catalog_dict[file_name]._write_current_chunk(file_handle)

        local_write_mode = 'a'
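
# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative only).  _ToyDBObject and _ToyCatalog are
# hypothetical stand-ins, not part of the real CatalogDBObject/InstanceCatalog
# API; they implement only the attributes and methods parallelCatalogWriter
# touches, so the control flow above can be exercised end to end.

if __name__ == '__main__':

    import numpy as np

    class _ToyDBObject(object):
        """Stand-in for a CatalogDBObject; yields rows in chunks."""

        connection = 'toy-connection'  # identical across catalogs
        tableid = 'toy_table'
        objid = 'toy_obj'

        def query_columns(self, colnames=None, obs_metadata=None,
                          constraint=None, chunk_size=None):
            data = np.arange(10)
            chunk_size = chunk_size if chunk_size is not None else len(data)
            for i_start in range(0, len(data), chunk_size):
                yield data[i_start:i_start + chunk_size]

    class _ToyCatalog(object):
        """Stand-in for an InstanceCatalog; keeps even or odd rows."""

        def __init__(self, db_obj, keep_even):
            self.db_obj = db_obj
            self.obs_metadata = 'toy-obs'  # identical across catalogs
            self._active_columns = ['value']
            self._keep_even = keep_even
            self._current_chunk = None

        def _write_pre_process(self):
            pass  # real catalogs do their column bookkeeping here

        def _filter_chunk(self, chunk):
            # Cache the filtered rows; _write_current_chunk writes the cache.
            remainder = 0 if self._keep_even else 1
            good_dexes = np.where(chunk % 2 == remainder)[0]
            self._current_chunk = chunk[good_dexes]
            return good_dexes

        def _write_current_chunk(self, file_handle):
            for value in self._current_chunk:
                file_handle.write('%d\n' % value)

        def write_header(self, file_handle):
            file_handle.write('# toy catalog\n')

    # One shared "database" feeds two catalogs written in parallel.
    toy_db = _ToyDBObject()
    parallelCatalogWriter({'even.txt': _ToyCatalog(toy_db, keep_even=True),
                           'odd.txt': _ToyCatalog(toy_db, keep_even=False)},
                          chunk_size=4)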