Coverage for python/lsst/meas/algorithms/readTextCatalogTask.py: 40%

30 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-01 11:27 +0000

1# 

2# LSST Data Management System 

3# 

4# Copyright 2008-2017 AURA/LSST. 

5# 

6# This product includes software developed by the 

7# LSST Project (http://www.lsst.org/). 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the LSST License Statement and 

20# the GNU General Public License along with this program. If not, 

21# see <https://www.lsstcorp.org/LegalNotices/>. 

22# 

23 

24__all__ = ["ReadTextCatalogConfig", "ReadTextCatalogTask"] 

25 

26import numpy as np 

27from astropy.table import Table 

28 

29import lsst.pex.config as pexConfig 

30import lsst.pipe.base as pipeBase 

31 

32 

class ReadTextCatalogConfig(pexConfig.Config):
    """Configuration for `ReadTextCatalogTask`.

    The fields control how the text file is parsed: how many leading lines
    are skipped, which column names are used, the delimiter and file format,
    and how missing values are filled.
    """

    # Number of leading lines to skip before the header (or before the data,
    # when ``colnames`` is given).
    header_lines = pexConfig.Field(
        dtype=int,
        default=0,
        doc='Number of lines to skip when reading the text reference file.'
    )
    # Explicit column names; an empty list means "read them from the file".
    colnames = pexConfig.ListField(
        dtype=str,
        default=[],
        doc="An ordered list of column names to use in ingesting the catalog. "
            "With an empty list, column names will be discovered from the first line "
            "after the skipped header lines."
    )
    delimiter = pexConfig.Field(
        dtype=str,
        default=',',
        doc='Delimiter to use when reading text reference files. Comma is default.'
    )
    # Passed through as the ``format`` argument of ``Table.read``.
    format = pexConfig.Field(
        dtype=str,
        default='csv',
        doc=("Format of files to read, from the astropy.table I/O list here: "
             "http://docs.astropy.org/en/stable/io/unified.html#built-in-table-readers-writers")
    )
    # Optional [<match_string>, <fill_value>] pair; None leaves the reader's
    # default handling of missing values in place.
    fill_values = pexConfig.ListField(
        dtype=str,
        default=None,
        optional=True,
        doc=("A list giving [<match_string>, <fill_value>], which is used to mask"
             " the given values in the input file. '0' is suggested for the fill value in order to prevent"
             " changing the column datatype. The default behavior is to fill empty data with zeros. See "
             "https://docs.astropy.org/en/stable/io/ascii/read.html#bad-or-missing-values for more details. "
             "Use `replace_missing_floats_with_nan` to change floats to NaN instead of <fill_value>.")
    )
    replace_missing_floats_with_nan = pexConfig.Field(
        dtype=bool,
        default=False,
        doc="If True, replace missing data in float columns with NaN instead of zero. If `fill_values` is "
            "set, this parameter will replace the floats identified as missing by `fill_values`, and the fill"
            " value from `fill_values` will be overridden with NaN for floats."
    )

74 

75 

class ReadTextCatalogTask(pipeBase.Task):
    """Load an object catalog from a text file into a numpy array.

    Parsing is delegated to `astropy.table.Table.read`, driven by the
    options in `ReadTextCatalogConfig`.
    """
    _DefaultName = 'readCatalog'
    ConfigClass = ReadTextCatalogConfig

    def run(self, filename):
        """Read an object catalog from the given text file.

        Parameters
        ----------
        filename : `str`
            Path to the text file to read.

        Returns
        -------
        catalog : `numpy.ndarray`
            A numpy structured array containing the specified columns.
        """
        config = self.config

        if config.colnames:
            # Column names are supplied explicitly, so the skipped header
            # lines are simply ignored and data begins right after them.
            # list() keeps the pex_config proxy object from being passed on.
            read_options = {
                'names': list(config.colnames),
                'data_start': config.header_lines,
            }
        else:
            # No names were given: the header itself starts at this line.
            read_options = {'header_start': config.header_lines}

        if config.fill_values:
            # The single [match, fill] pair is wrapped in an outer list
            # before being handed to the reader.
            read_options['fill_values'] = [list(config.fill_values)]

        table = Table.read(
            filename,
            format=config.format,
            delimiter=config.delimiter,
            **read_options,
        )

        # Other readers return plain numpy arrays; do the same here for
        # backwards compatibility.
        catalog = np.array(table.as_array())

        if not config.replace_missing_floats_with_nan:
            return catalog

        # Overwrite the masked (missing) entries of float columns with NaN.
        for name in table.columns:
            column_dtype = table.dtype[name]
            if column_dtype == np.float32 or column_dtype == np.float64:
                catalog[name][table.mask[name]] = np.nan

        return catalog