lsst.meas.algorithms  15.0-15-g2f3508ec+1
readTextCatalogTask.py
Go to the documentation of this file.
1 #
2 # LSST Data Management System
3 #
4 # Copyright 2008-2017 AURA/LSST.
5 #
6 # This product includes software developed by the
7 # LSST Project (http://www.lsst.org/).
8 #
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
13 #
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the LSST License Statement and
20 # the GNU General Public License along with this program. If not,
21 # see <https://www.lsstcorp.org/LegalNotices/>.
22 #
23 
24 __all__ = ["ReadTextCatalogConfig", "ReadTextCatalogTask"]
25 
26 import numpy as np
27 
28 import lsst.pex.config as pexConfig
29 import lsst.pipe.base as pipeBase
30 
31 
class ReadTextCatalogConfig(pexConfig.Config):
    """Configuration for ReadTextCatalogTask

    Selects how many leading lines of the text file are skipped, which column
    names are used, and which delimiter separates the columns.
    """
    # Leading lines of the file that are skipped before any parsing happens.
    header_lines = pexConfig.Field(
        doc="Number of lines to skip when reading the text reference file.",
        dtype=int,
        default=0,
    )
    # Explicit column names; an empty list means "discover them from the file".
    colnames = pexConfig.ListField(
        doc=("An ordered list of column names to use in ingesting the catalog. "
             "With an empty list, column names will be discovered from the first line "
             "after the skipped header lines."),
        dtype=str,
        default=[],
    )
    # Column separator handed to the text reader.
    delimiter = pexConfig.Field(
        doc="Delimiter to use when reading text reference files. Comma is default.",
        dtype=str,
        default=",",
    )
50 
51 
57 
58 
class ReadTextCatalogTask(pipeBase.Task):
    r"""!Read an object catalog from a text file

    @anchor ReadTextCatalogTask_

    @section meas_algorithms_readTextCatalog_Contents  Contents

     - @ref meas_algorithms_readTextCatalog_Purpose
     - @ref meas_algorithms_readTextCatalog_Initialize
     - @ref meas_algorithms_readTextCatalog_Config
     - @ref meas_algorithms_readTextCatalog_Example

    @section meas_algorithms_readTextCatalog_Purpose  Description

    Read an object catalog from a text file.  Designed to read foreign catalogs
    so they can be written out in a form suitable for IngestIndexedReferenceTask.

    @section meas_algorithms_readTextCatalog_Initialize  Task initialisation

    @copydoc \_\_init\_\_

    @section meas_algorithms_readTextCatalog_Config  Configuration parameters

    See @ref ReadTextCatalogConfig

    @section meas_algorithms_readTextCatalog_Example  A complete example of using ReadTextCatalogTask

    Given a file named `table.csv` containing the following (the header line uses
    the same delimiter as the data; comma is the default):

        ra,     dec,    flux
        5.5,    -45.2,  12453
        19.6,   34.2,   32123

    you can read this file with the following code:

        from lsst.meas.algorithms.readTextCatalogTask import ReadTextCatalogTask
        task = ReadTextCatalogTask()
        catalogArray = task.run("table.csv")

    The resulting `catalogArray` is a numpy structured array containing three fields
    ("ra", "dec" and "flux") and two rows of data. For more complex cases,
    config parameters allow you to specify the names of the columns (instead of using automatic discovery)
    and set the number of rows to skip.
    """
    _DefaultName = 'readCatalog'
    ConfigClass = ReadTextCatalogConfig

    def run(self, filename):
        """Read an object catalog from the specified text file

        The file is parsed with numpy.genfromtxt, skipping config.header_lines
        leading lines and splitting columns on config.delimiter.  Column names are
        taken from config.colnames when that list is non-empty; otherwise they are
        discovered from the first line after the skipped header lines.

        @param[in] filename  path to text file
        @return a numpy structured array containing the specified columns; it is
            always at least 1-dimensional, and any column read in as a bytes string
            is converted to unicode
        """
        # names=True asks genfromtxt to read the column names from the first unskipped line.
        names = self.config.colnames if self.config.colnames else True
        arr = np.genfromtxt(filename, dtype=None, skip_header=self.config.header_lines,
                            delimiter=self.config.delimiter,
                            names=names)

        # Explicitly convert the bytes type into unicode for any column that was read in as a
        # bytes string.  A bytes field of n bytes holds at most n characters, so a unicode
        # field of the same length is always wide enough.
        fieldTypes = [arr.dtype[name] for name in arr.dtype.names]
        newDtype = [
            (name, np.dtype('|U{}'.format(fieldType.itemsize)) if fieldType.kind == 'S' else fieldType)
            for name, fieldType in zip(arr.dtype.names, fieldTypes)
        ]
        arr = arr.astype(newDtype)

        # A file with a single data line yields a 0-dimensional array; promote it to 1-d
        # so callers can always iterate over rows.
        return np.atleast_1d(arr)