lsst.meas.algorithms  13.0-24-g22030a45+4
readTextCatalogTask.py
Go to the documentation of this file.
1 #
2 # LSST Data Management System
3 #
4 # Copyright 2008-2017 AURA/LSST.
5 #
6 # This product includes software developed by the
7 # LSST Project (http://www.lsst.org/).
8 #
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
13 #
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the LSST License Statement and
20 # the GNU General Public License along with this program. If not,
21 # see <https://www.lsstcorp.org/LegalNotices/>.
22 #
23 from __future__ import absolute_import, division, print_function
24 
25 __all__ = ["ReadTextCatalogConfig", "ReadTextCatalogTask"]
26 
27 import numpy as np
28 
29 import lsst.pex.config as pexConfig
30 import lsst.pipe.base as pipeBase
31 
32 
class ReadTextCatalogConfig(pexConfig.Config):
    """Options controlling how a delimited text catalog is parsed:
    header lines to skip, column names and the field delimiter.
    """
    # Count of leading lines dropped before anything else is read.
    header_lines = pexConfig.Field(
        doc="Number of lines to skip when reading the text reference file.",
        dtype=int,
        default=0,
    )
    # Explicit column names; leaving this empty requests discovery from the file.
    colnames = pexConfig.ListField(
        doc=("An ordered list of column names to use in ingesting the catalog. With an empty list, "
             "column names will be discovered from the first line after the skipped header lines."),
        dtype=str,
        default=[],
    )
    # Separator between fields on each line.
    delimiter = pexConfig.Field(
        doc="Delimiter to use when reading text reference files. Comma is default.",
        dtype=str,
        default=",",
    )
51 
52 
58 
59 
class ReadTextCatalogTask(pipeBase.Task):
    """!Read an object catalog from a text file

    @anchor ReadTextCatalogTask_

    @section meas_algorithms_readTextCatalog_Contents  Contents

     - @ref meas_algorithms_readTextCatalog_Purpose
     - @ref meas_algorithms_readTextCatalog_Initialize
     - @ref meas_algorithms_readTextCatalog_Config
     - @ref meas_algorithms_readTextCatalog_Example

    @section meas_algorithms_readTextCatalog_Purpose  Description

    Parse a delimited text file into a numpy structured array. Designed to read foreign catalogs
    so they can be written out in a form suitable for IngestIndexedReferenceTask.

    @section meas_algorithms_readTextCatalog_Initialize  Task initialisation

    @copydoc \_\_init\_\_

    @section meas_algorithms_readTextCatalog_Config  Configuration parameters

    See @ref ReadTextCatalogConfig

    @section meas_algorithms_readTextCatalog_Example  A complete example of using ReadTextCatalogTask

    Given a file named `table.csv` containing the following (the column-name line uses the same
    delimiter as the data lines; comma is the default):

        ra,     dec,    flux
        5.5,    -45.2,  12453
        19.6,   34.2,   32123

    you can read this file with the following code:

        from lsst.meas.algorithms.readTextCatalogTask import ReadTextCatalogTask
        task = ReadTextCatalogTask()
        catalogArray = task.run("table.csv")

    The resulting `catalogArray` is a numpy structured array containing three fields
    ("ra", "dec" and "flux") and two rows of data. For more complex cases,
    config parameters allow you to specify the names of the columns (instead of using automatic discovery)
    and set the number of rows to skip.
    """
    ConfigClass = ReadTextCatalogConfig
    _DefaultName = "readCatalog"

    def run(self, filename):
        """Load the given text file as a numpy structured array

        @param[in] filename  path to text file
        @return a numpy structured array (always at least 1-d) containing the specified columns,
            with any byte-string columns converted to unicode columns
        """
        # names=True asks genfromtxt to take the column names from the first line
        # after the skipped header lines; a non-empty colnames list overrides that.
        columnNames = self.config.colnames if self.config.colnames else True
        catalog = np.genfromtxt(
            filename,
            dtype=None,
            names=columnNames,
            delimiter=self.config.delimiter,
            skip_header=self.config.header_lines,
        )

        def asUnicode(fieldType):
            # Byte-string ('S') fields become unicode fields of the same length;
            # every other field type is passed through unchanged.
            if fieldType.kind == 'S':
                return np.dtype('|U{}'.format(fieldType.itemsize))
            return fieldType

        unicodeDtype = [(name, asUnicode(catalog.dtype[name])) for name in catalog.dtype.names]

        # A file holding a single data line yields a 0-d array; promote it to 1-d.
        return np.atleast_1d(catalog.astype(unicodeDtype))