lsst.pipe.tasks  13.0-66-gfbf2f2ce+5
ingestCalibs.py
Go to the documentation of this file.
1 from __future__ import absolute_import, division, print_function
2 from builtins import zip
3 import collections
4 import datetime
5 import itertools
6 import sqlite3
7 from glob import glob
8 import lsst.afw.image as afwImage
9 from lsst.pex.config import Config, Field, ListField, ConfigurableField
10 from lsst.pipe.base import InputOnlyArgumentParser
11 from lsst.pipe.tasks.ingest import RegisterTask, ParseTask, RegisterConfig, IngestTask
12 
13 
def _convertToDate(dateString):
    """Parse a calendar date string into a date object

    @param dateString: Date written as "YYYY-MM-DD"
    @return The corresponding datetime.date
    """
    parsed = datetime.datetime.strptime(dateString, "%Y-%m-%d")
    return parsed.date()
17 
18 
20  """Task that will parse the filename and/or its contents to get the
21  required information to populate the calibration registry."""
22 
def getCalibType(self, filename):
    """Return a known calibration dataset type using
    the observation type in the header keyword OBSTYPE

    The OBSTYPE value is stripped and lower-cased, then searched for the
    keywords below in order; the first match decides the dataset type.
    If no keyword matches, the normalised OBSTYPE value itself is returned.

    @param filename: Input filename
    @return Calibration dataset type (or the normalised OBSTYPE value)
    """
    hdu = self.config.hdu
    md = afwImage.readMetadata(filename, hdu)
    if not md.exists("OBSTYPE"):
        raise RuntimeError("Unable to find the required header keyword OBSTYPE in %s, hdu %d" %
                           (filename, hdu))
    obstype = md.get("OBSTYPE").strip().lower()
    # Ordered: an OBSTYPE mentioning several keywords resolves to the earliest entry.
    knownTypes = (
        ("flat", ("flat",)),
        ("bias", ("zero", "bias")),
        ("dark", ("dark",)),
        ("fringe", ("fringe",)),
    )
    for calibType, keywords in knownTypes:
        if any(word in obstype for word in keywords):
            return calibType
    return obstype
43 
def getDestination(self, butler, info, filename):
    """Get destination for the file

    @param butler Data butler
    @param info File properties, used as dataId for the butler
    @param filename Input filename
    @return Destination filename
    """
    # Entries whose value is None are dropped before the dataId reaches the
    # butler; this workaround was added as part of DM-5466.
    # The Butler should handle this behind-the-scenes in the future.
    # Please reference DM-9873 and delete this comment once it is resolved.
    dataId = {key: value for (key, value) in info.items() if value is not None}
    datasetType = self.getCalibType(filename) + "_filename"
    destination = butler.get(datasetType, dataId)[0]
    # Ensure filename is devoid of cfitsio directions about HDUs
    bracket = destination.find("[")
    return destination[:bracket] if bracket > 0 else destination
63 
64 
66  """Configuration for the CalibsRegisterTask"""
67  tables = ListField(dtype=str, default=["bias", "dark", "flat", "fringe"], doc="Names of tables")
68  calibDate = Field(dtype=str, default="calibDate", doc="Name of column for calibration date")
69  validStart = Field(dtype=str, default="validStart", doc="Name of column for validity start")
70  validEnd = Field(dtype=str, default="validEnd", doc="Name of column for validity stop")
71  detector = ListField(dtype=str, default=["filter", "ccd"],
72  doc="Columns that identify individual detectors")
73  validityUntilSuperseded = ListField(dtype=str, default=["defect"],
74  doc="Tables for which to set validity for a calib from when it is "
75  "taken until it is superseded by the next; validity in other tables "
76  "is calculated by applying the validity range.")
77 
78 
80  """Task that will generate the calibration registry for the Mapper"""
81  ConfigClass = CalibsRegisterConfig
82 
def openRegistry(self, directory, create=False, dryrun=False, name="calibRegistry.sqlite3"):
    """Open the calibration registry and return the connection handle

    Delegates to RegisterTask.openRegistry, passing "calibRegistry.sqlite3"
    as the registry file name unless the caller supplies another.

    @param directory: Directory passed through to RegisterTask.openRegistry
    @param create: Passed through to RegisterTask.openRegistry
    @param dryrun: Passed through to RegisterTask.openRegistry
    @param name: Registry file name
    @return Whatever RegisterTask.openRegistry returns
    """
    registry = RegisterTask.openRegistry(self, directory, create, dryrun, name)
    return registry
86 
def createTable(self, conn):
    """Create one registry table for every name in config.tables

    @param conn: Database connection
    """
    for tableName in self.config.tables:
        RegisterTask.createTable(self, conn, table=tableName)
91 
def addRow(self, conn, info, *args, **kwargs):
    """Add a row to the file table

    The validity start/end entries of info are set to None (modifying info
    in place) before the row is handed to RegisterTask.addRow.

    @param conn: Database connection
    @param info: File properties to add; modified in place
    """
    for column in (self.config.validStart, self.config.validEnd):
        info[column] = None
    RegisterTask.addRow(self, conn, info, *args, **kwargs)
97 
def updateValidityRanges(self, conn, validity):
    """Update the validity ranges in the registry for every table and
    every distinct combination of the detector-identifying columns.

    The connection's row factory is switched to sqlite3.Row as a side effect.

    @param conn: Database connection
    @param validity: Validity range (days)
    """
    conn.row_factory = sqlite3.Row
    cursor = conn.cursor()
    for table in self.config.tables:
        query = "SELECT DISTINCT %s FROM %s" % (", ".join(self.config.detector), table)
        cursor.execute(query)
        # Fetch everything first: the per-detector fix-up issues its own statements.
        for detectorData in cursor.fetchall():
            self.fixSubsetValidity(conn, table, detectorData, validity)
113 
def fixSubsetValidity(self, conn, table, detectorData, validity):
    """Update the validity ranges among selected rows in the registry.

    For tables listed in config.validityUntilSuperseded (defects by
    default), each product is valid from its calibration date until the
    day before the next product's date; the latest one is valid until
    2037-12-31.
    For other calibration products, each range spans `validity` days either
    side of the calibration date. Where a range extends past the midpoint
    between two consecutive dates, both ranges are trimmed at that midpoint,
    so that the calibration data whose date is nearest the date of the
    observation is used.

    Nothing is done when no rows match detectorData. If any calibration
    date cannot be converted, a warning is logged and no row is updated.

    @param conn: Database connection; rows must be addressable by column
                 name (e.g. row_factory set to sqlite3.Row)
    @param table: Name of table to be selected
    @param detectorData: Values identifying a detector (from columns in self.config.detector)
    @param validity: Validity range (days)
    """
    config = self.config
    columns = ", ".join([config.calibDate, config.validStart, config.validEnd])
    sql = "SELECT id, %s FROM %s" % (columns, table)
    sql += " WHERE " + " AND ".join(col + "=?" for col in config.detector)
    sql += " ORDER BY " + config.calibDate
    cursor = conn.cursor()
    cursor.execute(sql, detectorData)
    rows = cursor.fetchall()
    if not rows:
        # "col=?" never matches a NULL value, so a detector with a NULL
        # column selects nothing; there is then nothing to update, and the
        # until-superseded branch below would fail on dates[-1].
        return

    try:
        calibDates = [_convertToDate(row[config.calibDate]) for row in rows]
    except Exception:
        # Deliberately broad: any unusable calibration date skips this detector.
        det = " ".join("%s=%s" % (k, v) for k, v in zip(config.detector, detectorData))
        # Sqlite returns unicode strings, which cannot be passed through SWIG.
        self.log.warn(str("Skipped setting the validity overlaps for %s %s: missing calibration dates" %
                          (table, det)))
        return

    # Keyed by date, so rows sharing a calibration date share one validity range.
    valids = collections.OrderedDict((date, [None, None]) for date in calibDates)
    dates = list(valids)
    oneDay = datetime.timedelta(1)
    if table in config.validityUntilSuperseded:
        # A calib is valid until it is superseded
        for thisDate, nextDate in zip(dates[:-1], dates[1:]):
            valids[thisDate][0] = thisDate
            valids[thisDate][1] = nextDate - oneDay
        valids[dates[-1]][0] = dates[-1]
        valids[dates[-1]][1] = _convertToDate("2037-12-31")  # End of UNIX time
    else:
        # A calib is valid within the validity range (in days) specified.
        span = datetime.timedelta(validity)
        for dd in dates:
            valids[dd] = [dd - span, dd + span]
        # Fix the dates so that they do not overlap, which can cause the butler to find a
        # non-unique calib.
        for date, nextDate in zip(dates[:-1], dates[1:]):
            midpoint = date + (nextDate - date)//2
            if valids[date][1] > midpoint:
                valids[nextDate][0] = midpoint + oneDay
                valids[date][1] = midpoint

    # Update the validity data in the registry
    sql = "UPDATE %s" % table
    sql += " SET %s=?, %s=?" % (config.validStart, config.validEnd)
    sql += " WHERE id=?"
    for row, calibDate in zip(rows, calibDates):
        validStart, validEnd = valids[calibDate]
        conn.execute(sql, (validStart.isoformat(), validEnd.isoformat(), row["id"]))
177 
178 
class IngestCalibsArgumentParser(InputOnlyArgumentParser):
    """Argument parser to support ingesting calibration images into the repository"""

    def __init__(self, *args, **kwargs):
        """Build the base parser, then register the calibration-ingest options"""
        InputOnlyArgumentParser.__init__(self, *args, **kwargs)
        addArg = self.add_argument
        addArg("-n", "--dry-run", action="store_true", dest="dryrun", default=False,
               help="Don't perform any action?")
        addArg("--mode", default="move", choices=["move", "copy", "link", "skip"],
               help="Mode of delivering the files to their destination")
        addArg("--create", action="store_true", help="Create new registry?")
        # The validity period has no default: it must be given on every invocation.
        addArg("--validity", required=True, type=int, help="Calibration validity period (days)")
        addArg("--calibType", type=str, default=None,
               choices=[None, "bias", "dark", "flat", "fringe", "defect"],
               help=("Type of the calibration data to be ingested;"
                     " if omitted, the type is determined from"
                     " the file header information"))
        addArg("--ignore-ingested", action="store_true", dest="ignoreIngested",
               help="Don't register files that have already been registered")
        addArg("files", nargs="+", help="Names of file")
198 
199 
class IngestCalibsConfig(Config):
    """Configuration for IngestCalibsTask"""
    # Sub-task that extracts the registry information from each input file
    parse = ConfigurableField(
        target=CalibsParseTask,
        doc="File parsing",
    )
    # Sub-task that maintains the calibration registry
    register = ConfigurableField(
        target=CalibsRegisterTask,
        doc="Registry entry",
    )
    allowError = Field(
        dtype=bool,
        default=False,
        doc="Allow error in ingestion?",
    )
    clobber = Field(
        dtype=bool,
        default=False,
        doc="Clobber existing file?",
    )
206 
207 
209  """Task that generates registry for calibration images"""
210  ConfigClass = IngestCalibsConfig
211  ArgumentParser = IngestCalibsArgumentParser
212  _DefaultName = "ingestCalibs"
213 
def run(self, args):
    """Ingest all specified files and add them to the registry

    Each file is parsed, typed (from --calibType or, failing that, from its
    header), delivered to its destination unless the mode is 'skip', and
    then registered one row per HDU. Files of an unknown type or that fail
    to ingest are skipped with a warning. Validity ranges are recomputed
    once at the end, except in a dry run.

    @param args: Parsed command-line arguments
    """
    if args.calib is not None:
        calibRoot = args.calib
    else:
        calibRoot = args.output
    filenameList = self.expandFiles(args.files)
    with self.register.openRegistry(calibRoot, create=args.create, dryrun=args.dryrun) as registry:
        for infile in filenameList:
            fileInfo, hduInfoList = self.parse.getInfo(infile)
            calibType = args.calibType if args.calibType is not None else self.parse.getCalibType(infile)
            if calibType not in self.register.config.tables:
                self.log.warn(str("Skipped adding %s of observation type '%s' to registry "
                                  "(must be one of %s)" %
                                  (infile, calibType, ", ".join(self.register.config.tables))))
                continue
            if args.mode != 'skip':
                outfile = self.parse.getDestination(args.butler, fileInfo, infile)
                if not self.ingest(infile, outfile, mode=args.mode, dryrun=args.dryrun):
                    self.log.warn(str("Failed to ingest %s of observation type '%s'" %
                                      (infile, calibType)))
                    continue
            if self.register.check(registry, fileInfo, table=calibType):
                if args.ignoreIngested:
                    continue
                # Not ignoring: warn, then fall through and add the rows anyway.
                self.log.warn("%s: already ingested: %s" % (infile, fileInfo))
            for info in hduInfoList:
                self.register.addRow(registry, info, dryrun=args.dryrun,
                                     create=args.create, table=calibType)
        # Still inside the "with": the validity update needs the open registry.
        if args.dryrun:
            self.log.info("Would update validity ranges here, but dryrun")
        else:
            self.register.updateValidityRanges(registry, args.validity)
def ingest(self, infile, outfile, mode="move", dryrun=False)
Definition: ingest.py:401
def expandFiles(self, fileNameList)
Expand a set of filenames and globs, returning a list of filenames.
Definition: ingest.py:473
def addRow(self, conn, info, *args, **kwargs)
Definition: ingestCalibs.py:92
def getDestination(self, butler, info, filename)
Definition: ingestCalibs.py:44
def openRegistry(self, directory, create=False, dryrun=False, name="calibRegistry.sqlite3")
Definition: ingestCalibs.py:83
def fixSubsetValidity(self, conn, table, detectorData, validity)