lsst.pipe.tasks  14.0-34-g85a33b94+4
ingestCalibs.py
Go to the documentation of this file.
1 from __future__ import absolute_import, division, print_function
2 from builtins import zip
3 import collections
4 import datetime
5 import itertools
6 import sqlite3
7 from glob import glob
8 import lsst.afw.image as afwImage
9 from lsst.pex.config import Config, Field, ListField, ConfigurableField
10 from lsst.pipe.base import InputOnlyArgumentParser
11 from lsst.pipe.tasks.ingest import RegisterTask, ParseTask, RegisterConfig, IngestTask
12 
13 
def _convertToDate(dateString):
    """Convert a string into a date object

    @param dateString: Calendar date formatted as "YYYY-MM-DD"
    @return datetime.date for that day
    @throw ValueError if dateString does not match the "%Y-%m-%d" format
    @throw TypeError if dateString is not a string (e.g. None)
    """
    return datetime.datetime.strptime(dateString, "%Y-%m-%d").date()
17 
18 
# NOTE(review): the class statement is absent from this listing.  The name is
# taken from IngestCalibsConfig.parse (target=CalibsParseTask); the ParseTask
# base is inferred from the module imports -- confirm against the original.
class CalibsParseTask(ParseTask):
    """Task that will parse the filename and/or its contents to get the
    required information to populate the calibration registry."""

    def getCalibType(self, filename):
        """Return a known calibration dataset type using
        the observation type in the header keyword OBSTYPE

        The OBSTYPE value is stripped and lower-cased, then matched by
        substring against the known calibration types.  If nothing matches,
        the normalised OBSTYPE value itself is returned.

        @param filename: Input filename
        @return Calibration type ("flat", "bias", "dark", "fringe", "sky"),
                or the normalised OBSTYPE value when none of those match
        @throw RuntimeError if the header has no OBSTYPE keyword
        """
        md = afwImage.readMetadata(filename, self.config.hdu)
        if not md.exists("OBSTYPE"):
            raise RuntimeError("Unable to find the required header keyword OBSTYPE in %s, hdu %d" %
                               (filename, self.config.hdu))
        obstype = md.get("OBSTYPE").strip().lower()
        # Order matters: the first calibration type with a matching keyword wins.
        knownTypes = (
            ("flat", ("flat",)),
            ("bias", ("zero", "bias")),
            ("dark", ("dark",)),
            ("fringe", ("fringe",)),
            ("sky", ("sky",)),
        )
        for calibType, keywords in knownTypes:
            if any(keyword in obstype for keyword in keywords):
                return calibType
        return obstype

    def getDestination(self, butler, info, filename):
        """Get destination for the file

        @param butler Data butler
        @param info File properties, used as dataId for the butler
        @param filename Input filename
        @return Destination filename
        """
        # 'tempinfo' was added as part of DM-5466 to strip Nones from info.
        # The Butler should handle this behind-the-scenes in the future.
        # Please reference DM-9873 and delete this comment once it is resolved.
        tempinfo = {k: v for (k, v) in info.items() if v is not None}
        calibType = self.getCalibType(filename)
        raw = butler.get(calibType + "_filename", tempinfo)[0]
        # Ensure filename is devoid of cfitsio directions about HDUs.
        # A "[" in the very first position is left alone (c == 0), since
        # cutting there would leave an empty filename.
        c = raw.find("[")
        if c > 0:
            raw = raw[:c]
        return raw
65 
66 
# NOTE(review): the class statement is absent from this listing.  The name is
# taken from the "ConfigClass = CalibsRegisterConfig" assignment in the
# register task; the RegisterConfig base is inferred from the module
# imports -- confirm against the original.
class CalibsRegisterConfig(RegisterConfig):
    """Configuration for the CalibsRegisterTask"""
    # One registry table per entry; the register task loops over this list.
    tables = ListField(dtype=str, default=["bias", "dark", "flat", "fringe", "sky"], doc="Names of tables")
    calibDate = Field(dtype=str, default="calibDate", doc="Name of column for calibration date")
    validStart = Field(dtype=str, default="validStart", doc="Name of column for validity start")
    validEnd = Field(dtype=str, default="validEnd", doc="Name of column for validity stop")
    # Validity ranges are computed separately for each distinct combination of these columns.
    detector = ListField(dtype=str, default=["filter", "ccd"],
                         doc="Columns that identify individual detectors")
    # The default entry "defect" is not in the default 'tables' list, so it
    # only takes effect once a "defect" table is configured.
    validityUntilSuperseded = ListField(dtype=str, default=["defect"],
                                        doc="Tables for which to set validity for a calib from when it is "
                                        "taken until it is superseded by the next; validity in other tables "
                                        "is calculated by applying the validity range.")
79 
80 
# NOTE(review): the class statement is absent from this listing.  The name is
# taken from IngestCalibsConfig.register (target=CalibsRegisterTask); the
# RegisterTask base is inferred from the explicit RegisterTask.* calls below
# -- confirm against the original.
class CalibsRegisterTask(RegisterTask):
    """Task that will generate the calibration registry for the Mapper"""
    ConfigClass = CalibsRegisterConfig

    def openRegistry(self, directory, create=False, dryrun=False, name="calibRegistry.sqlite3"):
        """Open the registry and return the connection handle

        Delegates to RegisterTask.openRegistry; this override only changes
        the default registry file name.
        """
        return RegisterTask.openRegistry(self, directory, create, dryrun, name)

    def createTable(self, conn):
        """Create the registry tables

        One table is created for every name in self.config.tables.

        @param conn: Database connection
        """
        for table in self.config.tables:
            RegisterTask.createTable(self, conn, table=table)

    def addRow(self, conn, info, *args, **kwargs):
        """Add a row to the file table

        The validity columns are stored as None here (note that this
        modifies the caller's 'info' dict); they are filled in later by
        updateValidityRanges.

        @param conn: Database connection
        @param info: File properties to add to the registry
        """
        info[self.config.validStart] = None
        info[self.config.validEnd] = None
        RegisterTask.addRow(self, conn, info, *args, **kwargs)

    def updateValidityRanges(self, conn, validity):
        """Loop over all tables, filters, and ccdnums,
        and update the validity ranges in the registry.

        @param conn: Database connection
        @param validity: Validity range (days)
        """
        # fixSubsetValidity reads result columns by name, which requires the
        # sqlite3.Row factory.  This stays set on the connection afterwards.
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()
        for table in self.config.tables:
            sql = "SELECT DISTINCT %s FROM %s" % (", ".join(self.config.detector), table)
            cursor.execute(sql)
            # Fetch everything first: fixSubsetValidity runs further
            # statements on the same connection while we iterate.
            rows = cursor.fetchall()
            for row in rows:
                self.fixSubsetValidity(conn, table, row, validity)

    def fixSubsetValidity(self, conn, table, detectorData, validity):
        """Update the validity ranges among selected rows in the registry.

        For defects, the products are valid from their start date until
        they are superseded by subsequent defect data.
        For other calibration products, the validity ranges are checked and
        if there are overlaps, a midpoint is used to fix the overlaps,
        so that the calibration data whose date is nearest the date
        of the observation is used.

        Rows are read by column name, so 'conn' must use the sqlite3.Row
        row factory (updateValidityRanges sets this up).  If any calibration
        date in the selection cannot be parsed, a warning is logged and the
        selection is left untouched.

        @param conn: Database connection
        @param table: Name of table to be selected
        @param detectorData: Values identifying a detector (from columns in self.config.detector)
        @param validity: Validity range (days)
        """
        columns = ", ".join([self.config.calibDate, self.config.validStart, self.config.validEnd])
        sql = "SELECT id, %s FROM %s" % (columns, table)
        # "IS ?" is SQLite's null-safe equality: it behaves like "=" for
        # ordinary values but also matches NULL against NULL.  With "=?" a
        # detector column holding NULL selected no rows at all, so that group
        # never received a validity range.
        sql += " WHERE " + " AND ".join(col + " IS ?" for col in self.config.detector)
        sql += " ORDER BY " + self.config.calibDate
        cursor = conn.cursor()
        cursor.execute(sql, detectorData)
        rows = cursor.fetchall()
        if not rows:
            # Nothing to update; also protects the dates[-1] lookups below.
            return

        try:
            # Keyed by calibration date in ascending order (see ORDER BY);
            # rows sharing a date share one [validStart, validEnd] entry.
            valids = collections.OrderedDict(
                (_convertToDate(row[self.config.calibDate]), [None, None]) for row in rows)
        except Exception:
            det = " ".join("%s=%s" % (k, v) for k, v in zip(self.config.detector, detectorData))
            # Sqlite returns unicode strings, which cannot be passed through SWIG.
            self.log.warn(str("Skipped setting the validity overlaps for %s %s: missing calibration dates" %
                              (table, det)))
            return
        dates = list(valids.keys())
        if table in self.config.validityUntilSuperseded:
            # A calib is valid until it is superseded
            for thisDate, nextDate in zip(dates[:-1], dates[1:]):
                valids[thisDate][0] = thisDate
                valids[thisDate][1] = nextDate - datetime.timedelta(1)
            valids[dates[-1]][0] = dates[-1]
            valids[dates[-1]][1] = _convertToDate("2037-12-31")  # End of UNIX time
        else:
            # A calib is valid within the validity range (in days) specified.
            for dd in dates:
                valids[dd] = [dd - datetime.timedelta(validity), dd + datetime.timedelta(validity)]
            # Fix the dates so that they do not overlap, which can cause the butler to find a
            # non-unique calib.
            midpoints = [t1 + (t2 - t1)//2 for t1, t2 in zip(dates[:-1], dates[1:])]
            for i, (date, midpoint) in enumerate(zip(dates[:-1], midpoints)):
                if valids[date][1] > midpoint:
                    # The earlier calib ends at the midpoint and the later
                    # one starts the following day.
                    nextDate = dates[i + 1]
                    valids[nextDate][0] = midpoint + datetime.timedelta(1)
                    valids[date][1] = midpoint
        # Update the validity data in the registry
        for row in rows:
            calibDate = _convertToDate(row[self.config.calibDate])
            validStart = valids[calibDate][0].isoformat()
            validEnd = valids[calibDate][1].isoformat()
            sql = "UPDATE %s" % table
            sql += " SET %s=?, %s=?" % (self.config.validStart, self.config.validEnd)
            sql += " WHERE id=?"
            conn.execute(sql, (validStart, validEnd, row["id"]))
179 
180 
class IngestCalibsArgumentParser(InputOnlyArgumentParser):
    """Argument parser to support ingesting calibration images into the repository"""

    def __init__(self, *args, **kwargs):
        """Set up the base parser, then register the calibration-ingest options

        All positional and keyword arguments are passed unchanged to
        InputOnlyArgumentParser.
        """
        InputOnlyArgumentParser.__init__(self, *args, **kwargs)
        self.add_argument("-n", "--dry-run", dest="dryrun", action="store_true",
                          default=False, help="Don't perform any action?")
        self.add_argument("--mode", choices=["move", "copy", "link", "skip"], default="move",
                          help="Mode of delivering the files to their destination")
        self.add_argument("--create", action="store_true", help="Create new registry?")
        # Required even for table types whose validity does not depend on it.
        self.add_argument("--validity", type=int, required=True, help="Calibration validity period (days)")
        self.add_argument("--calibType", type=str, default=None,
                          choices=[None, "bias", "dark", "flat", "fringe", "sky", "defect"],
                          help="Type of the calibration data to be ingested;" +
                          " if omitted, the type is determined from" +
                          " the file header information")
        self.add_argument("--ignore-ingested", dest="ignoreIngested", action="store_true",
                          help="Don't register files that have already been registered")
        self.add_argument("files", nargs="+", help="Names of file")
200 
201 
class IngestCalibsConfig(Config):
    """Configuration for IngestCalibsTask"""
    # Sub-task that reads file information and determines the calibration type.
    parse = ConfigurableField(target=CalibsParseTask, doc="File parsing")
    # Sub-task that maintains the calibration registry tables.
    register = ConfigurableField(target=CalibsRegisterTask, doc="Registry entry")
    # NOTE(review): allowError and clobber are not read anywhere in the
    # visible part of this file; presumably the base ingest code uses them.
    allowError = Field(dtype=bool, default=False, doc="Allow error in ingestion?")
    clobber = Field(dtype=bool, default=False, doc="Clobber existing file?")
208 
209 
# NOTE(review): the class statement is absent from this listing.  The name is
# taken from the IngestCalibsConfig docstring ("Configuration for
# IngestCalibsTask"); the IngestTask base is inferred from the module imports
# and from the use of self.ingest / self.expandFiles -- confirm against the
# original.
class IngestCalibsTask(IngestTask):
    """Task that generates registry for calibration images"""
    ConfigClass = IngestCalibsConfig
    ArgumentParser = IngestCalibsArgumentParser
    _DefaultName = "ingestCalibs"

    def run(self, args):
        """Ingest all specified files and add them to the registry

        Each input file is parsed, delivered to its destination (unless
        args.mode is 'skip') and registered in the table for its calibration
        type.  After all files are processed, the validity ranges of every
        table are recalculated, unless this is a dry run.

        @param args: Parsed command-line arguments (see IngestCalibsArgumentParser)
        """
        calibRoot = args.calib if args.calib is not None else args.output
        filenameList = self.expandFiles(args.files)
        with self.register.openRegistry(calibRoot, create=args.create, dryrun=args.dryrun) as registry:
            for infile in filenameList:
                fileInfo, hduInfoList = self.parse.getInfo(infile)
                # A type given on the command line overrides the file header.
                if args.calibType is None:
                    calibType = self.parse.getCalibType(infile)
                else:
                    calibType = args.calibType
                if calibType not in self.register.config.tables:
                    self.log.warn(str("Skipped adding %s of observation type '%s' to registry "
                                      "(must be one of %s)" %
                                      (infile, calibType, ", ".join(self.register.config.tables))))
                    continue
                if args.mode != 'skip':
                    outfile = self.parse.getDestination(args.butler, fileInfo, infile)
                    ingested = self.ingest(infile, outfile, mode=args.mode, dryrun=args.dryrun)
                    if not ingested:
                        self.log.warn(str("Failed to ingest %s of observation type '%s'" %
                                          (infile, calibType)))
                        continue
                if self.register.check(registry, fileInfo, table=calibType):
                    if args.ignoreIngested:
                        continue

                    # Without --ignore-ingested this is only a warning: the
                    # rows below are still passed to addRow.
                    self.log.warn("%s: already ingested: %s" % (infile, fileInfo))
                for info in hduInfoList:
                    self.register.addRow(registry, info, dryrun=args.dryrun,
                                         create=args.create, table=calibType)
            if not args.dryrun:
                self.register.updateValidityRanges(registry, args.validity)
            else:
                self.log.info("Would update validity ranges here, but dryrun")
def ingest(self, infile, outfile, mode="move", dryrun=False)
Definition: ingest.py:404
def expandFiles(self, fileNameList)
Expand a set of filenames and globs, returning a list of filenames.
Definition: ingest.py:476
def addRow(self, conn, info, args, kwargs)
Definition: ingestCalibs.py:94
def getDestination(self, butler, info, filename)
Definition: ingestCalibs.py:46
def openRegistry(self, directory, create=False, dryrun=False, name="calibRegistry.sqlite3")
Definition: ingestCalibs.py:85
def fixSubsetValidity(self, conn, table, detectorData, validity)