lsst.pipe.tasks  19.0.0-7-gf796fef9+6
ingestCalibs.py
Go to the documentation of this file.
1 import collections
2 import datetime
3 import sqlite3
4 from dateutil import parser
5 
6 from lsst.afw.fits import readMetadata
7 from lsst.pex.config import Config, Field, ListField, ConfigurableField
8 from lsst.pipe.base import InputOnlyArgumentParser
9 from lsst.pipe.tasks.ingest import RegisterTask, ParseTask, RegisterConfig, IngestTask
10 
11 
def _convertToDate(dateString):
    """Parse a date string and return only its calendar-date part"""
    parsed = parser.parse(dateString)
    return parsed.date()
15 
16 
18  """Task that will parse the filename and/or its contents to get the
19  required information to populate the calibration registry."""
20 
def getCalibType(self, filename):
    """Return a known calibration dataset type derived from
    the observation type in the header keyword OBSTYPE

    The OBSTYPE value is stripped and lower-cased, then searched for a
    series of keywords in a fixed order; the first keyword found decides
    the dataset type. If no keyword is found, the normalised OBSTYPE
    value itself is returned.

    @param filename: Input filename
    @return Calibration dataset type (string)
    @throws RuntimeError if the header has no OBSTYPE keyword
    """
    hdu = self.config.hdu
    header = readMetadata(filename, hdu)
    if not header.exists("OBSTYPE"):
        raise RuntimeError("Unable to find the required header keyword OBSTYPE in %s, hdu %d" %
                           (filename, hdu))
    observed = header.getScalar("OBSTYPE").strip().lower()
    # (substring to look for, dataset type); order matters because the
    # first match wins.
    keywordTable = (
        ("flat", "flat"),
        ("zero", "bias"),
        ("bias", "bias"),
        ("dark", "dark"),
        ("fringe", "fringe"),
        ("sky", "sky"),
        ("illumcor", "illumcor"),
        ("defects", "defects"),
        ("qe_curve", "qe_curve"),
    )
    for keyword, calibType in keywordTable:
        if keyword in observed:
            return calibType
    return observed
49 
def getDestination(self, butler, info, filename):
    """Work out where the butler wants the file to be stored

    @param butler Data butler
    @param info File properties, used as dataId for the butler
    @param filename Input filename
    @return Destination filename
    """
    # Entries whose value is None are stripped from the dataId (added as
    # part of DM-5466). The Butler should handle this behind-the-scenes in
    # the future; please reference DM-9873 and delete this comment once it
    # is resolved.
    dataId = {}
    for key, value in info.items():
        if value is not None:
            dataId[key] = value
    datasetType = self.getCalibType(filename) + "_filename"
    destination = butler.get(datasetType, dataId)[0]
    # Drop any trailing cfitsio HDU specifier such as "[1]". A bracket in
    # the very first position is deliberately left alone.
    bracket = destination.find("[")
    if bracket > 0:
        return destination[:bracket]
    return destination
69 
70 
72  """Configuration for the CalibsRegisterTask"""
# Registry tables to manage.
tables = ListField(
    dtype=str,
    default=["bias", "dark", "flat", "fringe", "sky", "defects", "qe_curve"],
    doc="Names of tables",
)
# Column names used when computing validity ranges.
calibDate = Field(
    dtype=str,
    default="calibDate",
    doc="Name of column for calibration date",
)
validStart = Field(
    dtype=str,
    default="validStart",
    doc="Name of column for validity start",
)
validEnd = Field(
    dtype=str,
    default="validEnd",
    doc="Name of column for validity stop",
)
# Columns whose combined values single out one detector; validity ranges
# are computed separately for each distinct combination.
detector = ListField(
    dtype=str,
    default=["filter", "ccd"],
    doc="Columns that identify individual detectors",
)
# Tables whose entries stay valid until the next entry replaces them.
validityUntilSuperseded = ListField(
    dtype=str,
    default=["defects", "qe_curve"],
    doc="Tables for which to set validity for a calib from when it is "
        "taken until it is superseded by the next; validity in other tables "
        "is calculated by applying the validity range.",
)
84 
85 
87  """Task that will generate the calibration registry for the Mapper"""
88  ConfigClass = CalibsRegisterConfig
89 
def openRegistry(self, directory, create=False, dryrun=False, name="calibRegistry.sqlite3"):
    """Open the calibration registry and return the connection handle

    Delegates to RegisterTask.openRegistry, supplying
    "calibRegistry.sqlite3" as the default registry file name.
    """
    handle = RegisterTask.openRegistry(self, directory, create, dryrun, name)
    return handle
93 
def createTable(self, conn, forceCreateTables=False):
    """Create one registry table for every name in config.tables"""
    tableNames = self.config.tables
    for tableName in tableNames:
        RegisterTask.createTable(self, conn, table=tableName,
                                 forceCreateTables=forceCreateTables)
98 
def addRow(self, conn, info, *args, **kwargs):
    """Add a row to the file table

    The validity start/end entries of ``info`` are set to None before the
    row is handed to RegisterTask.addRow. Note that ``info`` is modified
    in place.
    """
    for column in (self.config.validStart, self.config.validEnd):
        info[column] = None
    RegisterTask.addRow(self, conn, info, *args, **kwargs)
104 
def updateValidityRanges(self, conn, validity, tables=None):
    """Update the validity ranges in the registry for every distinct
    detector found in each of the requested tables.

    Sets ``conn.row_factory`` to ``sqlite3.Row`` so that rows can be
    indexed by column name; the setting is left in place on return.

    @param conn: Database connection
    @param validity: Validity range (days)
    @param tables: Names of tables to process (default: config.tables)
    """
    conn.row_factory = sqlite3.Row
    cursor = conn.cursor()
    selectedTables = self.config.tables if tables is None else tables
    for tableName in selectedTables:
        detectorColumns = ", ".join(self.config.detector)
        query = "SELECT DISTINCT %s FROM %s" % (detectorColumns, tableName)
        cursor.execute(query)
        # Fetch everything first: fixSubsetValidity issues its own
        # statements on the same connection.
        detectorRows = cursor.fetchall()
        for detectorData in detectorRows:
            self.fixSubsetValidity(conn, tableName, detectorData, validity)
122 
def fixSubsetValidity(self, conn, table, detectorData, validity):
    """Update the validity ranges among selected rows in the registry.

    For tables listed in config.validityUntilSuperseded (defects and
    qe_curve by default), each product is valid from its calibration date
    until the day before the next product's date; the latest product is
    valid until 2037-12-31.
    For other calibration products, each product is valid for ``validity``
    days either side of its calibration date. Where the ranges of
    consecutive products overlap, they are cut at the midpoint between the
    two dates, so that the calibration data whose date is nearest the date
    of the observation is used.

    Rows are looked up by column name, so the connection is expected to
    produce name-indexable rows (e.g. ``row_factory = sqlite3.Row``).
    If no rows match ``detectorData`` nothing is changed. If any
    calibration date cannot be converted, a warning is logged and nothing
    is changed.

    @param conn: Database connection
    @param table: Name of table to be selected
    @param detectorData: Values identifying a detector (from columns in self.config.detector)
    @param validity: Validity range (days)
    """
    calibDateColumn = self.config.calibDate
    columns = ", ".join([calibDateColumn, self.config.validStart, self.config.validEnd])
    sql = "SELECT id, %s FROM %s" % (columns, table)
    sql += " WHERE " + " AND ".join(col + "=?" for col in self.config.detector)
    sql += " ORDER BY " + calibDateColumn
    cursor = conn.cursor()
    cursor.execute(sql, detectorData)
    rows = cursor.fetchall()
    if not rows:
        # Nothing matched (e.g. a NULL detector value, which "col=?" never
        # matches). Without this guard the "until superseded" branch below
        # would index an empty date list and raise IndexError.
        return

    try:
        valids = collections.OrderedDict([(_convertToDate(row[calibDateColumn]), [None, None]) for
                                          row in rows])
    except Exception:
        det = " ".join("%s=%s" % (k, v) for k, v in zip(self.config.detector, detectorData))
        # Sqlite returns unicode strings, which cannot be passed through SWIG.
        self.log.warn(str("Skipped setting the validity overlaps for %s %s: missing calibration dates" %
                          (table, det)))
        return
    # Rows were selected in calibration-date order, so the keys are too.
    dates = list(valids.keys())
    if table in self.config.validityUntilSuperseded:
        # A calib is valid until it is superseded
        for thisDate, nextDate in zip(dates[:-1], dates[1:]):
            valids[thisDate][0] = thisDate
            valids[thisDate][1] = nextDate - datetime.timedelta(1)
        valids[dates[-1]][0] = dates[-1]
        valids[dates[-1]][1] = _convertToDate("2037-12-31")  # End of UNIX time
    else:
        # A calib is valid within the validity range (in days) specified.
        for dd in dates:
            valids[dd] = [dd - datetime.timedelta(validity), dd + datetime.timedelta(validity)]
        # Fix the dates so that they do not overlap, which can cause the butler to find a
        # non-unique calib.
        midpoints = [t1 + (t2 - t1)//2 for t1, t2 in zip(dates[:-1], dates[1:])]
        for i, (date, midpoint) in enumerate(zip(dates[:-1], midpoints)):
            if valids[date][1] > midpoint:
                nextDate = dates[i + 1]
                valids[nextDate][0] = midpoint + datetime.timedelta(1)
                valids[date][1] = midpoint
    # Update the validity data in the registry
    updateSql = "UPDATE %s SET %s=?, %s=? WHERE id=?" % (table, self.config.validStart,
                                                         self.config.validEnd)
    for row in rows:
        calibDate = _convertToDate(row[calibDateColumn])
        validStart = valids[calibDate][0].isoformat()
        validEnd = valids[calibDate][1].isoformat()
        conn.execute(updateSql, (validStart, validEnd, row["id"]))
186 
187 
class IngestCalibsArgumentParser(InputOnlyArgumentParser):
    """Command-line parser for ingesting calibration images into the repository"""

    def __init__(self, *args, **kwargs):
        """Build the base parser, then register the ingestCalibs options"""
        InputOnlyArgumentParser.__init__(self, *args, **kwargs)
        self.add_argument(
            "-n", "--dry-run",
            dest="dryrun",
            action="store_true",
            default=False,
            help="Don't perform any action?",
        )
        self.add_argument(
            "--mode",
            choices=["move", "copy", "link", "skip"],
            default="move",
            help="Mode of delivering the files to their destination",
        )
        self.add_argument(
            "--create",
            action="store_true",
            help="Create new registry?",
        )
        # The validity period has no default and must always be given.
        self.add_argument(
            "--validity",
            type=int,
            required=True,
            help="Calibration validity period (days)",
        )
        self.add_argument(
            "--ignore-ingested",
            dest="ignoreIngested",
            action="store_true",
            help="Don't register files that have already been registered",
        )
        self.add_argument(
            "files",
            nargs="+",
            help="Names of file",
        )
202 
203 
class IngestCalibsConfig(Config):
    """Configuration for IngestCalibsTask"""
    # Subtask that extracts registry information from each input file.
    parse = ConfigurableField(
        target=CalibsParseTask,
        doc="File parsing",
    )
    # Subtask that writes entries into the calibration registry.
    register = ConfigurableField(
        target=CalibsRegisterTask,
        doc="Registry entry",
    )
    allowError = Field(
        dtype=bool,
        default=False,
        doc="Allow error in ingestion?",
    )
    clobber = Field(
        dtype=bool,
        default=False,
        doc="Clobber existing file?",
    )
210 
211 
213  """Task that generates registry for calibration images"""
214  ConfigClass = IngestCalibsConfig
215  ArgumentParser = IngestCalibsArgumentParser
216  _DefaultName = "ingestCalibs"
217 
def run(self, args):
    """Ingest all specified files and add them to the registry

    For each input file the calibration type is determined; files whose
    type is not one of the registry tables are skipped with a warning.
    Unless the mode is 'skip', the file is delivered to its destination,
    and a failed delivery skips the file with a warning. Every HDU of a
    delivered file is then registered, unless the file is already in the
    registry and args.ignoreIngested is set. After all files have been
    processed, validity ranges are updated for the calibration types that
    were encountered, except in a dry run.

    @param args: Parsed command-line arguments
    """
    if args.calib is not None:
        calibRoot = args.calib
    else:
        calibRoot = args.output
    inputFiles = self.expandFiles(args.files)
    with self.register.openRegistry(calibRoot, create=args.create, dryrun=args.dryrun) as registry:
        seenTypes = set()
        for path in inputFiles:
            fileInfo, hduInfoList = self.parse.getInfo(path)
            calibType = self.parse.getCalibType(path)
            if calibType not in self.register.config.tables:
                self.log.warn(str("Skipped adding %s of observation type '%s' to registry "
                                  "(must be one of %s)" %
                                  (path, calibType, ", ".join(self.register.config.tables))))
                continue
            # Recorded before delivery, so a type still counts for the
            # validity update even if this particular file fails to ingest.
            seenTypes.add(calibType)
            if args.mode != 'skip':
                destination = self.parse.getDestination(args.butler, fileInfo, path)
                delivered = self.ingest(path, destination, mode=args.mode, dryrun=args.dryrun)
                if not delivered:
                    self.log.warn(str("Failed to ingest %s of observation type '%s'" %
                                      (path, calibType)))
                    continue
            alreadyRegistered = self.register.check(registry, fileInfo, table=calibType)
            if alreadyRegistered and args.ignoreIngested:
                continue
            if alreadyRegistered:
                # Only a warning: the rows are still added below.
                self.log.warn("%s: already ingested: %s" % (path, fileInfo))
            for hduInfo in hduInfoList:
                self.register.addRow(registry, hduInfo, dryrun=args.dryrun,
                                     create=args.create, table=calibType)
        if args.dryrun:
            self.log.info("Would update validity ranges here, but dryrun")
        else:
            self.register.updateValidityRanges(registry, args.validity, tables=seenTypes)
def ingest(self, infile, outfile, mode="move", dryrun=False)
Definition: ingest.py:474
def createTable(self, conn, forceCreateTables=False)
Definition: ingestCalibs.py:94
def expandFiles(self, fileNameList)
Expand a set of filenames and globs, returning a list of filenames.
Definition: ingest.py:546
def addRow(self, conn, info, args, kwargs)
Definition: ingestCalibs.py:99
def getDestination(self, butler, info, filename)
Definition: ingestCalibs.py:50
def updateValidityRanges(self, conn, validity, tables=None)
def openRegistry(self, directory, create=False, dryrun=False, name="calibRegistry.sqlite3")
Definition: ingestCalibs.py:90
def fixSubsetValidity(self, conn, table, detectorData, validity)