# lsst.pipe.tasks  21.0.0-115-g99380953+42c4f6a8e4
# ingestCalibs.py
# (Header of the generated source listing; "Go to the documentation of this file." was a navigation link.)
1 import collections
2 import datetime
3 import sqlite3
4 from dateutil import parser
5 
6 from lsst.afw.fits import readMetadata
7 from lsst.pex.config import Config, Field, ListField, ConfigurableField
8 from lsst.pipe.base import InputOnlyArgumentParser
9 from lsst.pipe.tasks.ingest import RegisterTask, ParseTask, RegisterConfig, IngestTask
10 
11 
def _convertToDate(dateString):
    """Parse a date string and return only its calendar-date part.

    The string is handed to ``dateutil.parser.parse`` and ``.date()`` is
    called on the result, so any time-of-day component is dropped.
    """
    parsed = parser.parse(dateString)
    return parsed.date()
15 
16 
class CalibsParseTask(ParseTask):
    """Task that will parse the filename and/or its contents to get the
    required information to populate the calibration registry."""

    def getCalibType(self, filename):
        """Return a known calibration dataset type using
        the observation type in the header keyword OBSTYPE

        The OBSTYPE value is stripped and lower-cased, then searched for
        the known calibration keywords in a fixed order; the first keyword
        found decides the returned type (so e.g. a value containing both
        "sky" and "flat" is reported as "flat"). If no keyword is found the
        normalized OBSTYPE value is returned unchanged.

        @param filename: Input filename
        @return Calibration dataset type (lower case)
        @throw RuntimeError if the header has no OBSTYPE keyword
        """
        md = readMetadata(filename, self.config.hdu)
        if not md.exists("OBSTYPE"):
            raise RuntimeError("Unable to find the required header keyword OBSTYPE in %s, hdu %d" %
                               (filename, self.config.hdu))
        obstype = md.getScalar("OBSTYPE").strip().lower()

        # (keywords, dataset type), checked in this order.  All keywords are
        # lower case because obstype has already been lower-cased; an
        # upper-case keyword (such as "BFK") could never match.
        knownTypes = (
            (("flat",), "flat"),
            (("zero", "bias"), "bias"),
            (("dark",), "dark"),
            (("fringe",), "fringe"),
            (("sky",), "sky"),
            (("illumcor",), "illumcor"),
            (("defects",), "defects"),
            (("qe_curve",), "qe_curve"),
            (("linearizer",), "linearizer"),
            (("crosstalk",), "crosstalk"),
            (("bfk",), "bfk"),
        )
        for keywords, calibType in knownTypes:
            if any(keyword in obstype for keyword in keywords):
                return calibType
        return obstype

    def getDestination(self, butler, info, filename):
        """Get destination for the file

        @param butler Data butler
        @param info File properties, used as dataId for the butler
        @param filename Input filename
        @return Destination filename
        """
        # 'tempinfo' was added as part of DM-5466 to strip Nones from info.
        # The Butler should handle this behind-the-scenes in the future.
        # Please reference DM-9873 and delete this comment once it is resolved.
        tempinfo = {k: v for (k, v) in info.items() if v is not None}
        calibType = self.getCalibType(filename)
        raw = butler.get(calibType + "_filename", tempinfo)[0]
        # Ensure filename is devoid of cfitsio directions about HDUs
        c = raw.find("[")
        if c > 0:
            raw = raw[:c]
        return raw
75 
76 
class CalibsRegisterConfig(RegisterConfig):
    """Configuration for the CalibsRegisterTask"""
    # One registry table is created per calibration type listed here.
    tables = ListField(dtype=str, default=["bias", "dark", "flat", "fringe", "sky", "defects", "qe_curve",
                                           "linearizer", "crosstalk", "bfk"], doc="Names of tables")
    calibDate = Field(dtype=str, default="calibDate", doc="Name of column for calibration date")
    validStart = Field(dtype=str, default="validStart", doc="Name of column for validity start")
    validEnd = Field(dtype=str, default="validEnd", doc="Name of column for validity stop")
    # Columns used to group registry rows when validity ranges are computed.
    detector = ListField(dtype=str, default=["filter", "ccd"],
                         doc="Columns that identify individual detectors")
    validityUntilSuperseded = ListField(dtype=str, default=["defects", "qe_curve", "linearizer", "crosstalk",
                                                            "bfk"],
                                        doc="Tables for which to set validity for a calib from when it is "
                                        "taken until it is superseded by the next; validity in other tables "
                                        "is calculated by applying the validity range.")
    incrementValidEnd = Field(
        dtype=bool,
        default=True,
        doc="Fix the off-by-one error by incrementing validEnd. See "
        "fixSubsetValidity for more details.",
    )
97 
98 
class CalibsRegisterTask(RegisterTask):
    """Task that will generate the calibration registry for the Mapper"""
    ConfigClass = CalibsRegisterConfig

    def openRegistry(self, directory, create=False, dryrun=False, name="calibRegistry.sqlite3"):
        """Open the registry and return the connection handle

        Delegates to RegisterTask.openRegistry; only the default registry
        file name differs.
        """
        return RegisterTask.openRegistry(self, directory, create, dryrun, name)

    def createTable(self, conn, forceCreateTables=False):
        """Create the registry tables (one per entry in config.tables)"""
        for table in self.config.tables:
            RegisterTask.createTable(self, conn, table=table, forceCreateTables=forceCreateTables)

    def addRow(self, conn, info, *args, **kwargs):
        """Add a row to the file table

        The validity columns are set to None in ``info`` (the caller's dict
        is modified) before the row is added; they are filled in later by
        updateValidityRanges.
        """
        info[self.config.validStart] = None
        info[self.config.validEnd] = None
        RegisterTask.addRow(self, conn, info, *args, **kwargs)

    def updateValidityRanges(self, conn, validity, tables=None):
        """Loop over all tables, filters, and ccdnums,
        and update the validity ranges in the registry.

        @param conn: Database connection (its row_factory is set to sqlite3.Row)
        @param validity: Validity range (days)
        @param tables: Tables to update; defaults to all of config.tables
        """
        # fixSubsetValidity reads result columns by name, which needs sqlite3.Row.
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()
        if tables is None:
            tables = self.config.tables
        for table in tables:
            sql = "SELECT DISTINCT %s FROM %s" % (", ".join(self.config.detector), table)
            cursor.execute(sql)
            rows = cursor.fetchall()
            for row in rows:
                self.fixSubsetValidity(conn, table, row, validity)

    def fixSubsetValidity(self, conn, table, detectorData, validity):
        """Update the validity ranges among selected rows in the registry.

        For tables listed in config.validityUntilSuperseded (by default
        defects, qe_curve, linearizer, crosstalk and bfk), the products are
        valid from their start date until they are superseded by the
        subsequent product.
        For other calibration products, the validity ranges are checked and
        if there are overlaps, a midpoint is used to fix the overlaps,
        so that the calibration data whose date is nearest the date
        of the observation is used.

        DM generated calibrations contain a CALIB_ID header
        keyword. These calibrations likely require the
        incrementValidEnd configuration option set to True. Other
        calibrations generate the calibDate via the DATE-OBS header
        keyword, and likely require incrementValidEnd=False.

        Rows are read by column name, so ``conn`` must use sqlite3.Row as
        its row factory (updateValidityRanges sets this).

        @param conn: Database connection
        @param table: Name of table to be selected
        @param detectorData: Values identifying a detector (from columns in self.config.detector)
        @param validity: Validity range (days)
        """
        columns = ", ".join([self.config.calibDate, self.config.validStart, self.config.validEnd])
        sql = "SELECT id, %s FROM %s" % (columns, table)
        sql += " WHERE " + " AND ".join(col + "=?" for col in self.config.detector)
        sql += " ORDER BY " + self.config.calibDate
        cursor = conn.cursor()
        cursor.execute(sql, detectorData)
        rows = cursor.fetchall()
        if not rows:
            # Nothing matched (e.g. a NULL detector value never satisfies "col=?"),
            # so there is nothing to update; without this guard dates[-1] below
            # would raise IndexError.
            return

        try:
            valids = collections.OrderedDict([(_convertToDate(row[self.config.calibDate]), [None, None]) for
                                              row in rows])
        except Exception:
            det = " ".join("%s=%s" % (k, v) for k, v in zip(self.config.detector, detectorData))
            # Sqlite returns unicode strings, which cannot be passed through SWIG.
            self.log.warn(str("Skipped setting the validity overlaps for %s %s: missing calibration dates" %
                              (table, det)))
            return
        # Distinct calibration dates, in ascending order (rows are ordered by calibDate).
        dates = list(valids.keys())
        if table in self.config.validityUntilSuperseded:
            # A calib is valid until it is superseded
            for thisDate, nextDate in zip(dates[:-1], dates[1:]):
                valids[thisDate][0] = thisDate
                valids[thisDate][1] = nextDate
            valids[dates[-1]][0] = dates[-1]
            valids[dates[-1]][1] = _convertToDate("2037-12-31")  # End of UNIX time
        else:
            # A calib is valid within the validity range (in days) specified.
            for dd in dates:
                valids[dd] = [dd - datetime.timedelta(validity), dd + datetime.timedelta(validity)]
            # Fix the dates so that they do not overlap, which can cause the butler to find a
            # non-unique calib.
            midpoints = [t1 + (t2 - t1)//2 for t1, t2 in zip(dates[:-1], dates[1:])]
            for i, (date, midpoint) in enumerate(zip(dates[:-1], midpoints)):
                if valids[date][1] > midpoint:
                    nextDate = dates[i + 1]
                    valids[nextDate][0] = midpoint + datetime.timedelta(1)
                    if self.config.incrementValidEnd:
                        valids[date][1] = midpoint + datetime.timedelta(1)
                    else:
                        valids[date][1] = midpoint
        # Update the validity data in the registry
        for row in rows:
            calibDate = _convertToDate(row[self.config.calibDate])
            validStart = valids[calibDate][0].isoformat()
            validEnd = valids[calibDate][1].isoformat()
            sql = "UPDATE %s" % table
            sql += " SET %s=?, %s=?" % (self.config.validStart, self.config.validEnd)
            sql += " WHERE id=?"
            conn.execute(sql, (validStart, validEnd, row["id"]))
208 
209 
class IngestCalibsArgumentParser(InputOnlyArgumentParser):
    """Command-line parser for ingesting calibration images into the repository"""

    def __init__(self, *args, **kwargs):
        InputOnlyArgumentParser.__init__(self, *args, **kwargs)
        # (flags, options) pairs; registered in exactly this order.
        argumentSpecs = (
            (("-n", "--dry-run"),
             dict(dest="dryrun", action="store_true", default=False, help="Don't perform any action?")),
            (("--mode",),
             dict(choices=["move", "copy", "link", "skip"], default="move",
                  help="Mode of delivering the files to their destination")),
            (("--create",),
             dict(action="store_true", help="Create new registry?")),
            (("--validity",),
             dict(type=int, required=True, help="Calibration validity period (days)")),
            (("--ignore-ingested",),
             dict(dest="ignoreIngested", action="store_true",
                  help="Don't register files that have already been registered")),
            (("files",),
             dict(nargs="+", help="Names of file")),
        )
        for flags, options in argumentSpecs:
            self.add_argument(*flags, **options)
224 
225 
class IngestCalibsConfig(Config):
    """Settings for IngestCalibsTask: its two subtasks plus error/clobber flags"""
    parse = ConfigurableField(
        target=CalibsParseTask,
        doc="File parsing",
    )
    register = ConfigurableField(
        target=CalibsRegisterTask,
        doc="Registry entry",
    )
    allowError = Field(
        dtype=bool,
        default=False,
        doc="Allow error in ingestion?",
    )
    clobber = Field(
        dtype=bool,
        default=False,
        doc="Clobber existing file?",
    )
232 
233 
class IngestCalibsTask(IngestTask):
    """Task that generates registry for calibration images"""
    ConfigClass = IngestCalibsConfig
    ArgumentParser = IngestCalibsArgumentParser
    _DefaultName = "ingestCalibs"

    def run(self, args):
        """Ingest all specified files and add them to the registry

        For each input file the calibration type is determined; files whose
        type is not one of the registry tables are skipped with a warning.
        Unless the mode is 'skip', the file is delivered to its destination.
        One registry row is added per HDU, and finally the validity ranges
        of every calibration type seen are recomputed (not in a dry run).

        @param args: Parsed command-line arguments; this method reads calib,
            output, files, create, dryrun, mode, butler, ignoreIngested
            and validity
        """
        calibRoot = args.calib if args.calib is not None else args.output
        filenameList = self.expandFiles(args.files)
        with self.register.openRegistry(calibRoot, create=args.create, dryrun=args.dryrun) as registry:
            # Only tables that received new rows need their validity ranges updated.
            calibTypes = set()
            for infile in filenameList:
                fileInfo, hduInfoList = self.parse.getInfo(infile)
                calibType = self.parse.getCalibType(infile)
                if calibType not in self.register.config.tables:
                    self.log.warn(str("Skipped adding %s of observation type '%s' to registry "
                                      "(must be one of %s)" %
                                      (infile, calibType, ", ".join(self.register.config.tables))))
                    continue
                calibTypes.add(calibType)
                if args.mode != 'skip':
                    outfile = self.parse.getDestination(args.butler, fileInfo, infile)
                    ingested = self.ingest(infile, outfile, mode=args.mode, dryrun=args.dryrun)
                    if not ingested:
                        self.log.warn(str("Failed to ingest %s of observation type '%s'" %
                                          (infile, calibType)))
                        continue
                if self.register.check(registry, fileInfo, table=calibType):
                    if args.ignoreIngested:
                        continue

                    # Already registered but not ignored: warn, then add the rows anyway.
                    self.log.warn("%s: already ingested: %s" % (infile, fileInfo))
                for info in hduInfoList:
                    self.register.addRow(registry, info, dryrun=args.dryrun,
                                         create=args.create, table=calibType)
            if not args.dryrun:
                self.register.updateValidityRanges(registry, args.validity, tables=calibTypes)
            else:
                self.log.info("Would update validity ranges here, but dryrun")
# Cross-reference index left over from the generated source listing
# (methods referenced above and, where given, the location of their definition):
#   def expandFiles(self, fileNameList)
#       Expand a set of filenames and globs, returning a list of filenames.
#       Definition: ingest.py:557
#   def ingest(self, infile, outfile, mode="move", dryrun=False)
#       Definition: ingest.py:478
#   def getDestination(self, butler, info, filename)
#       Definition: ingestCalibs.py:56
#   def fixSubsetValidity(self, conn, table, detectorData, validity)
#   def addRow(self, conn, info, *args, **kwargs)
#   def openRegistry(self, directory, create=False, dryrun=False, name="calibRegistry.sqlite3")
#   def createTable(self, conn, forceCreateTables=False)
#   def updateValidityRanges(self, conn, validity, tables=None)