lsst.pipe.tasks  21.0.0-142-gef555c1e+42c9bccae2
ingestCalibs.py
Go to the documentation of this file.
1 import collections
2 import datetime
3 import sqlite3
4 from dateutil import parser
5 
6 from lsst.afw.fits import readMetadata
7 from lsst.pex.config import Config, Field, ListField, ConfigurableField
8 from lsst.pipe.base import InputOnlyArgumentParser
9 from lsst.pipe.tasks.ingest import RegisterTask, ParseTask, RegisterConfig, IngestTask
10 
11 
def _convertToDate(dateString):
    """Parse a date string and keep only its calendar date.

    @param dateString: Text accepted by ``dateutil.parser.parse``
    @return The date part of the parsed timestamp
    """
    parsed = parser.parse(dateString)
    return parsed.date()
15 
16 
18  """Task that will parse the filename and/or its contents to get the
19  required information to populate the calibration registry."""
20 
def getCalibType(self, filename):
    """Return a known calibration dataset type using
    the observation type in the header keyword OBSTYPE

    The OBSTYPE value is stripped and lower-cased, then compared by
    substring against a list of known keywords; the first keyword found
    decides the dataset type. A value that matches no keyword is returned
    unchanged (stripped and lower-cased).

    @param filename: Input filename
    @return Name of the calibration dataset type
    @throw RuntimeError if the header has no OBSTYPE keyword
    """
    md = readMetadata(filename, self.config.hdu)
    if not md.exists("OBSTYPE"):
        raise RuntimeError("Unable to find the required header keyword OBSTYPE in %s, hdu %d" %
                           (filename, self.config.hdu))
    obstype = md.getScalar("OBSTYPE").strip().lower()

    # (keyword, dataset type) pairs, tested in this order. Keywords must be
    # lower case because obstype has already been lower-cased; an upper-case
    # keyword such as "BFK" could never match.
    knownTypes = (
        ("flat", "flat"),
        ("zero", "bias"),
        ("bias", "bias"),
        ("dark", "dark"),
        ("fringe", "fringe"),
        ("sky", "sky"),
        ("illumcor", "illumcor"),
        ("defects", "defects"),
        ("qe_curve", "qe_curve"),
        ("linearizer", "linearizer"),
        ("crosstalk", "crosstalk"),
        ("bfk", "bfk"),
    )
    for keyword, calibType in knownTypes:
        if keyword in obstype:
            return calibType
    return obstype
55 
def getDestination(self, butler, info, filename):
    """Get destination for the file

    @param butler Data butler
    @param info File properties, used as dataId for the butler
    @param filename Input filename
    @return Destination filename
    """
    # 'tempinfo' was added as part of DM-5466 to strip Nones from info.
    # The Butler should handle this behind-the-scenes in the future.
    # Please reference DM-9873 and delete this comment once it is resolved.
    tempinfo = {k: v for (k, v) in info.items() if v is not None}
    calibType = self.getCalibType(filename)
    # The butler returns a sequence of locations; the first one is used.
    raw = butler.get(calibType + "_filename", tempinfo)[0]
    # Ensure filename is devoid of cfitsio directions about HDUs
    c = raw.find("[")
    if c > 0:
        raw = raw[:c]
    return raw
75 
76 
78  """Configuration for the CalibsRegisterTask"""
79  tables = ListField(dtype=str, default=["bias", "dark", "flat", "fringe", "sky", "defects", "qe_curve",
80  "linearizer", "crosstalk", "bfk"], doc="Names of tables")
81  calibDate = Field(dtype=str, default="calibDate", doc="Name of column for calibration date")
82  validStart = Field(dtype=str, default="validStart", doc="Name of column for validity start")
83  validEnd = Field(dtype=str, default="validEnd", doc="Name of column for validity stop")
84  detector = ListField(dtype=str, default=["filter", "ccd"],
85  doc="Columns that identify individual detectors")
86  validityUntilSuperseded = ListField(dtype=str, default=["defects", "qe_curve", "linearizer", "crosstalk",
87  "bfk"],
88  doc="Tables for which to set validity for a calib from when it is "
89  "taken until it is superseded by the next; validity in other tables "
90  "is calculated by applying the validity range.")
91  incrementValidEnd = Field(
92  dtype=bool,
93  default=True,
94  doc="Fix the off-by-one error by incrementing validEnd. See "
95  "fixSubsetValidity for more details.",
96  )
97 
98 
100  """Task that will generate the calibration registry for the Mapper"""
101  ConfigClass = CalibsRegisterConfig
102 
def openRegistry(self, directory, create=False, dryrun=False, name="calibRegistry.sqlite3"):
    """Open the calibration registry and hand back the connection handle.

    Delegates to ``RegisterTask.openRegistry``, supplying
    ``calibRegistry.sqlite3`` as the default registry file name.
    """
    handle = RegisterTask.openRegistry(self, directory, create, dryrun, name)
    return handle
106 
def createTable(self, conn, forceCreateTables=False):
    """Create one registry table for every name in ``self.config.tables``.

    @param conn: Database connection
    @param forceCreateTables: Passed through to ``RegisterTask.createTable``
    """
    tableNames = self.config.tables
    for tableName in tableNames:
        RegisterTask.createTable(self, conn, table=tableName, forceCreateTables=forceCreateTables)
111 
def addRow(self, conn, info, *args, **kwargs):
    """Insert a row for a file, with its validity columns left empty.

    The validity start and end entries of ``info`` are set to None before
    the row is handed to ``RegisterTask.addRow``.
    """
    for column in (self.config.validStart, self.config.validEnd):
        info[column] = None
    RegisterTask.addRow(self, conn, info, *args, **kwargs)
117 
def updateValidityRanges(self, conn, validity, tables=None):
    """Loop over all tables, filters, and ccdnums,
    and update the validity ranges in the registry.

    @param conn: Database connection
    @param validity: Validity range (days)
    @param tables: Names of the tables to update; None means every table
                   in self.config.tables
    """
    # Rows must be addressable by column name in fixSubsetValidity.
    conn.row_factory = sqlite3.Row
    cursor = conn.cursor()
    if tables is None:
        tables = self.config.tables
    detectorColumns = ", ".join(self.config.detector)
    for table in tables:
        sql = "SELECT DISTINCT %s FROM %s" % (detectorColumns, table)
        cursor.execute(sql)
        # Fetch everything first: each distinct detector is then fixed up
        # independently through its own cursor.
        for row in cursor.fetchall():
            self.fixSubsetValidity(conn, table, row, validity)
135 
def fixSubsetValidity(self, conn, table, detectorData, validity):
    """Update the validity ranges among selected rows in the registry.

    For tables listed in self.config.validityUntilSuperseded, the products
    are valid from their start date until they are superseded by the next
    product of the same table and detector.
    For other calibration products, the validity ranges are checked and
    if there are overlaps, a midpoint is used to fix the overlaps,
    so that the calibration data whose date is nearest the date
    of the observation is used.

    DM generated calibrations contain a CALIB_ID header
    keyword. These calibrations likely require the
    incrementValidEnd configuration option set to True. Other
    calibrations generate the calibDate via the DATE-OBS header
    keyword, and likely require incrementValidEnd=False.

    Rows are read by column name, so the connection must return
    mapping-like rows (e.g. sqlite3.Row).

    @param conn: Database connection
    @param table: Name of table to be selected
    @param detectorData: Values identifying a detector (from columns in self.config.detector)
    @param validity: Validity range (days)
    """
    config = self.config
    columns = ", ".join([config.calibDate, config.validStart, config.validEnd])
    sql = "SELECT id, %s FROM %s" % (columns, table)
    sql += " WHERE " + " AND ".join(col + "=?" for col in config.detector)
    sql += " ORDER BY " + config.calibDate
    cursor = conn.cursor()
    cursor.execute(sql, detectorData)
    rows = cursor.fetchall()
    if not rows:
        # Nothing selected (e.g. a NULL detector value never matches "col=?"),
        # so there is nothing to update; also avoids indexing an empty list below.
        return

    try:
        valids = collections.OrderedDict(
            (_convertToDate(row[config.calibDate]), [None, None]) for row in rows
        )
    except Exception:
        # Deliberately best-effort: an unparsable or missing date skips this
        # detector instead of aborting the whole update.
        det = " ".join("%s=%s" % (k, v) for k, v in zip(config.detector, detectorData))
        self.log.warning("Skipped setting the validity overlaps for %s %s: missing calibration dates",
                         table, det)
        return

    # Distinct calibration dates, in the order returned by the query.
    dates = list(valids.keys())
    if table in config.validityUntilSuperseded:
        # A calib is valid until it is superseded
        for thisDate, nextDate in zip(dates[:-1], dates[1:]):
            valids[thisDate][0] = thisDate
            valids[thisDate][1] = nextDate
        valids[dates[-1]][0] = dates[-1]
        valids[dates[-1]][1] = _convertToDate("2037-12-31")  # End of UNIX time
    else:
        # A calib is valid within the validity range (in days) specified.
        span = datetime.timedelta(validity)
        for dd in dates:
            valids[dd] = [dd - span, dd + span]
        # Fix the dates so that they do not overlap, which can cause the butler to find a
        # non-unique calib.
        oneDay = datetime.timedelta(1)
        for thisDate, nextDate in zip(dates[:-1], dates[1:]):
            midpoint = thisDate + (nextDate - thisDate)//2
            if valids[thisDate][1] > midpoint:
                valids[nextDate][0] = midpoint + oneDay
                if config.incrementValidEnd:
                    valids[thisDate][1] = midpoint + oneDay
                else:
                    valids[thisDate][1] = midpoint

    # Update the validity data in the registry
    updateSql = "UPDATE %s" % table
    updateSql += " SET %s=?, %s=?" % (config.validStart, config.validEnd)
    updateSql += " WHERE id=?"
    for row in rows:
        calibDate = _convertToDate(row[config.calibDate])
        validStart = valids[calibDate][0].isoformat()
        validEnd = valids[calibDate][1].isoformat()
        conn.execute(updateSql, (validStart, validEnd, row["id"]))
207 
208 
class IngestCalibsArgumentParser(InputOnlyArgumentParser):
    """Command-line parser for ingesting calibration images into the repository"""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.add_argument(
            "-n", "--dry-run",
            dest="dryrun",
            action="store_true",
            default=False,
            help="Don't perform any action?",
        )
        self.add_argument(
            "--mode",
            choices=["move", "copy", "link", "skip"],
            default="move",
            help="Mode of delivering the files to their destination",
        )
        self.add_argument(
            "--create",
            action="store_true",
            help="Create new registry?",
        )
        self.add_argument(
            "--validity",
            type=int,
            required=True,
            help="Calibration validity period (days)",
        )
        self.add_argument(
            "--ignore-ingested",
            dest="ignoreIngested",
            action="store_true",
            help="Don't register files that have already been registered",
        )
        self.add_argument(
            "files",
            nargs="+",
            help="Names of file",
        )
223 
224 
class IngestCalibsConfig(Config):
    """Settings for IngestCalibsTask"""
    parse = ConfigurableField(
        target=CalibsParseTask,
        doc="File parsing",
    )
    register = ConfigurableField(
        target=CalibsRegisterTask,
        doc="Registry entry",
    )
    allowError = Field(
        dtype=bool,
        default=False,
        doc="Allow error in ingestion?",
    )
    clobber = Field(
        dtype=bool,
        default=False,
        doc="Clobber existing file?",
    )
231 
232 
234  """Task that generates registry for calibration images"""
235  ConfigClass = IngestCalibsConfig
236  ArgumentParser = IngestCalibsArgumentParser
237  _DefaultName = "ingestCalibs"
238 
def run(self, args):
    """Ingest all specified files and add them to the registry

    For each input file the calibration type is determined; files whose
    type is not one of the registry tables are skipped with a warning.
    Unless the mode is 'skip', the file is delivered to its destination
    first. One registry row is added per HDU info entry, and finally the
    validity ranges of every table that was touched are recomputed
    (not done in a dry run).

    @param args: Parsed command-line arguments; the attributes read here are
                 calib, output, files, create, dryrun, mode, butler,
                 ignoreIngested and validity
    """
    calibRoot = args.calib if args.calib is not None else args.output
    filenameList = self.expandFiles(args.files)
    with self.register.openRegistry(calibRoot, create=args.create, dryrun=args.dryrun) as registry:
        # Only tables that received a file need their validity ranges redone.
        calibTypes = set()
        for infile in filenameList:
            fileInfo, hduInfoList = self.parse.getInfo(infile)
            calibType = self.parse.getCalibType(infile)
            if calibType not in self.register.config.tables:
                self.log.warning("Skipped adding %s of observation type '%s' to registry "
                                 "(must be one of %s)",
                                 infile, calibType, ", ".join(self.register.config.tables))
                continue
            calibTypes.add(calibType)
            if args.mode != 'skip':
                outfile = self.parse.getDestination(args.butler, fileInfo, infile)
                ingested = self.ingest(infile, outfile, mode=args.mode, dryrun=args.dryrun)
                if not ingested:
                    self.log.warning("Failed to ingest %s of observation type '%s'",
                                     infile, calibType)
                    continue
            if self.register.check(registry, fileInfo, table=calibType):
                if args.ignoreIngested:
                    continue

                # Not ignoring: warn, then fall through and register the rows anyway.
                self.log.warning("%s: already ingested: %s", infile, fileInfo)
            for info in hduInfoList:
                self.register.addRow(registry, info, dryrun=args.dryrun,
                                     create=args.create, table=calibType)
        if not args.dryrun:
            self.register.updateValidityRanges(registry, args.validity, tables=calibTypes)
        else:
            self.log.info("Would update validity ranges here, but dryrun")
def expandFiles(self, fileNameList)
Expand a set of filenames and globs, returning a list of filenames.
Definition: ingest.py:557
def ingest(self, infile, outfile, mode="move", dryrun=False)
Definition: ingest.py:478
def getDestination(self, butler, info, filename)
Definition: ingestCalibs.py:56
def fixSubsetValidity(self, conn, table, detectorData, validity)
def addRow(self, conn, info, *args, **kwargs)
def openRegistry(self, directory, create=False, dryrun=False, name="calibRegistry.sqlite3")
def createTable(self, conn, forceCreateTables=False)
def updateValidityRanges(self, conn, validity, tables=None)