lsst.pipe.tasks  20.0.0-30-g1d38f5b5+3d4683c44c
ingestCalibs.py
Go to the documentation of this file.
1 import collections
2 import datetime
3 import sqlite3
4 from dateutil import parser
5 
6 from lsst.afw.fits import readMetadata
7 from lsst.pex.config import Config, Field, ListField, ConfigurableField
8 from lsst.pipe.base import InputOnlyArgumentParser
9 from lsst.pipe.tasks.ingest import RegisterTask, ParseTask, RegisterConfig, IngestTask
10 
11 
def _convertToDate(dateString):
    """Parse a date string and return only its date part"""
    parsed = parser.parse(dateString)
    return parsed.date()
15 
16 
# NOTE(review): the class statement was absent from this listing. The class
# name comes from the listing's own index; the ParseTask base is inferred
# from the imports -- confirm against the repository.
class CalibsParseTask(ParseTask):
    """Task that will parse the filename and/or its contents to get the
    required information to populate the calibration registry."""
20 
21  def getCalibType(self, filename):
22  """Return a a known calibration dataset type using
23  the observation type in the header keyword OBSTYPE
24 
25  @param filename: Input filename
26  """
27  md = readMetadata(filename, self.config.hdu)
28  if not md.exists("OBSTYPE"):
29  raise RuntimeError("Unable to find the required header keyword OBSTYPE in %s, hdu %d" %
30  (filename, self.config.hdu))
31  obstype = md.getScalar("OBSTYPE").strip().lower()
32  if "flat" in obstype:
33  obstype = "flat"
34  elif "zero" in obstype or "bias" in obstype:
35  obstype = "bias"
36  elif "dark" in obstype:
37  obstype = "dark"
38  elif "fringe" in obstype:
39  obstype = "fringe"
40  elif "sky" in obstype:
41  obstype = "sky"
42  elif "illumcor" in obstype:
43  obstype = "illumcor"
44  elif "defects" in obstype:
45  obstype = "defects"
46  elif "qe_curve" in obstype:
47  obstype = "qe_curve"
48  elif "linearizer" in obstype:
49  obstype = "linearizer"
50  elif "crosstalk" in obstype:
51  obstype = "crosstalk"
52  return obstype
53 
54  def getDestination(self, butler, info, filename):
55  """Get destination for the file
56 
57  @param butler Data butler
58  @param info File properties, used as dataId for the butler
59  @param filename Input filename
60  @return Destination filename
61  """
62  # 'tempinfo' was added as part of DM-5466 to strip Nones from info.
63  # The Butler should handle this behind-the-scenes in the future.
64  # Please reference DM-9873 and delete this comment once it is resolved.
65  tempinfo = {k: v for (k, v) in info.items() if v is not None}
66  calibType = self.getCalibType(filename)
67  raw = butler.get(calibType + "_filename", tempinfo)[0]
68  # Ensure filename is devoid of cfitsio directions about HDUs
69  c = raw.find("[")
70  if c > 0:
71  raw = raw[:c]
72  return raw
73 
74 
# NOTE(review): the class statement was absent from this listing. The class
# name comes from the listing's own index; the RegisterConfig base is
# inferred from the imports -- confirm against the repository.
class CalibsRegisterConfig(RegisterConfig):
    """Configuration for the CalibsRegisterTask"""
    # One registry table is created per entry.
    tables = ListField(
        dtype=str,
        default=["bias", "dark", "flat", "fringe", "sky", "defects", "qe_curve", "linearizer", "crosstalk"],
        doc="Names of tables",
    )
    # Column names used when reading and writing validity information.
    calibDate = Field(dtype=str, default="calibDate", doc="Name of column for calibration date")
    validStart = Field(dtype=str, default="validStart", doc="Name of column for validity start")
    validEnd = Field(dtype=str, default="validEnd", doc="Name of column for validity stop")
    # Validity ranges are computed separately for each distinct combination
    # of these columns.
    detector = ListField(
        dtype=str,
        default=["filter", "ccd"],
        doc="Columns that identify individual detectors",
    )
    validityUntilSuperseded = ListField(
        dtype=str,
        default=["defects", "qe_curve", "linearizer", "crosstalk"],
        doc="Tables for which to set validity for a calib from when it is "
            "taken until it is superseded by the next; validity in other tables "
            "is calculated by applying the validity range.",
    )
88 
89 
# NOTE(review): the class statement was absent from this listing. The class
# name comes from the listing's own index; the RegisterTask base is inferred
# from the explicit RegisterTask.* calls in the methods -- confirm against
# the repository.
class CalibsRegisterTask(RegisterTask):
    """Task that will generate the calibration registry for the Mapper"""
    ConfigClass = CalibsRegisterConfig
93 
94  def openRegistry(self, directory, create=False, dryrun=False, name="calibRegistry.sqlite3"):
95  """Open the registry and return the connection handle"""
96  return RegisterTask.openRegistry(self, directory, create, dryrun, name)
97 
98  def createTable(self, conn, forceCreateTables=False):
99  """Create the registry tables"""
100  for table in self.config.tables:
101  RegisterTask.createTable(self, conn, table=table, forceCreateTables=forceCreateTables)
102 
103  def addRow(self, conn, info, *args, **kwargs):
104  """Add a row to the file table"""
105  info[self.config.validStart] = None
106  info[self.config.validEnd] = None
107  RegisterTask.addRow(self, conn, info, *args, **kwargs)
108 
109  def updateValidityRanges(self, conn, validity, tables=None):
110  """Loop over all tables, filters, and ccdnums,
111  and update the validity ranges in the registry.
112 
113  @param conn: Database connection
114  @param validity: Validity range (days)
115  """
116  conn.row_factory = sqlite3.Row
117  cursor = conn.cursor()
118  if tables is None:
119  tables = self.config.tables
120  for table in tables:
121  sql = "SELECT DISTINCT %s FROM %s" % (", ".join(self.config.detector), table)
122  cursor.execute(sql)
123  rows = cursor.fetchall()
124  for row in rows:
125  self.fixSubsetValidity(conn, table, row, validity)
126 
127  def fixSubsetValidity(self, conn, table, detectorData, validity):
128  """Update the validity ranges among selected rows in the registry.
129 
130  For defects and qe_curve, the products are valid from their start date until
131  they are superseded by subsequent defect data.
132  For other calibration products, the validity ranges are checked and
133  if there are overlaps, a midpoint is used to fix the overlaps,
134  so that the calibration data with whose date is nearest the date
135  of the observation is used.
136 
137  @param conn: Database connection
138  @param table: Name of table to be selected
139  @param detectorData: Values identifying a detector (from columns in self.config.detector)
140  @param validity: Validity range (days)
141  """
142  columns = ", ".join([self.config.calibDate, self.config.validStart, self.config.validEnd])
143  sql = "SELECT id, %s FROM %s" % (columns, table)
144  sql += " WHERE " + " AND ".join(col + "=?" for col in self.config.detector)
145  sql += " ORDER BY " + self.config.calibDate
146  cursor = conn.cursor()
147  cursor.execute(sql, detectorData)
148  rows = cursor.fetchall()
149 
150  try:
151  valids = collections.OrderedDict([(_convertToDate(row[self.config.calibDate]), [None, None]) for
152  row in rows])
153  except Exception:
154  det = " ".join("%s=%s" % (k, v) for k, v in zip(self.config.detector, detectorData))
155  # Sqlite returns unicode strings, which cannot be passed through SWIG.
156  self.log.warn(str("Skipped setting the validity overlaps for %s %s: missing calibration dates" %
157  (table, det)))
158  return
159  dates = list(valids.keys())
160  if table in self.config.validityUntilSuperseded:
161  # A calib is valid until it is superseded
162  for thisDate, nextDate in zip(dates[:-1], dates[1:]):
163  valids[thisDate][0] = thisDate
164  valids[thisDate][1] = nextDate
165  valids[dates[-1]][0] = dates[-1]
166  valids[dates[-1]][1] = _convertToDate("2037-12-31") # End of UNIX time
167  else:
168  # A calib is valid within the validity range (in days) specified.
169  for dd in dates:
170  valids[dd] = [dd - datetime.timedelta(validity), dd + datetime.timedelta(validity)]
171  # Fix the dates so that they do not overlap, which can cause the butler to find a
172  # non-unique calib.
173  midpoints = [t1 + (t2 - t1)//2 for t1, t2 in zip(dates[:-1], dates[1:])]
174  for i, (date, midpoint) in enumerate(zip(dates[:-1], midpoints)):
175  if valids[date][1] > midpoint:
176  nextDate = dates[i + 1]
177  valids[nextDate][0] = midpoint + datetime.timedelta(1)
178  valids[date][1] = midpoint + datetime.timedelta(1)
179  del midpoints
180  del dates
181  # Update the validity data in the registry
182  for row in rows:
183  calibDate = _convertToDate(row[self.config.calibDate])
184  validStart = valids[calibDate][0].isoformat()
185  validEnd = valids[calibDate][1].isoformat()
186  sql = "UPDATE %s" % table
187  sql += " SET %s=?, %s=?" % (self.config.validStart, self.config.validEnd)
188  sql += " WHERE id=?"
189  conn.execute(sql, (validStart, validEnd, row["id"]))
190 
191 
class IngestCalibsArgumentParser(InputOnlyArgumentParser):
    """Command-line parser for ingesting calibration images into the repository"""

    def __init__(self, *args, **kwargs):
        """Set up the base parser, then register the ingestion options"""
        InputOnlyArgumentParser.__init__(self, *args, **kwargs)
        self.add_argument(
            "-n", "--dry-run",
            dest="dryrun",
            action="store_true",
            default=False,
            help="Don't perform any action?",
        )
        self.add_argument(
            "--mode",
            choices=["move", "copy", "link", "skip"],
            default="move",
            help="Mode of delivering the files to their destination",
        )
        self.add_argument(
            "--create",
            action="store_true",
            help="Create new registry?",
        )
        # The validity period has no default and must always be supplied.
        self.add_argument(
            "--validity",
            type=int,
            required=True,
            help="Calibration validity period (days)",
        )
        self.add_argument(
            "--ignore-ingested",
            dest="ignoreIngested",
            action="store_true",
            help="Don't register files that have already been registered",
        )
        self.add_argument(
            "files",
            nargs="+",
            help="Names of file",
        )
206 
207 
class IngestCalibsConfig(Config):
    """Settings for IngestCalibsTask"""
    # Subtasks: one parses the input files, the other writes registry entries.
    parse = ConfigurableField(
        target=CalibsParseTask,
        doc="File parsing",
    )
    register = ConfigurableField(
        target=CalibsRegisterTask,
        doc="Registry entry",
    )
    allowError = Field(
        dtype=bool,
        default=False,
        doc="Allow error in ingestion?",
    )
    clobber = Field(
        dtype=bool,
        default=False,
        doc="Clobber existing file?",
    )
214 
215 
# NOTE(review): the class statement was absent from this listing. The class
# name comes from the listing's own index; the IngestTask base is inferred
# from the imports and from the use of expandFiles/ingest in run() --
# confirm against the repository.
class IngestCalibsTask(IngestTask):
    """Task that generates registry for calibration images"""
    ConfigClass = IngestCalibsConfig
    ArgumentParser = IngestCalibsArgumentParser
    _DefaultName = "ingestCalibs"
221 
222  def run(self, args):
223  """Ingest all specified files and add them to the registry"""
224  calibRoot = args.calib if args.calib is not None else args.output
225  filenameList = self.expandFiles(args.files)
226  with self.register.openRegistry(calibRoot, create=args.create, dryrun=args.dryrun) as registry:
227  calibTypes = set()
228  for infile in filenameList:
229  fileInfo, hduInfoList = self.parse.getInfo(infile)
230  calibType = self.parse.getCalibType(infile)
231  if calibType not in self.register.config.tables:
232  self.log.warn(str("Skipped adding %s of observation type '%s' to registry "
233  "(must be one of %s)" %
234  (infile, calibType, ", ".join(self.register.config.tables))))
235  continue
236  calibTypes.add(calibType)
237  if args.mode != 'skip':
238  outfile = self.parse.getDestination(args.butler, fileInfo, infile)
239  ingested = self.ingest(infile, outfile, mode=args.mode, dryrun=args.dryrun)
240  if not ingested:
241  self.log.warn(str("Failed to ingest %s of observation type '%s'" %
242  (infile, calibType)))
243  continue
244  if self.register.check(registry, fileInfo, table=calibType):
245  if args.ignoreIngested:
246  continue
247 
248  self.log.warn("%s: already ingested: %s" % (infile, fileInfo))
249  for info in hduInfoList:
250  self.register.addRow(registry, info, dryrun=args.dryrun,
251  create=args.create, table=calibType)
252  if not args.dryrun:
253  self.register.updateValidityRanges(registry, args.validity, tables=calibTypes)
254  else:
255  self.log.info("Would update validity ranges here, but dryrun")
lsst.pipe.tasks.ingestCalibs.IngestCalibsTask
Definition: ingestCalibs.py:216
lsst.pipe.tasks.ingestCalibs.CalibsRegisterTask
Definition: ingestCalibs.py:90
lsst.pipe.tasks.ingestCalibs.CalibsParseTask.getDestination
def getDestination(self, butler, info, filename)
Definition: ingestCalibs.py:54
lsst.pipe.tasks.ingestCalibs.CalibsRegisterTask.createTable
def createTable(self, conn, forceCreateTables=False)
Definition: ingestCalibs.py:98
lsst.pipe.tasks.ingest.IngestTask.expandFiles
def expandFiles(self, fileNameList)
Expand a set of filenames and globs, returning a list of filenames.
Definition: ingest.py:537
lsst.pipe.tasks.ingest.IngestTask
Definition: ingest.py:386
lsst.pipe.tasks.ingest.ParseTask
Definition: ingest.py:66
lsst.pipe.tasks.ingestCalibs.CalibsRegisterConfig
Definition: ingestCalibs.py:75
lsst.pipe.tasks.ingest
Definition: ingest.py:1
lsst.pipe.tasks.ingestCalibs.CalibsRegisterTask.fixSubsetValidity
def fixSubsetValidity(self, conn, table, detectorData, validity)
Definition: ingestCalibs.py:127
lsst.pipe.tasks.ingestCalibs.IngestCalibsArgumentParser
Definition: ingestCalibs.py:192
lsst.pipe.tasks.ingest.RegisterTask
Definition: ingest.py:260
lsst::pex::config
lsst.pipe.tasks.ingestCalibs.IngestCalibsArgumentParser.__init__
def __init__(self, *args, **kwargs)
Definition: ingestCalibs.py:195
lsst.pipe.tasks.ingestCalibs.CalibsRegisterTask.addRow
def addRow(self, conn, info, *args, **kwargs)
Definition: ingestCalibs.py:103
lsst.pipe.tasks.ingest.IngestTask.ingest
def ingest(self, infile, outfile, mode="move", dryrun=False)
Definition: ingest.py:458
lsst.pipe.tasks.ingestCalibs.IngestCalibsConfig
Definition: ingestCalibs.py:208
lsst.pipe.tasks.ingestCalibs.CalibsRegisterTask.updateValidityRanges
def updateValidityRanges(self, conn, validity, tables=None)
Definition: ingestCalibs.py:109
lsst::afw::fits
lsst.pipe.tasks.ingestCalibs.CalibsParseTask
Definition: ingestCalibs.py:17
lsst.pipe.tasks.ingestCalibs.CalibsRegisterTask.openRegistry
def openRegistry(self, directory, create=False, dryrun=False, name="calibRegistry.sqlite3")
Definition: ingestCalibs.py:94
lsst.pipe::base
lsst.pipe.tasks.ingest.RegisterConfig
Definition: ingest.py:203
lsst.pipe.tasks.ingestCalibs.CalibsParseTask.getCalibType
def getCalibType(self, filename)
Definition: ingestCalibs.py:21
lsst.pipe.tasks.ingestCalibs.IngestCalibsTask.run
def run(self, args)
Definition: ingestCalibs.py:222