lsst.pipe.tasks g0099ee1360+6048f86b6d
ingestCalibs.py
Go to the documentation of this file.
1import collections
2import datetime
3import sqlite3
4from dateutil import parser
5
6from lsst.afw.fits import readMetadata
7from lsst.pex.config import Config, Field, ListField, ConfigurableField
8from lsst.pipe.base import InputOnlyArgumentParser
9from lsst.pipe.tasks.ingest import RegisterTask, ParseTask, RegisterConfig, IngestTask
10
11
def _convertToDate(dateString):
    """Parse a date string and return only its calendar-date part"""
    # dateutil yields a full datetime; any time-of-day component is dropped.
    parsed = parser.parse(dateString)
    return parsed.date()
15
16
# NOTE(review): the class statement was missing from this listing; the name
# comes from IngestCalibsConfig.parse and the base class is inferred from the
# ParseTask import -- confirm against the upstream file.
class CalibsParseTask(ParseTask):
    """Task that will parse the filename and/or its contents to get the
    required information to populate the calibration registry."""

    def getCalibType(self, filename):
        """Return a known calibration dataset type using
        the observation type in the header keyword OBSTYPE

        The OBSTYPE value is stripped and lower-cased, then compared by
        substring against the known calibration types in a fixed order; the
        first match wins. A value matching none of them is returned as-is
        (stripped and lower-cased).

        @param filename: Input filename
        @return Calibration type name
        @throw RuntimeError if the header has no OBSTYPE keyword
        """
        md = readMetadata(filename, self.config.hdu)
        if not md.exists("OBSTYPE"):
            raise RuntimeError("Unable to find the required header keyword OBSTYPE in %s, hdu %d" %
                               (filename, self.config.hdu))
        obstype = md.getScalar("OBSTYPE").strip().lower()

        # (substrings to look for, calibration type), checked in this order.
        # All substrings are lower case because obstype has been lower-cased;
        # the previous upper-case "BFK" test could never succeed.
        knownTypes = (
            (("flat",), "flat"),
            (("zero", "bias"), "bias"),
            (("dark",), "dark"),
            (("fringe",), "fringe"),
            (("sky",), "sky"),
            (("illumcor",), "illumcor"),
            (("defects",), "defects"),
            (("qe_curve",), "qe_curve"),
            (("linearizer",), "linearizer"),
            (("crosstalk",), "crosstalk"),
            (("bfk",), "bfk"),
        )
        for keywords, calibType in knownTypes:
            if any(keyword in obstype for keyword in keywords):
                return calibType
        return obstype

    def getDestination(self, butler, info, filename):
        """Get destination for the file

        @param butler Data butler
        @param info File properties, used as dataId for the butler
        @param filename Input filename
        @return Destination filename
        """
        # 'tempinfo' was added as part of DM-5466 to strip Nones from info.
        # The Butler should handle this behind-the-scenes in the future.
        # Please reference DM-9873 and delete this comment once it is resolved.
        tempinfo = {k: v for (k, v) in info.items() if v is not None}
        calibType = self.getCalibType(filename)
        raw = butler.get(calibType + "_filename", tempinfo)[0]
        # Ensure filename is devoid of cfitsio directions about HDUs
        c = raw.find("[")
        if c > 0:
            raw = raw[:c]
        return raw
75
76
# NOTE(review): the class statement was missing from this listing; the name
# comes from CalibsRegisterTask.ConfigClass and the base class is inferred
# from the RegisterConfig import -- confirm against the upstream file.
class CalibsRegisterConfig(RegisterConfig):
    """Configuration for the CalibsRegisterTask"""
    # One registry table per calibration type.
    tables = ListField(dtype=str, default=["bias", "dark", "flat", "fringe", "sky", "defects", "qe_curve",
                                           "linearizer", "crosstalk", "bfk"], doc="Names of tables")
    # Column names used when reading and writing validity information.
    calibDate = Field(dtype=str, default="calibDate", doc="Name of column for calibration date")
    validStart = Field(dtype=str, default="validStart", doc="Name of column for validity start")
    validEnd = Field(dtype=str, default="validEnd", doc="Name of column for validity stop")
    # Rows sharing the same values in these columns have their validity
    # ranges computed together.
    detector = ListField(dtype=str, default=["filter", "ccd"],
                         doc="Columns that identify individual detectors")
    validityUntilSuperseded = ListField(dtype=str, default=["defects", "qe_curve", "linearizer", "crosstalk",
                                                            "bfk"],
                                        doc="Tables for which to set validity for a calib from when it is "
                                        "taken until it is superseded by the next; validity in other tables "
                                        "is calculated by applying the validity range.")
    incrementValidEnd = Field(
        dtype=bool,
        default=True,
        doc="Fix the off-by-one error by incrementing validEnd. See "
        "fixSubsetValidity for more details.",
    )
97
98
# NOTE(review): the class statement was missing from this listing; the name
# comes from IngestCalibsConfig.register and the base class from the explicit
# RegisterTask.* calls below -- confirm against the upstream file.
class CalibsRegisterTask(RegisterTask):
    """Task that will generate the calibration registry for the Mapper"""
    ConfigClass = CalibsRegisterConfig

    def openRegistry(self, directory, create=False, dryrun=False, name="calibRegistry.sqlite3"):
        """Open the registry and return the connection handle

        Delegates to RegisterTask.openRegistry; only the default registry
        file name differs.
        """
        return RegisterTask.openRegistry(self, directory, create, dryrun, name)

    def createTable(self, conn, forceCreateTables=False):
        """Create the registry tables, one per entry in config.tables"""
        for table in self.config.tables:
            RegisterTask.createTable(self, conn, table=table, forceCreateTables=forceCreateTables)

    def addRow(self, conn, info, *args, **kwargs):
        """Add a row to the file table

        The validity columns are set to None in ``info`` (which is modified
        in place) before the row is added; they are filled in later by
        updateValidityRanges.
        """
        info[self.config.validStart] = None
        info[self.config.validEnd] = None
        RegisterTask.addRow(self, conn, info, *args, **kwargs)

    def updateValidityRanges(self, conn, validity, tables=None):
        """Loop over all tables and all distinct detector identifiers
        (the columns in config.detector), and update the validity ranges
        in the registry.

        @param conn: Database connection; its row_factory is set to sqlite3.Row
        @param validity: Validity range (days)
        @param tables: Names of tables to update (default: config.tables)
        """
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()
        if tables is None:
            tables = self.config.tables
        for table in tables:
            sql = "SELECT DISTINCT %s FROM %s" % (", ".join(self.config.detector), table)
            cursor.execute(sql)
            rows = cursor.fetchall()
            for row in rows:
                self.fixSubsetValidity(conn, table, row, validity)

    def fixSubsetValidity(self, conn, table, detectorData, validity):
        """Update the validity ranges among selected rows in the registry.

        For tables listed in config.validityUntilSuperseded, the products are
        valid from their calibration date until they are superseded by the
        next product; the last one is valid until 2037-12-31.
        For other calibration products, each product is valid for
        ``validity`` days either side of its calibration date, and if
        neighbouring ranges overlap, the midpoint between the two dates is
        used to fix the overlap, so that the calibration data whose date is
        nearest the date of the observation is used.

        When an overlap is fixed, the later product starts on the day after
        the midpoint. The earlier product ends on that same day if
        config.incrementValidEnd is True, otherwise on the midpoint itself.

        DM generated calibrations contain a CALIB_ID header
        keyword. These calibrations likely require the
        incrementValidEnd configuration option set to True. Other
        calibrations generate the calibDate via the DATE-OBS header
        keyword, and likely require incrementValidEnd=False.

        If any calibration date cannot be read or parsed, a warning is
        logged and nothing is updated. If no rows match, nothing is done.

        @param conn: Database connection; rows must be addressable by column
                     name (updateValidityRanges sets row_factory=sqlite3.Row)
        @param table: Name of table to be selected
        @param detectorData: Values identifying a detector (from columns in self.config.detector)
        @param validity: Validity range (days)
        """
        # Used twice below (SQL parameters and the warning message), so
        # materialise it once.
        detectorData = tuple(detectorData)

        columns = ", ".join([self.config.calibDate, self.config.validStart, self.config.validEnd])
        sql = "SELECT id, %s FROM %s" % (columns, table)
        sql += " WHERE " + " AND ".join(col + "=?" for col in self.config.detector)
        sql += " ORDER BY " + self.config.calibDate
        cursor = conn.cursor()
        cursor.execute(sql, detectorData)
        rows = cursor.fetchall()
        if not rows:
            # Nothing to update; also avoids indexing an empty date list below.
            return

        try:
            calibDates = [_convertToDate(row[self.config.calibDate]) for row in rows]
        except Exception:
            det = " ".join("%s=%s" % (k, v) for k, v in zip(self.config.detector, detectorData))
            self.log.warning("Skipped setting the validity overlaps for %s %s: missing calibration dates",
                             table, det)
            return

        # Map each distinct calibration date (in query order) to [validStart, validEnd].
        valids = collections.OrderedDict((date, [None, None]) for date in calibDates)
        dates = list(valids)
        if table in self.config.validityUntilSuperseded:
            # A calib is valid until it is superseded
            for thisDate, nextDate in zip(dates[:-1], dates[1:]):
                valids[thisDate] = [thisDate, nextDate]
            valids[dates[-1]] = [dates[-1], _convertToDate("2037-12-31")]  # End of UNIX time
        else:
            # A calib is valid within the validity range (in days) specified.
            halfWidth = datetime.timedelta(validity)
            for dd in dates:
                valids[dd] = [dd - halfWidth, dd + halfWidth]
            # Fix the dates so that they do not overlap, which can cause the butler to find a
            # non-unique calib.
            oneDay = datetime.timedelta(1)
            for thisDate, nextDate in zip(dates[:-1], dates[1:]):
                midpoint = thisDate + (nextDate - thisDate)//2
                if valids[thisDate][1] > midpoint:
                    valids[nextDate][0] = midpoint + oneDay
                    if self.config.incrementValidEnd:
                        valids[thisDate][1] = midpoint + oneDay
                    else:
                        valids[thisDate][1] = midpoint

        # Update the validity data in the registry
        sql = "UPDATE %s" % table
        sql += " SET %s=?, %s=?" % (self.config.validStart, self.config.validEnd)
        sql += " WHERE id=?"
        for row, calibDate in zip(rows, calibDates):
            validStart, validEnd = valids[calibDate]
            conn.execute(sql, (validStart.isoformat(), validEnd.isoformat(), row["id"]))
207
208
class IngestCalibsArgumentParser(InputOnlyArgumentParser):
    """Command-line parser for ingesting calibration images into the repository"""

    def __init__(self, *args, **kwargs):
        InputOnlyArgumentParser.__init__(self, *args, **kwargs)
        # (flags or positional name, add_argument keywords), registered in this order.
        argumentSpecs = (
            (("-n", "--dry-run"),
             dict(dest="dryrun", action="store_true", default=False,
                  help="Don't perform any action?")),
            (("--mode",),
             dict(choices=["move", "copy", "link", "skip"], default="move",
                  help="Mode of delivering the files to their destination")),
            (("--create",),
             dict(action="store_true", help="Create new registry?")),
            (("--validity",),
             dict(type=int, required=True, help="Calibration validity period (days)")),
            (("--ignore-ingested",),
             dict(dest="ignoreIngested", action="store_true",
                  help="Don't register files that have already been registered")),
            (("files",),
             dict(nargs="+", help="Names of file")),
        )
        for names, options in argumentSpecs:
            self.add_argument(*names, **options)
223
224
class IngestCalibsConfig(Config):
    """Settings for IngestCalibsTask"""
    # Sub-tasks: one reads the file information, the other writes the registry.
    parse = ConfigurableField(
        target=CalibsParseTask,
        doc="File parsing",
    )
    register = ConfigurableField(
        target=CalibsRegisterTask,
        doc="Registry entry",
    )
    # Boolean switches, both off by default.
    allowError = Field(
        dtype=bool,
        default=False,
        doc="Allow error in ingestion?",
    )
    clobber = Field(
        dtype=bool,
        default=False,
        doc="Clobber existing file?",
    )
231
232
# NOTE(review): the class statement was missing from this listing; the name is
# inferred from _DefaultName / IngestCalibsConfig and the base class from the
# IngestTask import -- confirm against the upstream file.
class IngestCalibsTask(IngestTask):
    """Task that generates registry for calibration images"""
    ConfigClass = IngestCalibsConfig
    ArgumentParser = IngestCalibsArgumentParser
    _DefaultName = "ingestCalibs"

    def run(self, args):
        """Ingest all specified files and add them to the registry

        For each input file the calibration type is determined. Files whose
        type is not in the register's table list are skipped with a warning.
        Unless the mode is 'skip', the file is delivered to its destination;
        a failed delivery is logged and the file is skipped. Every HDU's
        information is then added to the table for that type. Afterwards the
        validity ranges of the touched tables are recomputed, unless this is
        a dry run.

        A file that is already registered is skipped only when
        args.ignoreIngested is set; otherwise a warning is logged and its
        rows are added anyway.

        @param args: Parsed command-line arguments; this method reads calib,
                     output, files, create, dryrun, mode, butler,
                     ignoreIngested and validity
        """
        calibRoot = args.calib if args.calib is not None else args.output
        filenameList = self.expandFiles(args.files)
        with self.register.openRegistry(calibRoot, create=args.create, dryrun=args.dryrun) as registry:
            # Only tables that received a file need their validity recomputed.
            calibTypes = set()
            for infile in filenameList:
                fileInfo, hduInfoList = self.parse.getInfo(infile)
                calibType = self.parse.getCalibType(infile)
                if calibType not in self.register.config.tables:
                    self.log.warning("Skipped adding %s of observation type '%s' to registry "
                                     "(must be one of %s)",
                                     infile, calibType, ", ".join(self.register.config.tables))
                    continue
                calibTypes.add(calibType)
                if args.mode != 'skip':
                    outfile = self.parse.getDestination(args.butler, fileInfo, infile)
                    ingested = self.ingest(infile, outfile, mode=args.mode, dryrun=args.dryrun)
                    if not ingested:
                        self.log.warning("Failed to ingest %s of observation type '%s'",
                                         infile, calibType)
                        continue
                if self.register.check(registry, fileInfo, table=calibType):
                    if args.ignoreIngested:
                        continue

                    self.log.warning("%s: already ingested: %s", infile, fileInfo)
                for info in hduInfoList:
                    self.register.addRow(registry, info, dryrun=args.dryrun,
                                         create=args.create, table=calibType)
            if not args.dryrun:
                self.register.updateValidityRanges(registry, args.validity, tables=calibTypes)
            else:
                self.log.info("Would update validity ranges here, but dryrun")
def expandFiles(self, fileNameList)
Expand a set of filenames and globs, returning a list of filenames.
Definition: ingest.py:557
def ingest(self, infile, outfile, mode="move", dryrun=False)
Definition: ingest.py:478
def getDestination(self, butler, info, filename)
Definition: ingestCalibs.py:56
def fixSubsetValidity(self, conn, table, detectorData, validity)
def addRow(self, conn, info, *args, **kwargs)
def openRegistry(self, directory, create=False, dryrun=False, name="calibRegistry.sqlite3")
def createTable(self, conn, forceCreateTables=False)
def updateValidityRanges(self, conn, validity, tables=None)