1 from __future__
import absolute_import, division, print_function
2 from past.builtins
import basestring
3 from builtins
import object
11 import sqlite
as sqlite3
12 from fnmatch
import fnmatch
14 from contextlib
import contextmanager
16 from lsst.pex.config
import Config, Field, DictField, ListField, ConfigurableField
17 import lsst.pex.exceptions
18 from lsst.pipe.base
import Task, InputOnlyArgumentParser
19 import lsst.afw.image
as afwImage
23 """Argument parser to support ingesting images into the image repository"""
26 super(IngestArgumentParser, self).
__init__(*args, **kwargs)
27 self.add_argument(
"-n",
"--dry-run", dest=
"dryrun", action=
"store_true", default=
False,
28 help=
"Don't perform any action?")
29 self.add_argument(
"--mode", choices=[
"move",
"copy",
"link",
"skip"], default=
"link",
30 help=
"Mode of delivering the files to their destination")
31 self.add_argument(
"--create", action=
"store_true", help=
"Create new registry (clobber old)?")
32 self.add_id_argument(
"--badId",
"raw",
"Data identifier for bad data", doMakeDataRefList=
False)
33 self.add_argument(
"--badFile", nargs=
"*", default=[],
34 help=
"Names of bad files (no path; wildcards allowed)")
35 self.add_argument(
"files", nargs=
"+", help=
"Names of file")
39 """Configuration for ParseTask"""
40 translation = DictField(keytype=str, itemtype=str, default={},
41 doc=
"Translation table for property --> header")
42 translators = DictField(keytype=str, itemtype=str, default={},
43 doc=
"Properties and name of translator method")
44 defaults = DictField(keytype=str, itemtype=str, default={},
45 doc=
"Default values if header is not present")
46 hdu = Field(dtype=int, default=0, doc=
"HDU to read for metadata")
47 extnames = ListField(dtype=str, default=[], doc=
"Extension names to search for")
51 """Task that will parse the filename and/or its contents to get the required information
52 for putting the file in the correct location and populating the registry."""
53 ConfigClass = ParseConfig
56 """Get information about the image from the filename and its contents
58 Here, we open the image and parse the header, but one could also look at the filename itself
59 and derive information from that, or set values from the configuration.
61 @param filename Name of file to inspect
62 @return File properties; list of file properties for each extension
64 md = afwImage.readMetadata(filename, self.config.hdu)
66 if len(self.config.extnames) == 0:
68 return phuInfo, [phuInfo]
70 extnames = set(self.config.extnames)
73 while len(extnames) > 0:
76 md = afwImage.readMetadata(filename, extnum)
78 self.log.warn(
"Error reading %s extensions %s" % (filename, extnames))
84 hduInfo[
"hdu"] = extnum
85 infoList.append(hduInfo)
87 return phuInfo, infoList
91 """ Get the name of an extension.
92 @param md: PropertySet (like one obtained from afwImage.readMetadata)
93 @return Name of the extension if it exists. None otherwise.
97 ext = md.get(
"EXTNAME")
99 except lsst.pex.exceptions.Exception:
103 """Attempt to pull the desired information out of the header
105 This is done through two mechanisms:
106 * translation: a property is set directly from the relevant header keyword
107 * translator: a property is set with the result of calling a method
109 The translator methods receive the header metadata and should return the
110 appropriate value, or None if the value cannot be determined.
112 @param md FITS header
113 @param info File properties, to be supplemented
116 for p, h
in self.config.translation.items():
119 if isinstance(value, basestring):
120 value = value.strip()
122 elif p
in self.config.defaults:
123 info[p] = self.config.defaults[p]
125 self.log.warn(
"Unable to find value for %s (derived from %s)" % (p, h))
126 for p, t
in self.config.translators.items():
127 func = getattr(self, t)
130 except Exception
as e:
131 self.log.warn(
"%s failed to translate %s: %s", t, p, e)
133 if value
is not None:
138 """Convert a full DATE-OBS to a mere date
140 Besides being an example of a translator, this is also generally useful.
141 It will only be used if listed as a translator in the configuration.
143 date = md.get(
"DATE-OBS").strip()
150 """Translate a full filter description into a mere filter name
152 Besides being an example of a translator, this is also generally useful.
153 It will only be used if listed as a translator in the configuration.
155 filterName = md.get(
"FILTER").strip()
156 filterName = filterName.strip()
157 c = filterName.find(
" ")
159 filterName = filterName[:c]
163 """Get destination for the file
165 @param butler Data butler
166 @param info File properties, used as dataId for the butler
167 @param filename Input filename
168 @return Destination filename
170 raw = butler.get(
"raw_filename", info)[0]
179 """Configuration for the RegisterTask"""
180 table = Field(dtype=str, default=
"raw", doc=
"Name of table")
181 columns = DictField(keytype=str, itemtype=str, doc=
"List of columns for raw table, with their types",
182 itemCheck=
lambda x: x
in (
"text",
"int",
"double"),
183 default={
'object':
'text',
192 unique = ListField(dtype=str, doc=
"List of columns to be declared unique for the table",
193 default=[
"visit",
"ccd"])
194 visit = ListField(dtype=str, default=[
"visit",
"object",
"date",
"filter"],
195 doc=
"List of columns for raw_visit table")
196 ignore = Field(dtype=bool, default=
False, doc=
"Ignore duplicates in the table?")
197 permissions = Field(dtype=int, default=0o664, doc=
"Permissions mode for registry")
201 """Context manager to provide a registry
203 An existing registry is copied, so that it may continue
204 to be used while we add to this new registry. Finally,
205 the new registry is moved into the right place.
208 def __init__(self, registryName, createTableFunc, forceCreateTables, permissions):
209 """Construct a context manager
211 @param registryName: Name of registry file
212 @param createTableFunc: Function to create tables
213 @param forceCreateTables: Force the (re-)creation of tables?
214 @param permissions: Permissions to set on database file
219 updateFile = tempfile.NamedTemporaryFile(prefix=registryName, dir=os.path.dirname(self.
registryName),
224 if os.path.exists(registryName):
226 os.chmod(self.
updateName, os.stat(registryName).st_mode)
227 shutil.copyfile(registryName, self.
updateName)
231 if not haveTable
or forceCreateTables:
232 createTableFunc(self.
conn)
236 """Provide the 'as' value"""
253 """A context manager that doesn't provide any context
255 Useful for dry runs where we don't want to actually do anything real.
260 class RegisterTask(Task):
261 """Task that will generate the registry for the Mapper"""
262 ConfigClass = RegisterConfig
264 typemap = {
'text': str,
'int': int,
'double': float}
266 def openRegistry(self, directory, create=False, dryrun=False, name="registry.sqlite3"):
267 """Open the registry and return the connection handle.
269 @param directory Directory in which the registry file will be placed
270 @param create Clobber any existing registry and create a new one?
271 @param dryrun Don't do anything permanent?
272 @param name Filename of the registry
273 @return Database connection
278 registryName = os.path.join(directory, name)
283 """Create the registry tables
285 One table (typically 'raw') contains information on all files, and the
286 other (typically 'raw_visit') contains information on all visits.
288 @param conn Database connection
289 @param table Name of table to create in database
292 table = self.config.table
293 cmd =
"create table %s (id integer primary key autoincrement, " % table
294 cmd +=
",".join([(
"%s %s" % (col, colType))
for col, colType
in self.config.columns.items()])
295 if len(self.config.unique) > 0:
296 cmd +=
", unique(" +
",".join(self.config.unique) +
")"
298 conn.cursor().execute(cmd)
300 cmd =
"create table %s_visit (" % table
301 cmd +=
",".join([(
"%s %s" % (col, self.config.columns[col]))
for col
in self.config.visit])
302 cmd +=
", unique(" +
",".join(set(self.config.visit).intersection(set(self.config.unique))) +
")"
304 conn.cursor().execute(cmd)
308 def check(self, conn, info, table=None):
309 """Check for the presence of a row already
311 Not sure this is required, given the 'ignore' configuration option.
314 table = self.config.table
315 if self.config.ignore
or len(self.config.unique) == 0:
317 cursor = conn.cursor()
318 sql =
"SELECT COUNT(*) FROM %s WHERE " % table
319 sql +=
" AND ".join([
"%s = %s" % (col, self.
placeHolder)
for col
in self.config.unique])
320 values = [self.
typemap[self.config.columns[col]](info[col])
for col
in self.config.unique]
322 cursor.execute(sql, values)
323 if cursor.fetchone()[0] > 0:
def addRow(self, conn, info, dryrun=False, create=False, table=None):
    """Add a row to the file table (typically 'raw').

    The row is written with an ``INSERT ... SELECT`` statement so that, when
    the ``ignore`` configuration option is set, a ``WHERE NOT EXISTS`` clause
    can suppress the insert if a row with the same values in the ``unique``
    columns is already present in the target table.

    @param conn    Database connection
    @param info    File properties to add to database; must provide a value
                   for every configured column
    @param dryrun  Only print the statement that would be executed?
    @param create  Accepted for interface compatibility
    @param table   Name of table in database (defaults to config.table)
    """
    # NOTE(review): 'create' is not used by this method -- confirm whether
    # callers still need to pass it.
    if table is None:
        table = self.config.table

    columns = self.config.columns
    sql = "INSERT INTO %s (%s) SELECT " % (table, ",".join(columns))
    sql += ",".join([self.placeHolder] * len(columns))
    # Coerce each value to the Python type matching its declared SQL type.
    values = [self.typemap[tt](info[col]) for col, tt in columns.items()]

    # Without any unique columns there is nothing to compare against, and an
    # empty condition list would leave a dangling WHERE (invalid SQL).
    if self.config.ignore and self.config.unique:
        # The duplicate check must look at the table being inserted into,
        # not unconditionally at the configured default table.
        sql += " WHERE NOT EXISTS (SELECT 1 FROM %s WHERE " % table
        sql += " AND ".join(["%s=%s" % (col, self.placeHolder) for col in self.config.unique])
        sql += ")"
        values += [info[col] for col in self.config.unique]

    if dryrun:
        print("Would execute: '%s' with %s" % (sql, ",".join([str(value) for value in values])))
    else:
        conn.cursor().execute(sql, values)
352 """Generate the visits table (typically 'raw_visit') from the
353 file table (typically 'raw').
355 @param conn Database connection
356 @param table Name of table in database
359 table = self.config.table
360 sql =
"INSERT INTO %s_visit SELECT DISTINCT " % table
361 sql +=
",".join(self.config.visit)
362 sql +=
" FROM %s AS vv1" % table
363 sql +=
" WHERE NOT EXISTS "
364 sql +=
"(SELECT vv2.visit FROM %s_visit AS vv2 WHERE vv1.visit = vv2.visit)" % (table,)
366 print(
"Would execute: %s" % sql)
368 conn.cursor().execute(sql)
372 """Configuration for IngestTask"""
373 parse = ConfigurableField(target=ParseTask, doc=
"File parsing")
374 register = ConfigurableField(target=RegisterTask, doc=
"Registry entry")
375 allowError = Field(dtype=bool, default=
False, doc=
"Allow error in ingestion?")
376 clobber = Field(dtype=bool, default=
False, doc=
"Clobber existing file?")
380 """Task that will ingest images into the data repository"""
381 ConfigClass = IngestConfig
382 ArgumentParser = IngestArgumentParser
383 _DefaultName =
"ingest"
386 super(IngestTask, self).
__init__(*args, **kwargs)
387 self.makeSubtask(
"parse")
388 self.makeSubtask(
"register")
392 """Parse the command-line arguments and run the Task"""
393 config = cls.ConfigClass()
394 parser = cls.ArgumentParser(name=cls._DefaultName)
395 args = parser.parse_args(config)
396 task = cls(config=args.config)
399 def ingest(self, infile, outfile, mode="move", dryrun=False):
400 """Ingest a file into the image repository.
402 @param infile Name of input file
403 @param outfile Name of output file (file in repository)
404 @param mode Mode of ingest (copy/link/move/skip)
405 @param dryrun Only report what would occur?
406 @return Success boolean
411 self.log.info(
"Would %s from %s to %s" % (mode, infile, outfile))
414 outdir = os.path.dirname(outfile)
415 if not os.path.isdir(outdir):
420 if not os.path.isdir(outdir):
422 if self.config.clobber
and os.path.lexists(outfile):
426 shutil.copyfile(infile, outfile)
428 os.symlink(os.path.abspath(infile), outfile)
431 os.rename(infile, outfile)
433 raise AssertionError(
"Unknown mode: %s" % mode)
434 self.log.info(
"%s --<%s>--> %s" % (infile, mode, outfile))
435 except Exception
as e:
436 self.log.warn(
"Failed to %s %s to %s: %s" % (mode, infile, outfile, e))
437 if not self.config.allowError:
443 """Return whether the file qualifies as bad
445 We match against the list of bad file patterns.
447 filename = os.path.basename(filename)
450 for badFile
in badFileList:
451 if fnmatch(filename, badFile):
456 """Return whether the file information qualifies as bad
458 We match against the list of bad data identifiers.
462 for badId
in badIdList:
463 if all(info[key] == value
for key, value
in badId.items()):
468 """Ingest all specified files and add them to the registry"""
469 filenameList = sum([glob(filename)
for filename
in args.files], [])
471 context = self.register.openRegistry(root, create=args.create, dryrun=args.dryrun)
472 with context
as registry:
473 for infile
in filenameList:
475 self.log.info(
"Skipping declared bad file %s" % infile)
478 fileInfo, hduInfoList = self.parse.getInfo(infile)
479 except Exception
as e:
480 if not self.config.allowError:
482 self.log.warn(
"Error parsing %s (%s); skipping" % (infile, e))
484 if self.
isBadId(fileInfo, args.badId.idList):
485 self.log.info(
"Skipping declared bad file %s: %s" % (infile, fileInfo))
487 if self.register.check(registry, fileInfo):
488 self.log.warn(
"%s: already ingested: %s" % (infile, fileInfo))
489 outfile = self.parse.getDestination(args.butler, fileInfo, infile)
490 ingested = self.
ingest(infile, outfile, mode=args.mode, dryrun=args.dryrun)
493 for info
in hduInfoList:
494 self.register.addRow(registry, info, dryrun=args.dryrun, create=args.create)
495 self.register.addVisits(registry, dryrun=args.dryrun)
499 """Can I copy a file? Raise an exception if space constraints are not met.
501 @param fromPath Path from which the file is being copied
502 @param toPath Path to which the file is being copied
504 req = os.stat(fromPath).st_size
505 st = os.statvfs(os.path.dirname(toPath))
506 avail = st.f_bavail * st.f_frsize
508 raise RuntimeError(
"Insufficient space: %d vs %d" % (req, avail))