1 from __future__
import absolute_import, division, print_function
2 from past.builtins
import basestring
3 from builtins
import object
11 import sqlite
as sqlite3
12 from fnmatch
import fnmatch
14 from contextlib
import contextmanager
16 from lsst.pex.config
import Config, Field, DictField, ListField, ConfigurableField
17 import lsst.pex.exceptions
18 from lsst.pipe.base
import Task, InputOnlyArgumentParser
19 import lsst.afw.image
as afwImage
23 """Argument parser to support ingesting images into the image repository"""
26 super(IngestArgumentParser, self).
__init__(*args, **kwargs)
27 self.add_argument(
"-n",
"--dry-run", dest=
"dryrun", action=
"store_true", default=
False,
28 help=
"Don't perform any action?")
29 self.add_argument(
"--mode", choices=[
"move",
"copy",
"link",
"skip"], default=
"link",
30 help=
"Mode of delivering the files to their destination")
31 self.add_argument(
"--create", action=
"store_true", help=
"Create new registry (clobber old)?")
32 self.add_argument(
"--ignore-ingested", dest=
"ignoreIngested", action=
"store_true",
33 help=
"Don't register files that have already been registered")
34 self.add_id_argument(
"--badId",
"raw",
"Data identifier for bad data", doMakeDataRefList=
False)
35 self.add_argument(
"--badFile", nargs=
"*", default=[],
36 help=
"Names of bad files (no path; wildcards allowed)")
37 self.add_argument(
"files", nargs=
"+", help=
"Names of file")
41 """Configuration for ParseTask"""
42 translation = DictField(keytype=str, itemtype=str, default={},
43 doc=
"Translation table for property --> header")
44 translators = DictField(keytype=str, itemtype=str, default={},
45 doc=
"Properties and name of translator method")
46 defaults = DictField(keytype=str, itemtype=str, default={},
47 doc=
"Default values if header is not present")
48 hdu = Field(dtype=int, default=0, doc=
"HDU to read for metadata")
49 extnames = ListField(dtype=str, default=[], doc=
"Extension names to search for")
53 """Task that will parse the filename and/or its contents to get the required information
54 for putting the file in the correct location and populating the registry."""
55 ConfigClass = ParseConfig
58 """Get information about the image from the filename and its contents
60 Here, we open the image and parse the header, but one could also look at the filename itself
61 and derive information from that, or set values from the configuration.
63 @param filename Name of file to inspect
64 @return File properties; list of file properties for each extension
66 md = afwImage.readMetadata(filename, self.config.hdu)
68 if len(self.config.extnames) == 0:
70 return phuInfo, [phuInfo]
72 extnames = set(self.config.extnames)
75 while len(extnames) > 0:
78 md = afwImage.readMetadata(filename, extnum)
80 self.log.warn(
"Error reading %s extensions %s" % (filename, extnames))
86 hduInfo[
"hdu"] = extnum
87 infoList.append(hduInfo)
89 return phuInfo, infoList
93 """ Get the name of an extension.
94 @param md: PropertySet (like the one obtained from afwImage.readMetadata)
95 @return Name of the extension if it exists. None otherwise.
99 ext = md.get(
"EXTNAME")
101 except lsst.pex.exceptions.Exception:
105 """Attempt to pull the desired information out of the header
107 This is done through two mechanisms:
108 * translation: a property is set directly from the relevant header keyword
109 * translator: a property is set with the result of calling a method
111 The translator methods receive the header metadata and should return the
112 appropriate value, or None if the value cannot be determined.
114 @param md FITS header
115 @param info File properties, to be supplemented
118 for p, h
in self.config.translation.items():
121 if isinstance(value, basestring):
122 value = value.strip()
124 elif p
in self.config.defaults:
125 info[p] = self.config.defaults[p]
127 self.log.warn(
"Unable to find value for %s (derived from %s)" % (p, h))
128 for p, t
in self.config.translators.items():
129 func = getattr(self, t)
132 except Exception
as e:
133 self.log.warn(
"%s failed to translate %s: %s", t, p, e)
135 if value
is not None:
140 """Convert a full DATE-OBS to a mere date
142 Besides being an example of a translator, this is also generally useful.
143 It will only be used if listed as a translator in the configuration.
145 date = md.get(
"DATE-OBS").strip()
152 """Translate a full filter description into a mere filter name
154 Besides being an example of a translator, this is also generally useful.
155 It will only be used if listed as a translator in the configuration.
157 filterName = md.get(
"FILTER").strip()
158 filterName = filterName.strip()
159 c = filterName.find(
" ")
161 filterName = filterName[:c]
165 """Get destination for the file
167 @param butler Data butler
168 @param info File properties, used as dataId for the butler
169 @param filename Input filename
170 @return Destination filename
172 raw = butler.get(
"raw_filename", info)[0]
181 """Configuration for the RegisterTask"""
182 table = Field(dtype=str, default=
"raw", doc=
"Name of table")
183 columns = DictField(keytype=str, itemtype=str, doc=
"List of columns for raw table, with their types",
184 itemCheck=
lambda x: x
in (
"text",
"int",
"double"),
185 default={
'object':
'text',
194 unique = ListField(dtype=str, doc=
"List of columns to be declared unique for the table",
195 default=[
"visit",
"ccd"])
196 visit = ListField(dtype=str, default=[
"visit",
"object",
"date",
"filter"],
197 doc=
"List of columns for raw_visit table")
198 ignore = Field(dtype=bool, default=
False, doc=
"Ignore duplicates in the table?")
199 permissions = Field(dtype=int, default=0o664, doc=
"Permissions mode for registry")
203 """Context manager to provide a registry
205 An existing registry is copied, so that it may continue
206 to be used while we add to this new registry. Finally,
207 the new registry is moved into the right place.
210 def __init__(self, registryName, createTableFunc, forceCreateTables, permissions):
211 """Construct a context manager
213 @param registryName: Name of registry file
214 @param createTableFunc: Function to create tables
215 @param forceCreateTables: Force the (re-)creation of tables?
216 @param permissions: Permissions to set on database file
221 updateFile = tempfile.NamedTemporaryFile(prefix=registryName, dir=os.path.dirname(self.
registryName),
226 if os.path.exists(registryName):
228 os.chmod(self.
updateName, os.stat(registryName).st_mode)
229 shutil.copyfile(registryName, self.
updateName)
233 if not haveTable
or forceCreateTables:
234 createTableFunc(self.
conn)
238 """Provide the 'as' value"""
255 """A context manager that doesn't provide any context
257 Useful for dry runs where we don't want to actually do anything real.
262 class RegisterTask(Task):
263 """Task that will generate the registry for the Mapper"""
264 ConfigClass = RegisterConfig
266 typemap = {
'text': str,
'int': int,
'double': float}
268 def openRegistry(self, directory, create=False, dryrun=False, name="registry.sqlite3"):
269 """Open the registry and return the connection handle.
271 @param directory Directory in which the registry file will be placed
272 @param create Clobber any existing registry and create a new one?
273 @param dryrun Don't do anything permanent?
274 @param name Filename of the registry
275 @return Database connection
280 registryName = os.path.join(directory, name)
285 """Create the registry tables
287 One table (typically 'raw') contains information on all files, and the
288 other (typically 'raw_visit') contains information on all visits.
290 @param conn Database connection
291 @param table Name of table to create in database
294 table = self.config.table
295 cmd =
"create table %s (id integer primary key autoincrement, " % table
296 cmd +=
",".join([(
"%s %s" % (col, colType))
for col, colType
in self.config.columns.items()])
297 if len(self.config.unique) > 0:
298 cmd +=
", unique(" +
",".join(self.config.unique) +
")"
300 conn.cursor().execute(cmd)
302 cmd =
"create table %s_visit (" % table
303 cmd +=
",".join([(
"%s %s" % (col, self.config.columns[col]))
for col
in self.config.visit])
304 cmd +=
", unique(" +
",".join(set(self.config.visit).intersection(set(self.config.unique))) +
")"
306 conn.cursor().execute(cmd)
310 def check(self, conn, info, table=None):
311 """Check for the presence of a row already
313 Not sure this is required, given the 'ignore' configuration option.
316 table = self.config.table
317 if self.config.ignore
or len(self.config.unique) == 0:
319 cursor = conn.cursor()
320 sql =
"SELECT COUNT(*) FROM %s WHERE " % table
321 sql +=
" AND ".join([
"%s = %s" % (col, self.
placeHolder)
for col
in self.config.unique])
322 values = [self.
typemap[self.config.columns[col]](info[col])
for col
in self.config.unique]
324 cursor.execute(sql, values)
325 if cursor.fetchone()[0] > 0:
329 def addRow(self, conn, info, dryrun=False, create=False, table=None):
330 """Add a row to the file table (typically 'raw').
332 @param conn Database connection
333 @param info File properties to add to database
334 @param table Name of table in database
337 table = self.config.table
338 sql =
"INSERT INTO %s (%s) SELECT " % (table,
",".join(self.config.columns))
339 sql +=
",".join([self.
placeHolder] * len(self.config.columns))
340 values = [self.
typemap[tt](info[col])
for col, tt
in self.config.columns.items()]
342 if self.config.ignore:
343 sql +=
" WHERE NOT EXISTS (SELECT 1 FROM %s WHERE " % table
344 sql +=
" AND ".join([
"%s=%s" % (col, self.
placeHolder)
for col
in self.config.unique])
346 values += [info[col]
for col
in self.config.unique]
349 print(
"Would execute: '%s' with %s" % (sql,
",".join([str(value)
for value
in values])))
351 conn.cursor().execute(sql, values)
354 """Generate the visits table (typically 'raw_visit') from the
355 file table (typically 'raw').
357 @param conn Database connection
358 @param table Name of table in database
361 table = self.config.table
362 sql =
"INSERT INTO %s_visit SELECT DISTINCT " % table
363 sql +=
",".join(self.config.visit)
364 sql +=
" FROM %s AS vv1" % table
365 sql +=
" WHERE NOT EXISTS "
366 sql +=
"(SELECT vv2.visit FROM %s_visit AS vv2 WHERE vv1.visit = vv2.visit)" % (table,)
368 print(
"Would execute: %s" % sql)
370 conn.cursor().execute(sql)
374 """Configuration for IngestTask"""
375 parse = ConfigurableField(target=ParseTask, doc=
"File parsing")
376 register = ConfigurableField(target=RegisterTask, doc=
"Registry entry")
377 allowError = Field(dtype=bool, default=
False, doc=
"Allow error in ingestion?")
378 clobber = Field(dtype=bool, default=
False, doc=
"Clobber existing file?")
382 """Task that will ingest images into the data repository"""
383 ConfigClass = IngestConfig
384 ArgumentParser = IngestArgumentParser
385 _DefaultName =
"ingest"
388 super(IngestTask, self).
__init__(*args, **kwargs)
389 self.makeSubtask(
"parse")
390 self.makeSubtask(
"register")
394 """Parse the command-line arguments and run the Task"""
395 config = cls.ConfigClass()
396 parser = cls.ArgumentParser(name=cls._DefaultName)
397 args = parser.parse_args(config)
398 task = cls(config=args.config)
401 def ingest(self, infile, outfile, mode="move", dryrun=False):
402 """Ingest a file into the image repository.
404 @param infile Name of input file
405 @param outfile Name of output file (file in repository)
406 @param mode Mode of ingest (copy/link/move/skip)
407 @param dryrun Only report what would occur?
408 @return Success boolean
413 self.log.info(
"Would %s from %s to %s" % (mode, infile, outfile))
416 outdir = os.path.dirname(outfile)
417 if not os.path.isdir(outdir):
422 if not os.path.isdir(outdir):
424 if os.path.lexists(outfile):
425 if self.config.clobber:
428 raise RuntimeError(
"File %s already exists; consider --config clobber=True" % outfile)
432 shutil.copyfile(infile, outfile)
434 os.symlink(os.path.abspath(infile), outfile)
437 os.rename(infile, outfile)
439 raise AssertionError(
"Unknown mode: %s" % mode)
440 self.log.info(
"%s --<%s>--> %s" % (infile, mode, outfile))
441 except Exception
as e:
442 self.log.warn(
"Failed to %s %s to %s: %s" % (mode, infile, outfile, e))
443 if not self.config.allowError:
449 """Return whether the file qualifies as bad
451 We match against the list of bad file patterns.
453 filename = os.path.basename(filename)
456 for badFile
in badFileList:
457 if fnmatch(filename, badFile):
462 """Return whether the file information qualifies as bad
464 We match against the list of bad data identifiers.
468 for badId
in badIdList:
469 if all(info[key] == value
for key, value
in badId.items()):
474 """!Expand a set of filenames and globs, returning a list of filenames
476 \param fileNameList A list of files and glob patterns
478 N.b. globs obey Posix semantics, so a pattern that matches nothing is returned unchanged
481 for globPattern
in fileNameList:
482 files = glob(globPattern)
485 self.log.warn(
"%s doesn't match any file" % globPattern)
488 filenameList.extend(files)
493 """Ingest all specified files and add them to the registry"""
496 context = self.register.openRegistry(root, create=args.create, dryrun=args.dryrun)
497 with context
as registry:
498 for infile
in filenameList:
501 self.log.info(
"Skipping declared bad file %s" % infile)
504 fileInfo, hduInfoList = self.parse.getInfo(infile)
505 except Exception
as e:
506 if not self.config.allowError:
508 self.log.warn(
"Error parsing %s (%s); skipping" % (infile, e))
510 if self.
isBadId(fileInfo, args.badId.idList):
511 self.log.info(
"Skipping declared bad file %s: %s" % (infile, fileInfo))
513 if self.register.check(registry, fileInfo):
514 if args.ignoreIngested:
517 self.log.warn(
"%s: already ingested: %s" % (infile, fileInfo))
518 outfile = self.parse.getDestination(args.butler, fileInfo, infile)
519 ingested = self.
ingest(infile, outfile, mode=args.mode, dryrun=args.dryrun)
522 for info
in hduInfoList:
523 self.register.addRow(registry, info, dryrun=args.dryrun, create=args.create)
524 except Exception
as exc:
525 self.log.warn(
"Failed to ingest file %s: %s", infile, exc)
526 self.register.addVisits(registry, dryrun=args.dryrun)
530 """Can I copy a file? Raise an exception if space constraints are not met.
532 @param fromPath Path from which the file is being copied
533 @param toPath Path to which the file is being copied
535 req = os.stat(fromPath).st_size
536 st = os.statvfs(os.path.dirname(toPath))
537 avail = st.f_bavail * st.f_frsize
539 raise RuntimeError(
"Insufficient space: %d vs %d" % (req, avail))
def expandFiles
Expand a set of filenames and globs, returning a list of filenames.