1 from __future__
import absolute_import, division, print_function
2 from past.builtins
import basestring
3 from builtins
import object
11 import sqlite
as sqlite3
12 from fnmatch
import fnmatch
14 from contextlib
import contextmanager
16 from lsst.pex.config
import Config, Field, DictField, ListField, ConfigurableField
17 import lsst.pex.exceptions
18 from lsst.pipe.base
import Task, InputOnlyArgumentParser
19 import lsst.afw.image
as afwImage
23 """Argument parser to support ingesting images into the image repository""" 26 super(IngestArgumentParser, self).
__init__(*args, **kwargs)
27 self.add_argument(
"-n",
"--dry-run", dest=
"dryrun", action=
"store_true", default=
False,
28 help=
"Don't perform any action?")
29 self.add_argument(
"--mode", choices=[
"move",
"copy",
"link",
"skip"], default=
"link",
30 help=
"Mode of delivering the files to their destination")
31 self.add_argument(
"--create", action=
"store_true", help=
"Create new registry (clobber old)?")
32 self.add_argument(
"--ignore-ingested", dest=
"ignoreIngested", action=
"store_true",
33 help=
"Don't register files that have already been registered")
34 self.add_id_argument(
"--badId",
"raw",
"Data identifier for bad data", doMakeDataRefList=
False)
35 self.add_argument(
"--badFile", nargs=
"*", default=[],
36 help=
"Names of bad files (no path; wildcards allowed)")
37 self.add_argument(
"files", nargs=
"+", help=
"Names of file")
41 """Configuration for ParseTask""" 42 translation = DictField(keytype=str, itemtype=str, default={},
43 doc=
"Translation table for property --> header")
44 translators = DictField(keytype=str, itemtype=str, default={},
45 doc=
"Properties and name of translator method")
46 defaults = DictField(keytype=str, itemtype=str, default={},
47 doc=
"Default values if header is not present")
48 hdu = Field(dtype=int, default=0, doc=
"HDU to read for metadata")
49 extnames = ListField(dtype=str, default=[], doc=
"Extension names to search for")
53 """Task that will parse the filename and/or its contents to get the required information 54 for putting the file in the correct location and populating the registry.""" 55 ConfigClass = ParseConfig
58 """Get information about the image from the filename and its contents 60 Here, we open the image and parse the header, but one could also look at the filename itself 61 and derive information from that, or set values from the configuration. 63 @param filename Name of file to inspect 64 @return File properties; list of file properties for each extension 66 md = afwImage.readMetadata(filename, self.config.hdu)
68 if len(self.config.extnames) == 0:
70 return phuInfo, [phuInfo]
72 extnames = set(self.config.extnames)
75 while len(extnames) > 0:
78 md = afwImage.readMetadata(filename, extnum)
80 self.log.warn(
"Error reading %s extensions %s" % (filename, extnames))
86 hduInfo[
"hdu"] = extnum
87 infoList.append(hduInfo)
89 return phuInfo, infoList
93 """ Get the name of an extension. 94 @param md: PropertySet (like one obtained from afwImage.readMetadata) 95 @return Name of the extension if it exists. None otherwise. 99 ext = md.get(
"EXTNAME")
101 except lsst.pex.exceptions.Exception:
105 """Attempt to pull the desired information out of the header 107 This is done through two mechanisms: 108 * translation: a property is set directly from the relevant header keyword 109 * translator: a property is set with the result of calling a method 111 The translator methods receive the header metadata and should return the 112 appropriate value, or None if the value cannot be determined. 114 @param md FITS header 115 @param info File properties, to be supplemented 118 for p, h
in self.config.translation.items():
121 if isinstance(value, basestring):
122 value = value.strip()
124 elif p
in self.config.defaults:
125 info[p] = self.config.defaults[p]
127 self.log.warn(
"Unable to find value for %s (derived from %s)" % (p, h))
128 for p, t
in self.config.translators.items():
129 func = getattr(self, t)
132 except Exception
as e:
133 self.log.warn(
"%s failed to translate %s: %s", t, p, e)
135 if value
is not None:
140 """Convert a full DATE-OBS to a mere date 142 Besides being an example of a translator, this is also generally useful. 143 It will only be used if listed as a translator in the configuration. 145 date = md.get(
"DATE-OBS").strip()
152 """Translate a full filter description into a mere filter name 154 Besides being an example of a translator, this is also generally useful. 155 It will only be used if listed as a translator in the configuration. 157 filterName = md.get(
"FILTER").strip()
158 filterName = filterName.strip()
159 c = filterName.find(
" ")
161 filterName = filterName[:c]
165 """Get destination for the file 167 @param butler Data butler 168 @param info File properties, used as dataId for the butler 169 @param filename Input filename 170 @return Destination filename 172 raw = butler.get(
"raw_filename", info)[0]
181 """Configuration for the RegisterTask""" 182 table = Field(dtype=str, default=
"raw", doc=
"Name of table")
183 columns = DictField(keytype=str, itemtype=str, doc=
"List of columns for raw table, with their types",
184 itemCheck=
lambda x: x
in (
"text",
"int",
"double"),
185 default={
'object':
'text',
194 unique = ListField(dtype=str, doc=
"List of columns to be declared unique for the table",
195 default=[
"visit",
"ccd"])
196 visit = ListField(dtype=str, default=[
"visit",
"object",
"date",
"filter"],
197 doc=
"List of columns for raw_visit table")
198 ignore = Field(dtype=bool, default=
False, doc=
"Ignore duplicates in the table?")
199 permissions = Field(dtype=int, default=0o664, doc=
"Permissions mode for registry")
203 """Context manager to provide a registry 205 An existing registry is copied, so that it may continue 206 to be used while we add to this new registry. Finally, 207 the new registry is moved into the right place. 210 def __init__(self, registryName, createTableFunc, forceCreateTables, permissions):
211 """Construct a context manager 213 @param registryName: Name of registry file 214 @param createTableFunc: Function to create tables 215 @param forceCreateTables: Force the (re-)creation of tables? 216 @param permissions: Permissions to set on database file 221 updateFile = tempfile.NamedTemporaryFile(prefix=registryName, dir=os.path.dirname(self.
registryName),
226 if os.path.exists(registryName):
228 os.chmod(self.
updateName, os.stat(registryName).st_mode)
229 shutil.copyfile(registryName, self.
updateName)
233 if not haveTable
or forceCreateTables:
234 createTableFunc(self.
conn)
238 """Provide the 'as' value""" 255 """A context manager that doesn't provide any context 257 Useful for dry runs where we don't want to actually do anything real. 262 class RegisterTask(Task):
263 """Task that will generate the registry for the Mapper""" 264 ConfigClass = RegisterConfig
266 typemap = {
'text': str,
'int': int,
'double': float}
268 def openRegistry(self, directory, create=False, dryrun=False, name="registry.sqlite3"):
269 """Open the registry and return the connection handle. 271 @param directory Directory in which the registry file will be placed 272 @param create Clobber any existing registry and create a new one? 273 @param dryrun Don't do anything permanent? 274 @param name Filename of the registry 275 @return Database connection 280 registryName = os.path.join(directory, name)
285 """Create the registry tables 287 One table (typically 'raw') contains information on all files, and the 288 other (typically 'raw_visit') contains information on all visits. 290 @param conn Database connection 291 @param table Name of table to create in database 294 table = self.config.table
295 cmd =
"create table %s (id integer primary key autoincrement, " % table
296 cmd +=
",".join([(
"%s %s" % (col, colType))
for col, colType
in self.config.columns.items()])
297 if len(self.config.unique) > 0:
298 cmd +=
", unique(" +
",".join(self.config.unique) +
")" 300 conn.cursor().execute(cmd)
302 cmd =
"create table %s_visit (" % table
303 cmd +=
",".join([(
"%s %s" % (col, self.config.columns[col]))
for col
in self.config.visit])
304 cmd +=
", unique(" +
",".join(set(self.config.visit).intersection(set(self.config.unique))) +
")" 306 conn.cursor().execute(cmd)
310 def check(self, conn, info, table=None):
311 """Check for the presence of a row already 313 Not sure this is required, given the 'ignore' configuration option. 316 table = self.config.table
317 if self.config.ignore
or len(self.config.unique) == 0:
319 cursor = conn.cursor()
320 sql =
"SELECT COUNT(*) FROM %s WHERE " % table
321 sql +=
" AND ".join([
"%s = %s" % (col, self.
placeHolder)
for col
in self.config.unique])
322 values = [self.
typemap[self.config.columns[col]](info[col])
for col
in self.config.unique]
324 cursor.execute(sql, values)
325 if cursor.fetchone()[0] > 0:
329 def addRow(self, conn, info, dryrun=False, create=False, table=None):
330 """Add a row to the file table (typically 'raw'). 332 @param conn Database connection 333 @param info File properties to add to database 334 @param table Name of table in database 337 table = self.config.table
338 sql =
"INSERT INTO %s (%s) SELECT " % (table,
",".join(self.config.columns))
339 sql +=
",".join([self.
placeHolder] * len(self.config.columns))
340 values = [self.
typemap[tt](info[col])
for col, tt
in self.config.columns.items()]
342 if self.config.ignore:
343 sql +=
" WHERE NOT EXISTS (SELECT 1 FROM %s WHERE " % table
344 sql +=
" AND ".join([
"%s=%s" % (col, self.
placeHolder)
for col
in self.config.unique])
346 values += [info[col]
for col
in self.config.unique]
349 print(
"Would execute: '%s' with %s" % (sql,
",".join([str(value)
for value
in values])))
351 conn.cursor().execute(sql, values)
354 """Generate the visits table (typically 'raw_visit') from the 355 file table (typically 'raw'). 357 @param conn Database connection 358 @param table Name of table in database 361 table = self.config.table
362 sql =
"INSERT INTO %s_visit SELECT DISTINCT " % table
363 sql +=
",".join(self.config.visit)
364 sql +=
" FROM %s AS vv1" % table
365 sql +=
" WHERE NOT EXISTS " 366 sql +=
"(SELECT vv2.visit FROM %s_visit AS vv2 WHERE vv1.visit = vv2.visit)" % (table,)
368 print(
"Would execute: %s" % sql)
370 conn.cursor().execute(sql)
374 """Configuration for IngestTask""" 375 parse = ConfigurableField(target=ParseTask, doc=
"File parsing")
376 register = ConfigurableField(target=RegisterTask, doc=
"Registry entry")
377 allowError = Field(dtype=bool, default=
False, doc=
"Allow error in ingestion?")
378 clobber = Field(dtype=bool, default=
False, doc=
"Clobber existing file?")
382 """Task that will ingest images into the data repository""" 383 ConfigClass = IngestConfig
384 ArgumentParser = IngestArgumentParser
385 _DefaultName =
"ingest" 388 super(IngestTask, self).
__init__(*args, **kwargs)
389 self.makeSubtask(
"parse")
390 self.makeSubtask(
"register")
394 """Parse the command-line arguments and run the Task""" 397 args = parser.parse_args(config)
398 task = cls(config=args.config)
401 def ingest(self, infile, outfile, mode="move", dryrun=False):
402 """Ingest a file into the image repository. 404 @param infile Name of input file 405 @param outfile Name of output file (file in repository) 406 @param mode Mode of ingest (copy/link/move/skip) 407 @param dryrun Only report what would occur? 408 @return Success boolean 413 self.log.info(
"Would %s from %s to %s" % (mode, infile, outfile))
416 outdir = os.path.dirname(outfile)
417 if not os.path.isdir(outdir):
422 if not os.path.isdir(outdir):
424 if os.path.lexists(outfile):
425 if self.config.clobber:
428 raise RuntimeError(
"File %s already exists; consider --config clobber=True" % outfile)
432 shutil.copyfile(infile, outfile)
434 os.symlink(os.path.abspath(infile), outfile)
437 os.rename(infile, outfile)
439 raise AssertionError(
"Unknown mode: %s" % mode)
440 self.log.info(
"%s --<%s>--> %s" % (infile, mode, outfile))
441 except Exception
as e:
442 self.log.warn(
"Failed to %s %s to %s: %s" % (mode, infile, outfile, e))
443 if not self.config.allowError:
449 """Return whether the file qualifies as bad 451 We match against the list of bad file patterns. 453 filename = os.path.basename(filename)
456 for badFile
in badFileList:
457 if fnmatch(filename, badFile):
462 """Return whether the file information qualifies as bad 464 We match against the list of bad data identifiers. 468 for badId
in badIdList:
469 if all(info[key] == value
for key, value
in badId.items()):
474 """!Expand a set of filenames and globs, returning a list of filenames 476 \param fileNameList A list of files and glob patterns 478 N.b. globs obey Posix semantics, so a pattern that matches nothing is returned unchanged 481 for globPattern
in fileNameList:
482 files = glob(globPattern)
485 self.log.warn(
"%s doesn't match any file" % globPattern)
488 filenameList.extend(files)
493 """Ingest all specified files and add them to the registry""" 496 context = self.register.openRegistry(root, create=args.create, dryrun=args.dryrun)
497 with context
as registry:
498 for infile
in filenameList:
501 self.log.info(
"Skipping declared bad file %s" % infile)
504 fileInfo, hduInfoList = self.parse.getInfo(infile)
505 except Exception
as e:
506 if not self.config.allowError:
508 self.log.warn(
"Error parsing %s (%s); skipping" % (infile, e))
510 if self.
isBadId(fileInfo, args.badId.idList):
511 self.log.info(
"Skipping declared bad file %s: %s" % (infile, fileInfo))
513 if self.register.check(registry, fileInfo):
514 if args.ignoreIngested:
517 self.log.warn(
"%s: already ingested: %s" % (infile, fileInfo))
518 outfile = self.parse.getDestination(args.butler, fileInfo, infile)
519 ingested = self.
ingest(infile, outfile, mode=args.mode, dryrun=args.dryrun)
522 for info
in hduInfoList:
523 self.register.addRow(registry, info, dryrun=args.dryrun, create=args.create)
524 except Exception
as exc:
525 self.log.warn(
"Failed to ingest file %s: %s", infile, exc)
526 self.register.addVisits(registry, dryrun=args.dryrun)
530 """Can I copy a file? Raise an exception if space constraints are not met. 532 @param fromPath Path from which the file is being copied 533 @param toPath Path to which the file is being copied 535 req = os.stat(fromPath).st_size
536 st = os.statvfs(os.path.dirname(toPath))
537 avail = st.f_bavail * st.f_frsize
539 raise RuntimeError(
"Insufficient space: %d vs %d" % (req, avail))
def ingest(self, infile, outfile, mode="move", dryrun=False)
def translate_filter(self, md)
def createTable(self, conn, table=None)
def expandFiles(self, fileNameList)
Expand a set of filenames and globs, returning a list of filenames.
def translate_date(self, md)
def __exit__(self, excType, excValue, traceback)
def getInfo(self, filename)
def getDestination(self, butler, info, filename)
def isBadFile(self, filename, badFileList)
def __init__(self, registryName, createTableFunc, forceCreateTables, permissions)
def getInfoFromMetadata(self, md, info={})
def assertCanCopy(fromPath, toPath)
def check(self, conn, info, table=None)
def __init__(self, *args, **kwargs)
def addVisits(self, conn, dryrun=False, table=None)
def openRegistry(self, directory, create=False, dryrun=False, name="registry.sqlite3")
def __init__(self, *args, **kwargs)
def addRow(self, conn, info, dryrun=False, create=False, table=None)
def isBadId(self, info, badIdList)