1 from __future__
import absolute_import, division, print_function
2 from past.builtins
import basestring
3 from builtins
import object
11 import sqlite
as sqlite3
12 from fnmatch
import fnmatch
14 from contextlib
import contextmanager
16 from lsst.pex.config import Config, Field, DictField, ListField, ConfigurableField
24 """Argument parser to support ingesting images into the image repository""" 27 super(IngestArgumentParser, self).
__init__(*args, **kwargs)
28 self.add_argument(
"-n",
"--dry-run", dest=
"dryrun", action=
"store_true", default=
False,
29 help=
"Don't perform any action?")
30 self.add_argument(
"--mode", choices=[
"move",
"copy",
"link",
"skip"], default=
"link",
31 help=
"Mode of delivering the files to their destination")
32 self.add_argument(
"--create", action=
"store_true", help=
"Create new registry (clobber old)?")
33 self.add_argument(
"--ignore-ingested", dest=
"ignoreIngested", action=
"store_true",
34 help=
"Don't register files that have already been registered")
35 self.add_id_argument(
"--badId",
"raw",
"Data identifier for bad data", doMakeDataRefList=
False)
36 self.add_argument(
"--badFile", nargs=
"*", default=[],
37 help=
"Names of bad files (no path; wildcards allowed)")
38 self.add_argument(
"files", nargs=
"+", help=
"Names of file")
# NOTE(review): the class header was missing from this listing; the name is
# taken from ``ConfigClass = ParseConfig`` and the base class is assumed to be
# the ``Config`` imported from lsst.pex.config -- confirm against the repository.
class ParseConfig(Config):
    """Configuration for ParseTask"""
    # Maps a property name to the header keyword its value is read from.
    translation = DictField(keytype=str, itemtype=str, default={},
                            doc="Translation table for property --> header")
    # Maps a property name to the name of a method on the task; the method is
    # looked up with getattr() and called to produce the property's value.
    translators = DictField(keytype=str, itemtype=str, default={},
                            doc="Properties and name of translator method")
    # Fallback value for a translated property, keyed by property name.
    defaults = DictField(keytype=str, itemtype=str, default={},
                         doc="Default values if header is not present")
    # HDU passed to afwImage.readMetadata when reading the file's metadata.
    hdu = Field(dtype=int, default=DEFAULT_HDU, doc="HDU to read for metadata")
    # When empty, only the metadata read from ``hdu`` is reported; otherwise
    # the extensions of the file are searched for these names.
    extnames = ListField(dtype=str, default=[], doc="Extension names to search for")
54 """Task that will parse the filename and/or its contents to get the required information 55 for putting the file in the correct location and populating the registry.""" 56 ConfigClass = ParseConfig
59 """Get information about the image from the filename and its contents 61 Here, we open the image and parse the header, but one could also look at the filename itself 62 and derive information from that, or set values from the configuration. 64 @param filename Name of file to inspect 65 @return File properties; list of file properties for each extension 67 md = afwImage.readMetadata(filename, self.config.hdu)
69 if len(self.config.extnames) == 0:
71 return phuInfo, [phuInfo]
73 extnames = set(self.config.extnames)
76 while len(extnames) > 0:
79 md = afwImage.readMetadata(filename, extnum)
81 self.log.warn(
"Error reading %s extensions %s" % (filename, extnames))
87 hduInfo[
"hdu"] = extnum
88 infoList.append(hduInfo)
90 return phuInfo, infoList
94 """ Get the name of an extension. 95 @param md: PropertySet like one obtained from afwImage.readMetadata) 96 @return Name of the extension if it exists. None otherwise. 100 ext = md.get(
"EXTNAME")
106 """Attempt to pull the desired information out of the header 108 This is done through two mechanisms: 109 * translation: a property is set directly from the relevant header keyword 110 * translator: a property is set with the result of calling a method 112 The translator methods receive the header metadata and should return the 113 appropriate value, or None if the value cannot be determined. 115 @param md FITS header 116 @param info File properties, to be supplemented 121 for p, h
in self.config.translation.items():
124 if isinstance(value, basestring):
125 value = value.strip()
127 elif p
in self.config.defaults:
128 info[p] = self.config.defaults[p]
130 self.log.warn(
"Unable to find value for %s (derived from %s)" % (p, h))
131 for p, t
in self.config.translators.items():
132 func = getattr(self, t)
135 except Exception
as e:
136 self.log.warn(
"%s failed to translate %s: %s", t, p, e)
138 if value
is not None:
143 """Convert a full DATE-OBS to a mere date 145 Besides being an example of a translator, this is also generally useful. 146 It will only be used if listed as a translator in the configuration. 148 date = md.get(
"DATE-OBS").strip()
155 """Translate a full filter description into a mere filter name 157 Besides being an example of a translator, this is also generally useful. 158 It will only be used if listed as a translator in the configuration. 160 filterName = md.get(
"FILTER").strip()
161 filterName = filterName.strip()
162 c = filterName.find(
" ")
164 filterName = filterName[:c]
168 """Get destination for the file 170 @param butler Data butler 171 @param info File properties, used as dataId for the butler 172 @param filename Input filename 173 @return Destination filename 175 raw = butler.get(
"raw_filename", info)[0]
184 """Configuration for the RegisterTask""" 185 table = Field(dtype=str, default=
"raw", doc=
"Name of table")
186 columns = DictField(keytype=str, itemtype=str, doc=
"List of columns for raw table, with their types",
187 itemCheck=
lambda x: x
in (
"text",
"int",
"double"),
188 default={
'object':
'text',
197 unique = ListField(dtype=str, doc=
"List of columns to be declared unique for the table",
198 default=[
"visit",
"ccd"])
199 visit = ListField(dtype=str, default=[
"visit",
"object",
"date",
"filter"],
200 doc=
"List of columns for raw_visit table")
201 ignore = Field(dtype=bool, default=
False, doc=
"Ignore duplicates in the table?")
202 permissions = Field(dtype=int, default=0o664, doc=
"Permissions mode for registry")
206 """Context manager to provide a registry 208 An existing registry is copied, so that it may continue 209 to be used while we add to this new registry. Finally, 210 the new registry is moved into the right place. 213 def __init__(self, registryName, createTableFunc, forceCreateTables, permissions):
214 """Construct a context manager 216 @param registryName: Name of registry file 217 @param createTableFunc: Function to create tables 218 @param forceCreateTables: Force the (re-)creation of tables? 219 @param permissions: Permissions to set on database file 224 updateFile = tempfile.NamedTemporaryFile(prefix=registryName, dir=os.path.dirname(self.
registryName),
229 if os.path.exists(registryName):
231 os.chmod(self.
updateName, os.stat(registryName).st_mode)
232 shutil.copyfile(registryName, self.
updateName)
236 if not haveTable
or forceCreateTables:
237 createTableFunc(self.
conn)
241 """Provide the 'as' value""" 258 """A context manager that doesn't provide any context 260 Useful for dry runs where we don't want to actually do anything real. 265 class RegisterTask(Task):
266 """Task that will generate the registry for the Mapper""" 267 ConfigClass = RegisterConfig
269 typemap = {
'text': str,
'int': int,
'double': float}
271 def openRegistry(self, directory, create=False, dryrun=False, name="registry.sqlite3"):
272 """Open the registry and return the connection handle. 274 @param directory Directory in which the registry file will be placed 275 @param create Clobber any existing registry and create a new one? 276 @param dryrun Don't do anything permanent? 277 @param name Filename of the registry 278 @return Database connection 283 registryName = os.path.join(directory, name)
288 """Create the registry tables 290 One table (typically 'raw') contains information on all files, and the 291 other (typically 'raw_visit') contains information on all visits. 293 @param conn Database connection 294 @param table Name of table to create in database 297 table = self.config.table
298 cmd =
"create table %s (id integer primary key autoincrement, " % table
299 cmd +=
",".join([(
"%s %s" % (col, colType))
for col, colType
in self.config.columns.items()])
300 if len(self.config.unique) > 0:
301 cmd +=
", unique(" +
",".join(self.config.unique) +
")" 303 conn.cursor().execute(cmd)
305 cmd =
"create table %s_visit (" % table
306 cmd +=
",".join([(
"%s %s" % (col, self.config.columns[col]))
for col
in self.config.visit])
307 cmd +=
", unique(" +
",".join(set(self.config.visit).intersection(set(self.config.unique))) +
")" 309 conn.cursor().execute(cmd)
313 def check(self, conn, info, table=None):
314 """Check for the presence of a row already 316 Not sure this is required, given the 'ignore' configuration option. 319 table = self.config.table
320 if self.config.ignore
or len(self.config.unique) == 0:
322 cursor = conn.cursor()
323 sql =
"SELECT COUNT(*) FROM %s WHERE " % table
324 sql +=
" AND ".join([
"%s = %s" % (col, self.
placeHolder)
for col
in self.config.unique])
325 values = [self.
typemap[self.config.columns[col]](info[col])
for col
in self.config.unique]
327 cursor.execute(sql, values)
328 if cursor.fetchone()[0] > 0:
332 def addRow(self, conn, info, dryrun=False, create=False, table=None):
333 """Add a row to the file table (typically 'raw'). 335 @param conn Database connection 336 @param info File properties to add to database 337 @param table Name of table in database 340 table = self.config.table
341 sql =
"INSERT INTO %s (%s) SELECT " % (table,
",".join(self.config.columns))
342 sql +=
",".join([self.
placeHolder] * len(self.config.columns))
343 values = [self.
typemap[tt](info[col])
for col, tt
in self.config.columns.items()]
345 if self.config.ignore:
346 sql +=
" WHERE NOT EXISTS (SELECT 1 FROM %s WHERE " % table
347 sql +=
" AND ".join([
"%s=%s" % (col, self.
placeHolder)
for col
in self.config.unique])
349 values += [info[col]
for col
in self.config.unique]
352 print(
"Would execute: '%s' with %s" % (sql,
",".join([str(value)
for value
in values])))
354 conn.cursor().execute(sql, values)
357 """Generate the visits table (typically 'raw_visits') from the 358 file table (typically 'raw'). 360 @param conn Database connection 361 @param table Name of table in database 364 table = self.config.table
365 sql =
"INSERT INTO %s_visit SELECT DISTINCT " % table
366 sql +=
",".join(self.config.visit)
367 sql +=
" FROM %s AS vv1" % table
368 sql +=
" WHERE NOT EXISTS " 369 sql +=
"(SELECT vv2.visit FROM %s_visit AS vv2 WHERE vv1.visit = vv2.visit)" % (table,)
371 print(
"Would execute: %s" % sql)
373 conn.cursor().execute(sql)
# NOTE(review): the class header was missing from this listing; the name is
# taken from ``ConfigClass = IngestConfig`` and the base class is assumed to be
# the ``Config`` imported from lsst.pex.config -- confirm against the repository.
class IngestConfig(Config):
    """Configuration for IngestTask"""
    # Subtask that extracts file properties from a file (created as "parse").
    parse = ConfigurableField(target=ParseTask, doc="File parsing")
    # Subtask that maintains the registry database (created as "register").
    register = ConfigurableField(target=RegisterTask, doc="Registry entry")
    # Consulted after a parse failure or a failed copy/link/move has been logged.
    allowError = Field(dtype=bool, default=False, doc="Allow error in ingestion?")
    # Consulted when the destination file already exists; when it is False the
    # ingest raises RuntimeError suggesting ``--config clobber=True``.
    clobber = Field(dtype=bool, default=False, doc="Clobber existing file?")
385 """Task that will ingest images into the data repository""" 386 ConfigClass = IngestConfig
387 ArgumentParser = IngestArgumentParser
388 _DefaultName =
"ingest" 391 super(IngestTask, self).
__init__(*args, **kwargs)
392 self.makeSubtask(
"parse")
393 self.makeSubtask(
"register")
397 """Parse the command-line arguments and run the Task""" 400 args = parser.parse_args(config)
401 task = cls(config=args.config)
404 def ingest(self, infile, outfile, mode="move", dryrun=False):
405 """Ingest a file into the image repository. 407 @param infile Name of input file 408 @param outfile Name of output file (file in repository) 409 @param mode Mode of ingest (copy/link/move/skip) 410 @param dryrun Only report what would occur? 411 @param Success boolean 416 self.log.info(
"Would %s from %s to %s" % (mode, infile, outfile))
419 outdir = os.path.dirname(outfile)
420 if not os.path.isdir(outdir):
425 if not os.path.isdir(outdir):
427 if os.path.lexists(outfile):
428 if self.config.clobber:
431 raise RuntimeError(
"File %s already exists; consider --config clobber=True" % outfile)
435 shutil.copyfile(infile, outfile)
437 os.symlink(os.path.abspath(infile), outfile)
440 os.rename(infile, outfile)
442 raise AssertionError(
"Unknown mode: %s" % mode)
443 self.log.info(
"%s --<%s>--> %s" % (infile, mode, outfile))
444 except Exception
as e:
445 self.log.warn(
"Failed to %s %s to %s: %s" % (mode, infile, outfile, e))
446 if not self.config.allowError:
452 """Return whether the file qualifies as bad 454 We match against the list of bad file patterns. 456 filename = os.path.basename(filename)
459 for badFile
in badFileList:
460 if fnmatch(filename, badFile):
465 """Return whether the file information qualifies as bad 467 We match against the list of bad data identifiers. 471 for badId
in badIdList:
472 if all(info[key] == value
for key, value
in badId.items()):
477 """!Expand a set of filenames and globs, returning a list of filenames 479 \param fileNameList A list of files and glob patterns 481 N.b. globs obey Posix semantics, so a pattern that matches nothing is returned unchanged 484 for globPattern
in fileNameList:
485 files = glob(globPattern)
488 self.log.warn(
"%s doesn't match any file" % globPattern)
491 filenameList.extend(files)
496 """!Examine and ingest a single file 498 @param infile: File to process 499 @param args: Parsed command-line arguments 500 @return parsed information from FITS HDUs or None 503 self.log.info(
"Skipping declared bad file %s" % infile)
506 fileInfo, hduInfoList = self.parse.getInfo(infile)
507 except Exception
as e:
508 if not self.config.allowError:
510 self.log.warn(
"Error parsing %s (%s); skipping" % (infile, e))
512 if self.
isBadId(fileInfo, args.badId.idList):
513 self.log.info(
"Skipping declared bad file %s: %s" % (infile, fileInfo))
515 if registry
is not None and self.register.check(registry, fileInfo):
516 if args.ignoreIngested:
518 self.log.warn(
"%s: already ingested: %s" % (infile, fileInfo))
519 outfile = self.parse.getDestination(args.butler, fileInfo, infile)
520 if not self.
ingest(infile, outfile, mode=args.mode, dryrun=args.dryrun):
525 """Ingest all specified files and add them to the registry""" 528 context = self.register.openRegistry(root, create=args.create, dryrun=args.dryrun)
529 with context
as registry:
530 for infile
in filenameList:
532 hduInfoList = self.
runFile(infile, registry, args)
533 except Exception
as exc:
534 self.log.warn(
"Failed to ingest file %s: %s", infile, exc)
536 if hduInfoList
is None:
538 for info
in hduInfoList:
539 self.register.addRow(registry, info, dryrun=args.dryrun, create=args.create)
540 self.register.addVisits(registry, dryrun=args.dryrun)
def assertCanCopy(fromPath, toPath):
    """Can I copy a file?  Raise an exception if space constraints are not met.

    The size of the source file is compared with the free space reported by
    the filesystem that holds the destination directory.

    @param fromPath    Path from which the file is being copied
    @param toPath      Path to which the file is being copied
    @throws RuntimeError  if the available space is smaller than the source file
    """
    req = os.stat(fromPath).st_size
    # A destination given as a bare filename has an empty dirname, and
    # os.statvfs("") fails; such a file lands in the current directory.
    st = os.statvfs(os.path.dirname(toPath) or ".")
    avail = st.f_bavail * st.f_frsize
    if avail < req:
        raise RuntimeError("Insufficient space: %d vs %d" % (req, avail))
def ingest(self, infile, outfile, mode="move", dryrun=False)
def translate_filter(self, md)
def createTable(self, conn, table=None)
def expandFiles(self, fileNameList)
Expand a set of filenames and globs, returning a list of filenames.
def translate_date(self, md)
def __exit__(self, excType, excValue, traceback)
def getInfo(self, filename)
def getInfoFromMetadata(self, md, info=None)
def getDestination(self, butler, info, filename)
def runFile(self, infile, registry, args)
Examine and ingest a single file.
def isBadFile(self, filename, badFileList)
def __init__(self, registryName, createTableFunc, forceCreateTables, permissions)
def assertCanCopy(fromPath, toPath)
def check(self, conn, info, table=None)
def __init__(self, args, kwargs)
def addVisits(self, conn, dryrun=False, table=None)
def openRegistry(self, directory, create=False, dryrun=False, name="registry.sqlite3")
def __init__(self, args, kwargs)
def addRow(self, conn, info, dryrun=False, create=False, table=None)
def isBadId(self, info, badIdList)