1 from __future__
import absolute_import, division, print_function
2 from past.builtins
import basestring
3 from builtins
import object
8 from fnmatch
import fnmatch
10 from contextlib
import contextmanager
12 from lsst.pex.config import Config, Field, DictField, ListField, ConfigurableField
20 """Argument parser to support ingesting images into the image repository""" 23 super(IngestArgumentParser, self).
__init__(*args, **kwargs)
24 self.add_argument(
"-n",
"--dry-run", dest=
"dryrun", action=
"store_true", default=
False,
25 help=
"Don't perform any action?")
26 self.add_argument(
"--mode", choices=[
"move",
"copy",
"link",
"skip"], default=
"link",
27 help=
"Mode of delivering the files to their destination")
28 self.add_argument(
"--create", action=
"store_true", help=
"Create new registry (clobber old)?")
29 self.add_argument(
"--ignore-ingested", dest=
"ignoreIngested", action=
"store_true",
30 help=
"Don't register files that have already been registered")
31 self.add_id_argument(
"--badId",
"raw",
"Data identifier for bad data", doMakeDataRefList=
False)
32 self.add_argument(
"--badFile", nargs=
"*", default=[],
33 help=
"Names of bad files (no path; wildcards allowed)")
34 self.add_argument(
"files", nargs=
"+", help=
"Names of file")
38 """Configuration for ParseTask""" 39 translation = DictField(keytype=str, itemtype=str, default={},
40 doc=
"Translation table for property --> header")
41 translators = DictField(keytype=str, itemtype=str, default={},
42 doc=
"Properties and name of translator method")
43 defaults = DictField(keytype=str, itemtype=str, default={},
44 doc=
"Default values if header is not present")
45 hdu = Field(dtype=int, default=DEFAULT_HDU, doc=
"HDU to read for metadata")
46 extnames = ListField(dtype=str, default=[], doc=
"Extension names to search for")
50 """Task that will parse the filename and/or its contents to get the required information 51 for putting the file in the correct location and populating the registry.""" 52 ConfigClass = ParseConfig
55 """Get information about the image from the filename and its contents 57 Here, we open the image and parse the header, but one could also look at the filename itself 58 and derive information from that, or set values from the configuration. 60 @param filename Name of file to inspect 61 @return File properties; list of file properties for each extension 63 md = readMetadata(filename, self.config.hdu)
65 if len(self.config.extnames) == 0:
67 return phuInfo, [phuInfo]
69 extnames = set(self.config.extnames)
72 while len(extnames) > 0:
75 md = readMetadata(filename, extnum)
77 self.log.warn(
"Error reading %s extensions %s" % (filename, extnames))
83 hduInfo[
"hdu"] = extnum
84 infoList.append(hduInfo)
86 return phuInfo, infoList
90 """ Get the name of an extension. 91 @param md: PropertySet (like one obtained from lsst.afw.fits.readMetadata) 92 @return Name of the extension if it exists; None otherwise. 96 ext = md.get(
"EXTNAME")
102 """Attempt to pull the desired information out of the header 104 This is done through two mechanisms: 105 * translation: a property is set directly from the relevant header keyword 106 * translator: a property is set with the result of calling a method 108 The translator methods receive the header metadata and should return the 109 appropriate value, or None if the value cannot be determined. 111 @param md FITS header 112 @param info File properties, to be supplemented 117 for p, h
in self.config.translation.items():
120 if isinstance(value, basestring):
121 value = value.strip()
123 elif p
in self.config.defaults:
124 info[p] = self.config.defaults[p]
126 self.log.warn(
"Unable to find value for %s (derived from %s)" % (p, h))
127 for p, t
in self.config.translators.items():
128 func = getattr(self, t)
131 except Exception
as e:
132 self.log.warn(
"%s failed to translate %s: %s", t, p, e)
134 if value
is not None:
139 """Convert a full DATE-OBS to a mere date 141 Besides being an example of a translator, this is also generally useful. 142 It will only be used if listed as a translator in the configuration. 144 date = md.get(
"DATE-OBS").strip()
151 """Translate a full filter description into a mere filter name 153 Besides being an example of a translator, this is also generally useful. 154 It will only be used if listed as a translator in the configuration. 156 filterName = md.get(
"FILTER").strip()
157 filterName = filterName.strip()
158 c = filterName.find(
" ")
160 filterName = filterName[:c]
164 """Get destination for the file 166 @param butler Data butler 167 @param info File properties, used as dataId for the butler 168 @param filename Input filename 169 @return Destination filename 171 raw = butler.get(
"raw_filename", info)[0]
180 """Configuration for the RegisterTask""" 181 table = Field(dtype=str, default=
"raw", doc=
"Name of table")
182 columns = DictField(keytype=str, itemtype=str, doc=
"List of columns for raw table, with their types",
183 itemCheck=
lambda x: x
in (
"text",
"int",
"double"),
184 default={
'object':
'text',
193 unique = ListField(dtype=str, doc=
"List of columns to be declared unique for the table",
194 default=[
"visit",
"ccd"])
195 visit = ListField(dtype=str, default=[
"visit",
"object",
"date",
"filter"],
196 doc=
"List of columns for raw_visit table")
197 ignore = Field(dtype=bool, default=
False, doc=
"Ignore duplicates in the table?")
198 permissions = Field(dtype=int, default=0o664, doc=
"Permissions mode for registry; 0o664 = rw-rw-r--")
202 """Context manager to provide a registry 204 An existing registry is copied, so that it may continue 205 to be used while we add to this new registry. Finally, 206 the new registry is moved into the right place. 209 def __init__(self, registryName, createTableFunc, forceCreateTables, permissions):
210 """Construct a context manager 212 @param registryName: Name of registry file 213 @param createTableFunc: Function to create tables 214 @param forceCreateTables: Force the (re-)creation of tables? 215 @param permissions: Permissions to set on database file 220 updateFile = tempfile.NamedTemporaryFile(prefix=registryName, dir=os.path.dirname(self.
registryName),
225 if os.path.exists(registryName):
227 os.chmod(self.
updateName, os.stat(registryName).st_mode)
228 shutil.copyfile(registryName, self.
updateName)
232 if not haveTable
or forceCreateTables:
233 createTableFunc(self.
conn)
237 """Provide the 'as' value""" 254 """A context manager that doesn't provide any context 256 Useful for dry runs where we don't want to actually do anything real. 261 class RegisterTask(Task):
262 """Task that will generate the registry for the Mapper""" 263 ConfigClass = RegisterConfig
265 typemap = {
'text': str,
'int': int,
'double': float}
267 def openRegistry(self, directory, create=False, dryrun=False, name="registry.sqlite3"):
268 """Open the registry and return the connection handle. 270 @param directory Directory in which the registry file will be placed 271 @param create Clobber any existing registry and create a new one? 272 @param dryrun Don't do anything permanent? 273 @param name Filename of the registry 274 @return Database connection 279 registryName = os.path.join(directory, name)
284 """Create the registry tables 286 One table (typically 'raw') contains information on all files, and the 287 other (typically 'raw_visit') contains information on all visits. 289 @param conn Database connection 290 @param table Name of table to create in database 293 table = self.config.table
294 cmd =
"create table %s (id integer primary key autoincrement, " % table
295 cmd +=
",".join([(
"%s %s" % (col, colType))
for col, colType
in self.config.columns.items()])
296 if len(self.config.unique) > 0:
297 cmd +=
", unique(" +
",".join(self.config.unique) +
")" 299 conn.cursor().execute(cmd)
301 cmd =
"create table %s_visit (" % table
302 cmd +=
",".join([(
"%s %s" % (col, self.config.columns[col]))
for col
in self.config.visit])
303 cmd +=
", unique(" +
",".join(set(self.config.visit).intersection(set(self.config.unique))) +
")" 305 conn.cursor().execute(cmd)
309 def check(self, conn, info, table=None):
310 """Check for the presence of a row already 312 Not sure this is required, given the 'ignore' configuration option. 315 table = self.config.table
316 if self.config.ignore
or len(self.config.unique) == 0:
318 cursor = conn.cursor()
319 sql =
"SELECT COUNT(*) FROM %s WHERE " % table
320 sql +=
" AND ".join([
"%s = %s" % (col, self.
placeHolder)
for col
in self.config.unique])
321 values = [self.
typemap[self.config.columns[col]](info[col])
for col
in self.config.unique]
323 cursor.execute(sql, values)
324 if cursor.fetchone()[0] > 0:
328 def addRow(self, conn, info, dryrun=False, create=False, table=None):
329 """Add a row to the file table (typically 'raw'). 331 @param conn Database connection 332 @param info File properties to add to database 333 @param table Name of table in database 336 table = self.config.table
337 sql =
"INSERT INTO %s (%s) SELECT " % (table,
",".join(self.config.columns))
338 sql +=
",".join([self.
placeHolder] * len(self.config.columns))
339 values = [self.
typemap[tt](info[col])
for col, tt
in self.config.columns.items()]
341 if self.config.ignore:
342 sql +=
" WHERE NOT EXISTS (SELECT 1 FROM %s WHERE " % table
343 sql +=
" AND ".join([
"%s=%s" % (col, self.
placeHolder)
for col
in self.config.unique])
345 values += [info[col]
for col
in self.config.unique]
348 print(
"Would execute: '%s' with %s" % (sql,
",".join([str(value)
for value
in values])))
350 conn.cursor().execute(sql, values)
353 """Generate the visits table (typically 'raw_visit') from the 354 file table (typically 'raw'). 356 @param conn Database connection 357 @param table Name of table in database 360 table = self.config.table
361 sql =
"INSERT INTO %s_visit SELECT DISTINCT " % table
362 sql +=
",".join(self.config.visit)
363 sql +=
" FROM %s AS vv1" % table
364 sql +=
" WHERE NOT EXISTS " 365 sql +=
"(SELECT vv2.visit FROM %s_visit AS vv2 WHERE vv1.visit = vv2.visit)" % (table,)
367 print(
"Would execute: %s" % sql)
369 conn.cursor().execute(sql)
373 """Configuration for IngestTask""" 374 parse = ConfigurableField(target=ParseTask, doc=
"File parsing")
375 register = ConfigurableField(target=RegisterTask, doc=
"Registry entry")
376 allowError = Field(dtype=bool, default=
False, doc=
"Allow error in ingestion?")
377 clobber = Field(dtype=bool, default=
False, doc=
"Clobber existing file?")
381 """Task that will ingest images into the data repository""" 382 ConfigClass = IngestConfig
383 ArgumentParser = IngestArgumentParser
384 _DefaultName =
"ingest" 387 super(IngestTask, self).
__init__(*args, **kwargs)
388 self.makeSubtask(
"parse")
389 self.makeSubtask(
"register")
393 """Parse the command-line arguments and run the Task""" 396 args = parser.parse_args(config)
397 task = cls(config=args.config)
400 def ingest(self, infile, outfile, mode="move", dryrun=False):
401 """Ingest a file into the image repository. 403 @param infile Name of input file 404 @param outfile Name of output file (file in repository) 405 @param mode Mode of ingest (copy/link/move/skip) 406 @param dryrun Only report what would occur? 407 @return Success boolean 412 self.log.info(
"Would %s from %s to %s" % (mode, infile, outfile))
415 outdir = os.path.dirname(outfile)
416 if not os.path.isdir(outdir):
421 if not os.path.isdir(outdir):
423 if os.path.lexists(outfile):
424 if self.config.clobber:
427 raise RuntimeError(
"File %s already exists; consider --config clobber=True" % outfile)
431 shutil.copyfile(infile, outfile)
433 os.symlink(os.path.abspath(infile), outfile)
436 os.rename(infile, outfile)
438 raise AssertionError(
"Unknown mode: %s" % mode)
439 self.log.info(
"%s --<%s>--> %s" % (infile, mode, outfile))
440 except Exception
as e:
441 self.log.warn(
"Failed to %s %s to %s: %s" % (mode, infile, outfile, e))
442 if not self.config.allowError:
448 """Return whether the file qualifies as bad 450 We match against the list of bad file patterns. 452 filename = os.path.basename(filename)
455 for badFile
in badFileList:
456 if fnmatch(filename, badFile):
461 """Return whether the file information qualifies as bad 463 We match against the list of bad data identifiers. 467 for badId
in badIdList:
468 if all(info[key] == value
for key, value
in badId.items()):
473 """!Expand a set of filenames and globs, returning a list of filenames 475 \param fileNameList A list of files and glob patterns 477 N.b. globs obey Posix semantics, so a pattern that matches nothing is returned unchanged 480 for globPattern
in fileNameList:
481 files = glob(globPattern)
484 self.log.warn(
"%s doesn't match any file" % globPattern)
487 filenameList.extend(files)
492 """!Examine and ingest a single file 494 @param infile: File to process 495 @param args: Parsed command-line arguments 496 @return parsed information from FITS HDUs or None 499 self.log.info(
"Skipping declared bad file %s" % infile)
502 fileInfo, hduInfoList = self.parse.getInfo(infile)
503 except Exception
as e:
504 if not self.config.allowError:
506 self.log.warn(
"Error parsing %s (%s); skipping" % (infile, e))
508 if self.
isBadId(fileInfo, args.badId.idList):
509 self.log.info(
"Skipping declared bad file %s: %s" % (infile, fileInfo))
511 if registry
is not None and self.register.check(registry, fileInfo):
512 if args.ignoreIngested:
514 self.log.warn(
"%s: already ingested: %s" % (infile, fileInfo))
515 outfile = self.parse.getDestination(args.butler, fileInfo, infile)
516 if not self.
ingest(infile, outfile, mode=args.mode, dryrun=args.dryrun):
521 """Ingest all specified files and add them to the registry""" 524 context = self.register.openRegistry(root, create=args.create, dryrun=args.dryrun)
525 with context
as registry:
526 for infile
in filenameList:
528 hduInfoList = self.
runFile(infile, registry, args)
529 except Exception
as exc:
530 self.log.warn(
"Failed to ingest file %s: %s", infile, exc)
532 if hduInfoList
is None:
534 for info
in hduInfoList:
535 self.register.addRow(registry, info, dryrun=args.dryrun, create=args.create)
536 self.register.addVisits(registry, dryrun=args.dryrun)
540 """Can I copy a file? Raise an exception if space constraints are not met. 542 @param fromPath Path from which the file is being copied 543 @param toPath Path to which the file is being copied 545 req = os.stat(fromPath).st_size
546 st = os.statvfs(os.path.dirname(toPath))
547 avail = st.f_bavail * st.f_frsize
549 raise RuntimeError(
"Insufficient space: %d vs %d" % (req, avail))
def ingest(self, infile, outfile, mode="move", dryrun=False)
def translate_filter(self, md)
def createTable(self, conn, table=None)
def expandFiles(self, fileNameList)
Expand a set of filenames and globs, returning a list of filenames.
def translate_date(self, md)
def __exit__(self, excType, excValue, traceback)
def getInfo(self, filename)
def getInfoFromMetadata(self, md, info=None)
def getDestination(self, butler, info, filename)
def runFile(self, infile, registry, args)
Examine and ingest a single file.
def isBadFile(self, filename, badFileList)
def __init__(self, registryName, createTableFunc, forceCreateTables, permissions)
def assertCanCopy(fromPath, toPath)
def check(self, conn, info, table=None)
def __init__(self, args, kwargs)
def addVisits(self, conn, dryrun=False, table=None)
def openRegistry(self, directory, create=False, dryrun=False, name="registry.sqlite3")
def __init__(self, args, kwargs)
def addRow(self, conn, info, dryrun=False, create=False, table=None)
def isBadId(self, info, badIdList)