1 from __future__
import absolute_import, division, print_function
2 from past.builtins
import basestring
3 from builtins
import object
11 import sqlite
as sqlite3
12 from fnmatch
import fnmatch
14 from contextlib
import contextmanager
16 from lsst.pex.config import Config, Field, DictField, ListField, ConfigurableField
24 """Argument parser to support ingesting images into the image repository""" 27 super(IngestArgumentParser, self).
__init__(*args, **kwargs)
28 self.add_argument(
"-n",
"--dry-run", dest=
"dryrun", action=
"store_true", default=
False,
29 help=
"Don't perform any action?")
30 self.add_argument(
"--mode", choices=[
"move",
"copy",
"link",
"skip"], default=
"link",
31 help=
"Mode of delivering the files to their destination")
32 self.add_argument(
"--create", action=
"store_true", help=
"Create new registry (clobber old)?")
33 self.add_argument(
"--ignore-ingested", dest=
"ignoreIngested", action=
"store_true",
34 help=
"Don't register files that have already been registered")
35 self.add_id_argument(
"--badId",
"raw",
"Data identifier for bad data", doMakeDataRefList=
False)
36 self.add_argument(
"--badFile", nargs=
"*", default=[],
37 help=
"Names of bad files (no path; wildcards allowed)")
38 self.add_argument(
"files", nargs=
"+", help=
"Names of file")
42 """Configuration for ParseTask""" 43 translation = DictField(keytype=str, itemtype=str, default={},
44 doc=
"Translation table for property --> header")
45 translators = DictField(keytype=str, itemtype=str, default={},
46 doc=
"Properties and name of translator method")
47 defaults = DictField(keytype=str, itemtype=str, default={},
48 doc=
"Default values if header is not present")
49 hdu = Field(dtype=int, default=DEFAULT_HDU, doc=
"HDU to read for metadata")
50 extnames = ListField(dtype=str, default=[], doc=
"Extension names to search for")
54 """Task that will parse the filename and/or its contents to get the required information 55 for putting the file in the correct location and populating the registry.""" 56 ConfigClass = ParseConfig
59 """Get information about the image from the filename and its contents 61 Here, we open the image and parse the header, but one could also look at the filename itself 62 and derive information from that, or set values from the configuration. 64 @param filename Name of file to inspect 65 @return File properties; list of file properties for each extension 67 md = afwImage.readMetadata(filename, self.config.hdu)
69 if len(self.config.extnames) == 0:
71 return phuInfo, [phuInfo]
73 extnames = set(self.config.extnames)
76 while len(extnames) > 0:
79 md = afwImage.readMetadata(filename, extnum)
81 self.log.warn(
"Error reading %s extensions %s" % (filename, extnames))
87 hduInfo[
"hdu"] = extnum
88 infoList.append(hduInfo)
90 return phuInfo, infoList
94 """ Get the name of an extension. 95 @param md: PropertySet (like one obtained from afwImage.readMetadata) 96 @return Name of the extension if it exists. None otherwise. 100 ext = md.get(
"EXTNAME")
106 """Attempt to pull the desired information out of the header 108 This is done through two mechanisms: 109 * translation: a property is set directly from the relevant header keyword 110 * translator: a property is set with the result of calling a method 112 The translator methods receive the header metadata and should return the 113 appropriate value, or None if the value cannot be determined. 115 @param md FITS header 116 @param info File properties, to be supplemented 119 for p, h
in self.config.translation.items():
122 if isinstance(value, basestring):
123 value = value.strip()
125 elif p
in self.config.defaults:
126 info[p] = self.config.defaults[p]
128 self.log.warn(
"Unable to find value for %s (derived from %s)" % (p, h))
129 for p, t
in self.config.translators.items():
130 func = getattr(self, t)
133 except Exception
as e:
134 self.log.warn(
"%s failed to translate %s: %s", t, p, e)
136 if value
is not None:
141 """Convert a full DATE-OBS to a mere date 143 Besides being an example of a translator, this is also generally useful. 144 It will only be used if listed as a translator in the configuration. 146 date = md.get(
"DATE-OBS").strip()
153 """Translate a full filter description into a mere filter name 155 Besides being an example of a translator, this is also generally useful. 156 It will only be used if listed as a translator in the configuration. 158 filterName = md.get(
"FILTER").strip()
159 filterName = filterName.strip()
160 c = filterName.find(
" ")
162 filterName = filterName[:c]
166 """Get destination for the file 168 @param butler Data butler 169 @param info File properties, used as dataId for the butler 170 @param filename Input filename 171 @return Destination filename 173 raw = butler.get(
"raw_filename", info)[0]
182 """Configuration for the RegisterTask""" 183 table = Field(dtype=str, default=
"raw", doc=
"Name of table")
184 columns = DictField(keytype=str, itemtype=str, doc=
"List of columns for raw table, with their types",
185 itemCheck=
lambda x: x
in (
"text",
"int",
"double"),
186 default={
'object':
'text',
195 unique = ListField(dtype=str, doc=
"List of columns to be declared unique for the table",
196 default=[
"visit",
"ccd"])
197 visit = ListField(dtype=str, default=[
"visit",
"object",
"date",
"filter"],
198 doc=
"List of columns for raw_visit table")
199 ignore = Field(dtype=bool, default=
False, doc=
"Ignore duplicates in the table?")
200 permissions = Field(dtype=int, default=0o664, doc=
"Permissions mode for registry")
204 """Context manager to provide a registry 206 An existing registry is copied, so that it may continue 207 to be used while we add to this new registry. Finally, 208 the new registry is moved into the right place. 211 def __init__(self, registryName, createTableFunc, forceCreateTables, permissions):
212 """Construct a context manager 214 @param registryName: Name of registry file 215 @param createTableFunc: Function to create tables 216 @param forceCreateTables: Force the (re-)creation of tables? 217 @param permissions: Permissions to set on database file 222 updateFile = tempfile.NamedTemporaryFile(prefix=registryName, dir=os.path.dirname(self.
registryName),
227 if os.path.exists(registryName):
229 os.chmod(self.
updateName, os.stat(registryName).st_mode)
230 shutil.copyfile(registryName, self.
updateName)
234 if not haveTable
or forceCreateTables:
235 createTableFunc(self.
conn)
239 """Provide the 'as' value""" 256 """A context manager that doesn't provide any context 258 Useful for dry runs where we don't want to actually do anything real. 263 class RegisterTask(Task):
264 """Task that will generate the registry for the Mapper""" 265 ConfigClass = RegisterConfig
267 typemap = {
'text': str,
'int': int,
'double': float}
269 def openRegistry(self, directory, create=False, dryrun=False, name="registry.sqlite3"):
270 """Open the registry and return the connection handle. 272 @param directory Directory in which the registry file will be placed 273 @param create Clobber any existing registry and create a new one? 274 @param dryrun Don't do anything permanent? 275 @param name Filename of the registry 276 @return Database connection 281 registryName = os.path.join(directory, name)
286 """Create the registry tables 288 One table (typically 'raw') contains information on all files, and the 289 other (typically 'raw_visit') contains information on all visits. 291 @param conn Database connection 292 @param table Name of table to create in database 295 table = self.config.table
296 cmd =
"create table %s (id integer primary key autoincrement, " % table
297 cmd +=
",".join([(
"%s %s" % (col, colType))
for col, colType
in self.config.columns.items()])
298 if len(self.config.unique) > 0:
299 cmd +=
", unique(" +
",".join(self.config.unique) +
")" 301 conn.cursor().execute(cmd)
303 cmd =
"create table %s_visit (" % table
304 cmd +=
",".join([(
"%s %s" % (col, self.config.columns[col]))
for col
in self.config.visit])
305 cmd +=
", unique(" +
",".join(set(self.config.visit).intersection(set(self.config.unique))) +
")" 307 conn.cursor().execute(cmd)
311 def check(self, conn, info, table=None):
312 """Check for the presence of a row already 314 Not sure this is required, given the 'ignore' configuration option. 317 table = self.config.table
318 if self.config.ignore
or len(self.config.unique) == 0:
320 cursor = conn.cursor()
321 sql =
"SELECT COUNT(*) FROM %s WHERE " % table
322 sql +=
" AND ".join([
"%s = %s" % (col, self.
placeHolder)
for col
in self.config.unique])
323 values = [self.
typemap[self.config.columns[col]](info[col])
for col
in self.config.unique]
325 cursor.execute(sql, values)
326 if cursor.fetchone()[0] > 0:
330 def addRow(self, conn, info, dryrun=False, create=False, table=None):
331 """Add a row to the file table (typically 'raw'). 333 @param conn Database connection 334 @param info File properties to add to database 335 @param table Name of table in database 338 table = self.config.table
339 sql =
"INSERT INTO %s (%s) SELECT " % (table,
",".join(self.config.columns))
340 sql +=
",".join([self.
placeHolder] * len(self.config.columns))
341 values = [self.
typemap[tt](info[col])
for col, tt
in self.config.columns.items()]
343 if self.config.ignore:
344 sql +=
" WHERE NOT EXISTS (SELECT 1 FROM %s WHERE " % table
345 sql +=
" AND ".join([
"%s=%s" % (col, self.
placeHolder)
for col
in self.config.unique])
347 values += [info[col]
for col
in self.config.unique]
350 print(
"Would execute: '%s' with %s" % (sql,
",".join([str(value)
for value
in values])))
352 conn.cursor().execute(sql, values)
355 """Generate the visits table (typically 'raw_visit') from the 356 file table (typically 'raw'). 358 @param conn Database connection 359 @param table Name of table in database 362 table = self.config.table
363 sql =
"INSERT INTO %s_visit SELECT DISTINCT " % table
364 sql +=
",".join(self.config.visit)
365 sql +=
" FROM %s AS vv1" % table
366 sql +=
" WHERE NOT EXISTS " 367 sql +=
"(SELECT vv2.visit FROM %s_visit AS vv2 WHERE vv1.visit = vv2.visit)" % (table,)
369 print(
"Would execute: %s" % sql)
371 conn.cursor().execute(sql)
375 """Configuration for IngestTask""" 376 parse = ConfigurableField(target=ParseTask, doc=
"File parsing")
377 register = ConfigurableField(target=RegisterTask, doc=
"Registry entry")
378 allowError = Field(dtype=bool, default=
False, doc=
"Allow error in ingestion?")
379 clobber = Field(dtype=bool, default=
False, doc=
"Clobber existing file?")
383 """Task that will ingest images into the data repository""" 384 ConfigClass = IngestConfig
385 ArgumentParser = IngestArgumentParser
386 _DefaultName =
"ingest" 389 super(IngestTask, self).
__init__(*args, **kwargs)
390 self.makeSubtask(
"parse")
391 self.makeSubtask(
"register")
395 """Parse the command-line arguments and run the Task""" 398 args = parser.parse_args(config)
399 task = cls(config=args.config)
402 def ingest(self, infile, outfile, mode="move", dryrun=False):
403 """Ingest a file into the image repository. 405 @param infile Name of input file 406 @param outfile Name of output file (file in repository) 407 @param mode Mode of ingest (copy/link/move/skip) 408 @param dryrun Only report what would occur? 409 @return Success boolean 414 self.log.info(
"Would %s from %s to %s" % (mode, infile, outfile))
417 outdir = os.path.dirname(outfile)
418 if not os.path.isdir(outdir):
423 if not os.path.isdir(outdir):
425 if os.path.lexists(outfile):
426 if self.config.clobber:
429 raise RuntimeError(
"File %s already exists; consider --config clobber=True" % outfile)
433 shutil.copyfile(infile, outfile)
435 os.symlink(os.path.abspath(infile), outfile)
438 os.rename(infile, outfile)
440 raise AssertionError(
"Unknown mode: %s" % mode)
441 self.log.info(
"%s --<%s>--> %s" % (infile, mode, outfile))
442 except Exception
as e:
443 self.log.warn(
"Failed to %s %s to %s: %s" % (mode, infile, outfile, e))
444 if not self.config.allowError:
450 """Return whether the file qualifies as bad 452 We match against the list of bad file patterns. 454 filename = os.path.basename(filename)
457 for badFile
in badFileList:
458 if fnmatch(filename, badFile):
463 """Return whether the file information qualifies as bad 465 We match against the list of bad data identifiers. 469 for badId
in badIdList:
470 if all(info[key] == value
for key, value
in badId.items()):
475 """!Expand a set of filenames and globs, returning a list of filenames 477 \param fileNameList A list of files and glob patterns 479 N.b. globs obey Posix semantics, so a pattern that matches nothing is returned unchanged 482 for globPattern
in fileNameList:
483 files = glob(globPattern)
486 self.log.warn(
"%s doesn't match any file" % globPattern)
489 filenameList.extend(files)
494 """Ingest all specified files and add them to the registry""" 497 context = self.register.openRegistry(root, create=args.create, dryrun=args.dryrun)
498 with context
as registry:
499 for infile
in filenameList:
502 self.log.info(
"Skipping declared bad file %s" % infile)
505 fileInfo, hduInfoList = self.parse.getInfo(infile)
506 except Exception
as e:
507 if not self.config.allowError:
509 self.log.warn(
"Error parsing %s (%s); skipping" % (infile, e))
511 if self.
isBadId(fileInfo, args.badId.idList):
512 self.log.info(
"Skipping declared bad file %s: %s" % (infile, fileInfo))
514 if self.register.check(registry, fileInfo):
515 if args.ignoreIngested:
518 self.log.warn(
"%s: already ingested: %s" % (infile, fileInfo))
519 outfile = self.parse.getDestination(args.butler, fileInfo, infile)
520 ingested = self.
ingest(infile, outfile, mode=args.mode, dryrun=args.dryrun)
523 for info
in hduInfoList:
524 self.register.addRow(registry, info, dryrun=args.dryrun, create=args.create)
525 except Exception
as exc:
526 self.log.warn(
"Failed to ingest file %s: %s", infile, exc)
527 self.register.addVisits(registry, dryrun=args.dryrun)
531 """Can I copy a file? Raise an exception if space constraints are not met. 533 @param fromPath Path from which the file is being copied 534 @param toPath Path to which the file is being copied 536 req = os.stat(fromPath).st_size
537 st = os.statvfs(os.path.dirname(toPath))
538 avail = st.f_bavail * st.f_frsize
540 raise RuntimeError(
"Insufficient space: %d vs %d" % (req, avail))
def ingest(self, infile, outfile, mode="move", dryrun=False)
def translate_filter(self, md)
def createTable(self, conn, table=None)
def expandFiles(self, fileNameList)
Expand a set of filenames and globs, returning a list of filenames.
def translate_date(self, md)
def __exit__(self, excType, excValue, traceback)
def getInfo(self, filename)
def getDestination(self, butler, info, filename)
def isBadFile(self, filename, badFileList)
def __init__(self, registryName, createTableFunc, forceCreateTables, permissions)
def getInfoFromMetadata(self, md, info={})
def assertCanCopy(fromPath, toPath)
def check(self, conn, info, table=None)
def __init__(self, args, kwargs)
def addVisits(self, conn, dryrun=False, table=None)
def openRegistry(self, directory, create=False, dryrun=False, name="registry.sqlite3")
def __init__(self, args, kwargs)
def addRow(self, conn, info, dryrun=False, create=False, table=None)
def isBadId(self, info, badIdList)