1 from past.builtins
import basestring
6 from fnmatch
import fnmatch
8 from contextlib
import contextmanager
10 from lsst.pex.config import Config, Field, DictField, ListField, ConfigurableField
18 """Argument parser to support ingesting images into the image repository""" 21 super(IngestArgumentParser, self).
__init__(*args, **kwargs)
22 self.add_argument(
"-n",
"--dry-run", dest=
"dryrun", action=
"store_true", default=
False,
23 help=
"Don't perform any action?")
24 self.add_argument(
"--mode", choices=[
"move",
"copy",
"link",
"skip"], default=
"link",
25 help=
"Mode of delivering the files to their destination")
26 self.add_argument(
"--create", action=
"store_true", help=
"Create new registry (clobber old)?")
27 self.add_argument(
"--ignore-ingested", dest=
"ignoreIngested", action=
"store_true",
28 help=
"Don't register files that have already been registered")
29 self.add_id_argument(
"--badId",
"raw",
"Data identifier for bad data", doMakeDataRefList=
False)
30 self.add_argument(
"--badFile", nargs=
"*", default=[],
31 help=
"Names of bad files (no path; wildcards allowed)")
32 self.add_argument(
"files", nargs=
"+", help=
"Names of file")
36 """Configuration for ParseTask""" 37 translation = DictField(keytype=str, itemtype=str, default={},
38 doc=
"Translation table for property --> header")
39 translators = DictField(keytype=str, itemtype=str, default={},
40 doc=
"Properties and name of translator method")
41 defaults = DictField(keytype=str, itemtype=str, default={},
42 doc=
"Default values if header is not present")
43 hdu = Field(dtype=int, default=DEFAULT_HDU, doc=
"HDU to read for metadata")
44 extnames = ListField(dtype=str, default=[], doc=
"Extension names to search for")
48 """Task that will parse the filename and/or its contents to get the required information 49 for putting the file in the correct location and populating the registry.""" 50 ConfigClass = ParseConfig
53 """Get information about the image from the filename and its contents 55 Here, we open the image and parse the header, but one could also look at the filename itself 56 and derive information from that, or set values from the configuration. 58 @param filename Name of file to inspect 59 @return File properties; list of file properties for each extension 61 md = readMetadata(filename, self.config.hdu)
63 if len(self.config.extnames) == 0:
65 return phuInfo, [phuInfo]
67 extnames = set(self.config.extnames)
70 while len(extnames) > 0:
73 md = readMetadata(filename, extnum)
75 self.log.warn(
"Error reading %s extensions %s" % (filename, extnames))
81 hduInfo[
"hdu"] = extnum
82 infoList.append(hduInfo)
84 return phuInfo, infoList
88 """ Get the name of an extension. 89 @param md: PropertySet like one obtained from lsst.afw.fits.readMetadata) 90 @return Name of the extension if it exists. None otherwise. 94 ext = md.get(
"EXTNAME")
100 """Attempt to pull the desired information out of the header 102 This is done through two mechanisms: 103 * translation: a property is set directly from the relevant header keyword 104 * translator: a property is set with the result of calling a method 106 The translator methods receive the header metadata and should return the 107 appropriate value, or None if the value cannot be determined. 109 @param md FITS header 110 @param info File properties, to be supplemented 115 for p, h
in self.config.translation.items():
118 if isinstance(value, basestring):
119 value = value.strip()
121 elif p
in self.config.defaults:
122 info[p] = self.config.defaults[p]
124 self.log.warn(
"Unable to find value for %s (derived from %s)" % (p, h))
125 for p, t
in self.config.translators.items():
126 func = getattr(self, t)
129 except Exception
as e:
130 self.log.warn(
"%s failed to translate %s: %s", t, p, e)
132 if value
is not None:
137 """Convert a full DATE-OBS to a mere date 139 Besides being an example of a translator, this is also generally useful. 140 It will only be used if listed as a translator in the configuration. 142 date = md.get(
"DATE-OBS").strip()
def translate_filter(self, md):
    """Translate a full filter description into a mere filter name

    Besides being an example of a translator, this is also generally useful.
    It will only be used if listed as a translator in the configuration.

    @param md  FITS header; only md.get("FILTER") is consulted
    @return The FILTER value with surrounding whitespace removed and
            everything from the first space onwards dropped
    """
    filterName = md.get("FILTER").strip()
    # partition() returns the whole string when there is no space, so no
    # explicit "was a space found?" guard is needed before truncating.
    return filterName.partition(" ")[0]
162 """Get destination for the file 164 @param butler Data butler 165 @param info File properties, used as dataId for the butler 166 @param filename Input filename 167 @return Destination filename 169 raw = butler.get(
"raw_filename", info)[0]
178 """Configuration for the RegisterTask""" 179 table = Field(dtype=str, default=
"raw", doc=
"Name of table")
180 columns = DictField(keytype=str, itemtype=str, doc=
"List of columns for raw table, with their types",
181 itemCheck=
lambda x: x
in (
"text",
"int",
"double"),
182 default={
'object':
'text',
191 unique = ListField(dtype=str, doc=
"List of columns to be declared unique for the table",
192 default=[
"visit",
"ccd"])
193 visit = ListField(dtype=str, default=[
"visit",
"object",
"date",
"filter"],
194 doc=
"List of columns for raw_visit table")
195 ignore = Field(dtype=bool, default=
False, doc=
"Ignore duplicates in the table?")
196 permissions = Field(dtype=int, default=0o664, doc=
"Permissions mode for registry; 0o664 = rw-rw-r--")
200 """Context manager to provide a registry 202 An existing registry is copied, so that it may continue 203 to be used while we add to this new registry. Finally, 204 the new registry is moved into the right place. 207 def __init__(self, registryName, createTableFunc, forceCreateTables, permissions):
208 """Construct a context manager 210 @param registryName: Name of registry file 211 @param createTableFunc: Function to create tables 212 @param forceCreateTables: Force the (re-)creation of tables? 213 @param permissions: Permissions to set on database file 218 updateFile = tempfile.NamedTemporaryFile(prefix=registryName, dir=os.path.dirname(self.
registryName),
223 if os.path.exists(registryName):
225 os.chmod(self.
updateName, os.stat(registryName).st_mode)
226 shutil.copyfile(registryName, self.
updateName)
230 if not haveTable
or forceCreateTables:
231 createTableFunc(self.
conn)
235 """Provide the 'as' value""" 252 """A context manager that doesn't provide any context 254 Useful for dry runs where we don't want to actually do anything real. 259 class RegisterTask(Task):
260 """Task that will generate the registry for the Mapper""" 261 ConfigClass = RegisterConfig
263 typemap = {
'text': str,
'int': int,
'double': float}
265 def openRegistry(self, directory, create=False, dryrun=False, name="registry.sqlite3"):
266 """Open the registry and return the connection handle. 268 @param directory Directory in which the registry file will be placed 269 @param create Clobber any existing registry and create a new one? 270 @param dryrun Don't do anything permanent? 271 @param name Filename of the registry 272 @return Database connection 277 registryName = os.path.join(directory, name)
def createTable(self, conn, table=None):
    """Create the registry tables

    One table (typically 'raw') contains information on all files, and the
    other (typically 'raw_visit') contains information on all visits.

    @param conn    Database connection
    @param table   Name of table to create in database; defaults to
                   self.config.table
    """
    if table is None:
        table = self.config.table
    columns = self.config.columns
    unique = self.config.unique

    # File table: autoincrement id, then every configured column.
    cmd = "create table %s (id integer primary key autoincrement, " % table
    cmd += ",".join([("%s %s" % (col, colType)) for col, colType in columns.items()])
    if len(unique) > 0:
        cmd += ", unique(" + ",".join(unique) + ")"
    cmd += ")"
    conn.cursor().execute(cmd)

    # Visit table: the configured subset of columns, reusing their types.
    cmd = "create table %s_visit (" % table
    cmd += ",".join([("%s %s" % (col, columns[col])) for col in self.config.visit])
    # Keep the visit-column order rather than going through a set, so the
    # generated schema is reproducible, and omit the constraint entirely
    # when no visit column is unique ("unique()" is not valid SQL).
    visitUnique = [col for col in self.config.visit if col in unique]
    if visitUnique:
        cmd += ", unique(" + ",".join(visitUnique) + ")"
    cmd += ")"
    conn.cursor().execute(cmd)
def check(self, conn, info, table=None):
    """Check for the presence of a row already

    Not sure this is required, given the 'ignore' configuration option.

    @param conn    Database connection
    @param info    File properties; must contain every column listed in
                   self.config.unique
    @param table   Name of table in database; defaults to self.config.table
    @return True if a row matching all the unique columns is present
    """
    if table is None:
        table = self.config.table
    if self.config.ignore or len(self.config.unique) == 0:
        # NOTE(review): nothing to look up in this case; "not present" is
        # assumed to be the intended answer -- confirm against callers.
        return False
    cursor = conn.cursor()
    sql = "SELECT COUNT(*) FROM %s WHERE " % table
    sql += " AND ".join(["%s = %s" % (col, self.placeHolder) for col in self.config.unique])
    # Convert each value to the Python type matching its declared column type.
    values = [self.typemap[self.config.columns[col]](info[col]) for col in self.config.unique]
    cursor.execute(sql, values)
    return cursor.fetchone()[0] > 0
def addRow(self, conn, info, dryrun=False, create=False, table=None):
    """Add a row to the file table (typically 'raw').

    @param conn    Database connection
    @param info    File properties to add to database
    @param dryrun  Print the statement instead of executing it?
    @param create  Not used by this method
    @param table   Name of table in database; defaults to self.config.table
    """
    if table is None:
        table = self.config.table
    columns = self.config.columns
    unique = self.config.unique

    sql = "INSERT INTO %s (%s) SELECT " % (table, ",".join(columns))
    sql += ",".join([self.placeHolder] * len(columns))
    values = [self.typemap[tt](info[col]) for col, tt in columns.items()]

    # Only guard against duplicates when there is something to compare on;
    # an empty column list would leave a dangling "WHERE" in the subquery.
    if self.config.ignore and len(unique) > 0:
        sql += " WHERE NOT EXISTS (SELECT 1 FROM %s WHERE " % table
        sql += " AND ".join(["%s=%s" % (col, self.placeHolder) for col in unique])
        sql += ")"
        # Same type conversion as used for the inserted values.
        values += [self.typemap[columns[col]](info[col]) for col in unique]

    if dryrun:
        print("Would execute: '%s' with %s" % (sql, ",".join([str(value) for value in values])))
    else:
        conn.cursor().execute(sql, values)
def addVisits(self, conn, dryrun=False, table=None):
    """Generate the visits table (typically 'raw_visit') from the
    file table (typically 'raw').

    Only visits not already present in the visits table are inserted.

    @param conn    Database connection
    @param dryrun  Print the statement instead of executing it?
    @param table   Name of table in database; defaults to self.config.table
    """
    if table is None:
        table = self.config.table
    sql = "INSERT INTO %s_visit SELECT DISTINCT " % table
    sql += ",".join(self.config.visit)
    sql += " FROM %s AS vv1" % table
    # Skip visits that the visits table already holds.
    sql += " WHERE NOT EXISTS "
    sql += "(SELECT vv2.visit FROM %s_visit AS vv2 WHERE vv1.visit = vv2.visit)" % (table,)
    if dryrun:
        print("Would execute: %s" % sql)
    else:
        conn.cursor().execute(sql)
371 """Configuration for IngestTask""" 372 parse = ConfigurableField(target=ParseTask, doc=
"File parsing")
373 register = ConfigurableField(target=RegisterTask, doc=
"Registry entry")
374 allowError = Field(dtype=bool, default=
False, doc=
"Allow error in ingestion?")
375 clobber = Field(dtype=bool, default=
False, doc=
"Clobber existing file?")
379 """Task that will ingest images into the data repository""" 380 ConfigClass = IngestConfig
381 ArgumentParser = IngestArgumentParser
382 _DefaultName =
"ingest" 385 super(IngestTask, self).
__init__(*args, **kwargs)
386 self.makeSubtask(
"parse")
387 self.makeSubtask(
"register")
391 """Parse the command-line arguments and run the Task""" 394 args = parser.parse_args(config)
395 task = cls(config=args.config)
398 def ingest(self, infile, outfile, mode="move", dryrun=False):
399 """Ingest a file into the image repository. 401 @param infile Name of input file 402 @param outfile Name of output file (file in repository) 403 @param mode Mode of ingest (copy/link/move/skip) 404 @param dryrun Only report what would occur? 405 @param Success boolean 410 self.log.info(
"Would %s from %s to %s" % (mode, infile, outfile))
413 outdir = os.path.dirname(outfile)
414 if not os.path.isdir(outdir):
419 if not os.path.isdir(outdir):
421 if os.path.lexists(outfile):
422 if self.config.clobber:
425 raise RuntimeError(
"File %s already exists; consider --config clobber=True" % outfile)
429 shutil.copyfile(infile, outfile)
431 os.symlink(os.path.abspath(infile), outfile)
434 os.rename(infile, outfile)
436 raise AssertionError(
"Unknown mode: %s" % mode)
437 self.log.info(
"%s --<%s>--> %s" % (infile, mode, outfile))
438 except Exception
as e:
439 self.log.warn(
"Failed to %s %s to %s: %s" % (mode, infile, outfile, e))
440 if not self.config.allowError:
def isBadFile(self, filename, badFileList):
    """Return whether the file qualifies as bad

    We match against the list of bad file patterns.

    @param filename     Name of file; only its base name (directory part
                        removed) is compared
    @param badFileList  fnmatch-style patterns; may be empty or None
    @return True if the base name matches any of the patterns
    """
    if not badFileList:
        return False
    basename = os.path.basename(filename)
    return any(fnmatch(basename, pattern) for pattern in badFileList)
def isBadId(self, info, badIdList):
    """Return whether the file information qualifies as bad

    We match against the list of bad data identifiers.

    @param info       File properties, indexed by the keys used in the
                      bad data identifiers
    @param badIdList  Bad data identifiers (each a dict of key/value
                      pairs); may be empty or None
    @return True if every key/value pair of at least one bad identifier
            equals the corresponding entry in info
    """
    if not badIdList:
        return False
    return any(all(info[key] == value for key, value in badId.items())
               for badId in badIdList)
471 """!Expand a set of filenames and globs, returning a list of filenames 473 \param fileNameList A list of files and glob patterns 475 N.b. globs obey Posix semantics, so a pattern that matches nothing is returned unchanged 478 for globPattern
in fileNameList:
479 files = glob(globPattern)
482 self.log.warn(
"%s doesn't match any file" % globPattern)
485 filenameList.extend(files)
490 """!Examine and ingest a single file 492 @param infile: File to process 493 @param args: Parsed command-line arguments 494 @return parsed information from FITS HDUs or None 497 self.log.info(
"Skipping declared bad file %s" % infile)
500 fileInfo, hduInfoList = self.parse.getInfo(infile)
501 except Exception
as e:
502 if not self.config.allowError:
504 self.log.warn(
"Error parsing %s (%s); skipping" % (infile, e))
506 if self.
isBadId(fileInfo, args.badId.idList):
507 self.log.info(
"Skipping declared bad file %s: %s" % (infile, fileInfo))
509 if registry
is not None and self.register.check(registry, fileInfo):
510 if args.ignoreIngested:
512 self.log.warn(
"%s: already ingested: %s" % (infile, fileInfo))
513 outfile = self.parse.getDestination(args.butler, fileInfo, infile)
514 if not self.
ingest(infile, outfile, mode=args.mode, dryrun=args.dryrun):
519 """Ingest all specified files and add them to the registry""" 522 context = self.register.openRegistry(root, create=args.create, dryrun=args.dryrun)
523 with context
as registry:
524 for infile
in filenameList:
526 hduInfoList = self.
runFile(infile, registry, args)
527 except Exception
as exc:
528 self.log.warn(
"Failed to ingest file %s: %s", infile, exc)
530 if hduInfoList
is None:
532 for info
in hduInfoList:
533 self.register.addRow(registry, info, dryrun=args.dryrun, create=args.create)
534 self.register.addVisits(registry, dryrun=args.dryrun)
def assertCanCopy(fromPath, toPath):
    """Can I copy a file?  Raise an exception if space constraints not met.

    @param fromPath    Path from which the file is being copied
    @param toPath      Path to which the file is being copied
    @throws RuntimeError if the file system holding the destination
            directory reports fewer available bytes than the source size
    """
    req = os.stat(fromPath).st_size
    # dirname() of a bare file name is "", which statvfs() rejects; fall
    # back to the current directory, which is where such a file would land.
    st = os.statvfs(os.path.dirname(toPath) or os.curdir)
    avail = st.f_bavail * st.f_frsize
    if avail < req:
        raise RuntimeError("Insufficient space: %d vs %d" % (req, avail))
def ingest(self, infile, outfile, mode="move", dryrun=False)
def translate_filter(self, md)
def createTable(self, conn, table=None)
def expandFiles(self, fileNameList)
Expand a set of filenames and globs, returning a list of filenames.
def translate_date(self, md)
def __exit__(self, excType, excValue, traceback)
def getInfo(self, filename)
def getInfoFromMetadata(self, md, info=None)
def getDestination(self, butler, info, filename)
def runFile(self, infile, registry, args)
Examine and ingest a single file.
def isBadFile(self, filename, badFileList)
def __init__(self, registryName, createTableFunc, forceCreateTables, permissions)
def assertCanCopy(fromPath, toPath)
def check(self, conn, info, table=None)
def __init__(self, args, kwargs)
def addVisits(self, conn, dryrun=False, table=None)
def openRegistry(self, directory, create=False, dryrun=False, name="registry.sqlite3")
def __init__(self, args, kwargs)
def addRow(self, conn, info, dryrun=False, create=False, table=None)
def isBadId(self, info, badIdList)