27 from fnmatch
import fnmatch
29 from contextlib
import contextmanager
31 from lsst.pex.config
import Config, Field, DictField, ListField, ConfigurableField
39 """Argument parser to support ingesting images into the image repository""" 42 super(IngestArgumentParser, self).
__init__(*args, **kwargs)
43 self.add_argument(
"-n",
"--dry-run", dest=
"dryrun", action=
"store_true", default=
False,
44 help=
"Don't perform any action?")
45 self.add_argument(
"--mode", choices=[
"move",
"copy",
"link",
"skip"], default=
"link",
46 help=
"Mode of delivering the files to their destination")
47 self.add_argument(
"--create", action=
"store_true", help=
"Create new registry (clobber old)?")
48 self.add_argument(
"--ignore-ingested", dest=
"ignoreIngested", action=
"store_true",
49 help=
"Don't register files that have already been registered")
50 self.add_id_argument(
"--badId",
"raw",
"Data identifier for bad data", doMakeDataRefList=
False)
51 self.add_argument(
"--badFile", nargs=
"*", default=[],
52 help=
"Names of bad files (no path; wildcards allowed)")
53 self.add_argument(
"files", nargs=
"+", help=
"Names of file")
57 """Configuration for ParseTask""" 58 translation = DictField(keytype=str, itemtype=str, default={},
59 doc=
"Translation table for property --> header")
60 translators = DictField(keytype=str, itemtype=str, default={},
61 doc=
"Properties and name of translator method")
62 defaults = DictField(keytype=str, itemtype=str, default={},
63 doc=
"Default values if header is not present")
64 hdu = Field(dtype=int, default=DEFAULT_HDU, doc=
"HDU to read for metadata")
65 extnames = ListField(dtype=str, default=[], doc=
"Extension names to search for")
69 """Task that will parse the filename and/or its contents to get the required information 70 for putting the file in the correct location and populating the registry.""" 71 ConfigClass = ParseConfig
74 """Get information about the image from the filename and its contents 76 Here, we open the image and parse the header, but one could also look at the filename itself 77 and derive information from that, or set values from the configuration. 79 @param filename Name of file to inspect 80 @return File properties; list of file properties for each extension 82 md = readMetadata(filename, self.config.hdu)
84 if len(self.config.extnames) == 0:
86 return phuInfo, [phuInfo]
88 extnames = set(self.config.extnames)
91 while len(extnames) > 0:
94 md = readMetadata(filename, extnum)
95 except Exception
as e:
96 self.log.warn(
"Error reading %s extensions %s: %s" % (filename, extnames, e))
102 hduInfo[
"hdu"] = extnum
103 infoList.append(hduInfo)
104 extnames.discard(ext)
105 return phuInfo, infoList
109 """ Get the name of an extension. 110 @param md: PropertySet (like one obtained from lsst.afw.fits.readMetadata) 111 @return Name of the extension if it exists. None otherwise. 115 ext = md.getScalar(
"EXTNAME")
121 """Attempt to pull the desired information out of the header 123 This is done through two mechanisms: 124 * translation: a property is set directly from the relevant header keyword 125 * translator: a property is set with the result of calling a method 127 The translator methods receive the header metadata and should return the 128 appropriate value, or None if the value cannot be determined. 130 @param md FITS header 131 @param info File properties, to be supplemented 136 for p, h
in self.config.translation.items():
137 value = md.get(h,
None)
138 if value
is not None:
139 if isinstance(value, str):
140 value = value.strip()
142 elif p
in self.config.defaults:
143 info[p] = self.config.defaults[p]
145 self.log.warn(
"Unable to find value for %s (derived from %s)" % (p, h))
146 for p, t
in self.config.translators.items():
147 func = getattr(self, t)
150 except Exception
as e:
151 self.log.warn(
"%s failed to translate %s: %s", t, p, e)
153 if value
is not None:
158 """Convert a full DATE-OBS to a mere date 160 Besides being an example of a translator, this is also generally useful. 161 It will only be used if listed as a translator in the configuration. 163 date = md.getScalar(
"DATE-OBS").strip()
170 """Translate a full filter description into a mere filter name 172 Besides being an example of a translator, this is also generally useful. 173 It will only be used if listed as a translator in the configuration. 175 filterName = md.getScalar(
"FILTER").strip()
176 filterName = filterName.strip()
177 c = filterName.find(
" ")
179 filterName = filterName[:c]
183 """Get destination for the file 185 @param butler Data butler 186 @param info File properties, used as dataId for the butler 187 @param filename Input filename 188 @return Destination filename 190 raw = butler.get(
"raw_filename", info)[0]
199 """Configuration for the RegisterTask""" 200 table = Field(dtype=str, default=
"raw", doc=
"Name of table")
201 columns = DictField(keytype=str, itemtype=str, doc=
"List of columns for raw table, with their types",
202 itemCheck=
lambda x: x
in (
"text",
"int",
"double"),
203 default={
'object':
'text',
212 unique = ListField(dtype=str, doc=
"List of columns to be declared unique for the table",
213 default=[
"visit",
"ccd"])
214 visit = ListField(dtype=str, default=[
"visit",
"object",
"date",
"filter"],
215 doc=
"List of columns for raw_visit table")
216 ignore = Field(dtype=bool, default=
False, doc=
"Ignore duplicates in the table?")
217 permissions = Field(dtype=int, default=0o664, doc=
"Permissions mode for registry; 0o664 = rw-rw-r--")
221 """Context manager to provide a registry 223 An existing registry is copied, so that it may continue 224 to be used while we add to this new registry. Finally, 225 the new registry is moved into the right place. 228 def __init__(self, registryName, createTableFunc, forceCreateTables, permissions):
229 """Construct a context manager 231 @param registryName: Name of registry file 232 @param createTableFunc: Function to create tables 233 @param forceCreateTables: Force the (re-)creation of tables? 234 @param permissions: Permissions to set on database file 239 updateFile = tempfile.NamedTemporaryFile(prefix=registryName, dir=os.path.dirname(self.
registryName),
243 if os.path.exists(registryName):
245 os.chmod(self.
updateName, os.stat(registryName).st_mode)
246 shutil.copyfile(registryName, self.
updateName)
249 createTableFunc(self.
conn, forceCreateTables=forceCreateTables)
253 """Provide the 'as' value""" 270 """A context manager that doesn't provide any context 272 Useful for dry runs where we don't want to actually do anything real. 277 class RegisterTask(Task):
278 """Task that will generate the registry for the Mapper""" 279 ConfigClass = RegisterConfig
281 typemap = {
'text': str,
'int': int,
'double': float}
283 def openRegistry(self, directory, create=False, dryrun=False, name="registry.sqlite3"):
284 """Open the registry and return the connection handle. 286 @param directory Directory in which the registry file will be placed 287 @param create Clobber any existing registry and create a new one? 288 @param dryrun Don't do anything permanent? 289 @param name Filename of the registry 290 @return Database connection 295 registryName = os.path.join(directory, name)
300 """Create the registry tables 302 One table (typically 'raw') contains information on all files, and the 303 other (typically 'raw_visit') contains information on all visits. 305 @param conn Database connection 306 @param table Name of table to create in database 308 cursor = conn.cursor()
310 table = self.config.table
311 cmd =
"SELECT name FROM sqlite_master WHERE type='table' AND name='%s'" % table
313 if cursor.fetchone()
and not forceCreateTables:
314 self.log.info(
'Table "%s" exists. Skipping creation' % table)
317 cmd =
"drop table if exists %s" % table
319 cmd =
"drop table if exists %s_visit" % table
322 cmd =
"create table %s (id integer primary key autoincrement, " % table
323 cmd +=
",".join([(
"%s %s" % (col, colType))
for col, colType
in self.config.columns.items()])
324 if len(self.config.unique) > 0:
325 cmd +=
", unique(" +
",".join(self.config.unique) +
")" 329 cmd =
"create table %s_visit (" % table
330 cmd +=
",".join([(
"%s %s" % (col, self.config.columns[col]))
for col
in self.config.visit])
331 cmd +=
", unique(" +
",".join(set(self.config.visit).intersection(set(self.config.unique))) +
")" 337 def check(self, conn, info, table=None):
338 """Check whether a row is already present 340 Not sure this is required, given the 'ignore' configuration option. 343 table = self.config.table
344 if self.config.ignore
or len(self.config.unique) == 0:
346 cursor = conn.cursor()
347 sql =
"SELECT COUNT(*) FROM %s WHERE " % table
348 sql +=
" AND ".join([
"%s = %s" % (col, self.
placeHolder)
for col
in self.config.unique])
349 values = [self.
typemap[self.config.columns[col]](info[col])
for col
in self.config.unique]
351 cursor.execute(sql, values)
352 if cursor.fetchone()[0] > 0:
356 def addRow(self, conn, info, dryrun=False, create=False, table=None):
357 """Add a row to the file table (typically 'raw'). 359 @param conn Database connection 360 @param info File properties to add to database 361 @param table Name of table in database 364 table = self.config.table
366 if self.config.ignore:
367 ignoreClause =
" OR IGNORE" 368 sql =
"INSERT%s INTO %s (%s) VALUES (" % (ignoreClause, table,
",".join(self.config.columns))
369 sql +=
",".join([self.
placeHolder] * len(self.config.columns)) +
")" 370 values = [self.
typemap[tt](info[col])
for col, tt
in self.config.columns.items()]
373 print(
"Would execute: '%s' with %s" % (sql,
",".join([str(value)
for value
in values])))
375 conn.cursor().execute(sql, values)
377 sql =
"INSERT OR IGNORE INTO %s_visit VALUES (" % table
378 sql +=
",".join([self.
placeHolder] * len(self.config.visit)) +
")" 379 values = [self.
typemap[self.config.columns[col]](info[col])
for col
in self.config.visit]
382 print(
"Would execute: '%s' with %s" % (sql,
",".join([str(value)
for value
in values])))
384 conn.cursor().execute(sql, values)
388 """Configuration for IngestTask""" 389 parse = ConfigurableField(target=ParseTask, doc=
"File parsing")
390 register = ConfigurableField(target=RegisterTask, doc=
"Registry entry")
391 allowError = Field(dtype=bool, default=
False, doc=
"Allow error in ingestion?")
392 clobber = Field(dtype=bool, default=
False, doc=
"Clobber existing file?")
403 """Task that will ingest images into the data repository""" 404 ConfigClass = IngestConfig
405 ArgumentParser = IngestArgumentParser
406 _DefaultName =
"ingest" 409 super(IngestTask, self).
__init__(*args, **kwargs)
410 self.makeSubtask(
"parse")
411 self.makeSubtask(
"register")
415 """Parse the command-line arguments and return them along with a Task 419 args = parser.parse_args(config)
420 task = cls(config=args.config)
425 """Parse the command-line arguments and run the Task.""" 430 def prepareTask(cls, root=None, dryrun=False, mode="move", create=False,
431 ignoreIngested=False):
432 """Prepare for running the task repeatedly with `ingestFiles`. 434 Saves the parsed arguments, including the Butler and log, as a 435 private instance variable. 439 root : `str`, optional 440 Repository root pathname. If None, run the Task using the 441 command line arguments, ignoring all other arguments below. 442 dryrun : `bool`, optional 443 If True, don't perform any action; log what would have happened. 444 mode : `str`, optional 445 How files are delivered to their destination. Default is "move", 446 unlike the command-line default of "link". 447 create : `bool`, optional 448 If True, create a new registry, clobbering any old one present. 449 ignoreIngested : `bool`, optional 450 If True, do not complain if the file is already present in the 451 registry (and do nothing else). 456 If `root` was provided, the IngestTask instance 458 sys.argv = [
"IngestTask"]
459 sys.argv.append(root)
461 sys.argv.append(
"--dry-run")
462 sys.argv.append(
"--mode")
463 sys.argv.append(mode)
465 sys.argv.append(
"--create")
467 sys.argv.append(
"--ignore-ingested")
468 sys.argv.append(
"__fakefile__")
474 def ingest(self, infile, outfile, mode="move", dryrun=False):
475 """Ingest a file into the image repository. 477 @param infile Name of input file 478 @param outfile Name of output file (file in repository) 479 @param mode Mode of ingest (copy/link/move/skip) 480 @param dryrun Only report what would occur? 481 @return Success boolean 486 self.log.info(
"Would %s from %s to %s" % (mode, infile, outfile))
489 outdir = os.path.dirname(outfile)
490 if not os.path.isdir(outdir):
493 except OSError
as exc:
495 if not os.path.isdir(outdir):
496 raise RuntimeError(f
"Failed to create directory {outdir}")
from exc
497 if os.path.lexists(outfile):
498 if self.config.clobber:
501 raise RuntimeError(
"File %s already exists; consider --config clobber=True" % outfile)
505 shutil.copyfile(infile, outfile)
507 os.symlink(os.path.abspath(infile), outfile)
510 shutil.move(infile, outfile)
512 raise AssertionError(
"Unknown mode: %s" % mode)
513 self.log.info(
"%s --<%s>--> %s" % (infile, mode, outfile))
514 except Exception
as e:
515 self.log.warn(
"Failed to %s %s to %s: %s" % (mode, infile, outfile, e))
516 if not self.config.allowError:
517 raise RuntimeError(f
"Failed to {mode} {infile} to {outfile}")
from e
522 """Return whether the file qualifies as bad 524 We match against the list of bad file patterns. 526 filename = os.path.basename(filename)
529 for badFile
in badFileList:
530 if fnmatch(filename, badFile):
535 """Return whether the file information qualifies as bad 537 We match against the list of bad data identifiers. 541 for badId
in badIdList:
542 if all(info[key] == value
for key, value
in badId.items()):
547 """!Expand a set of filenames and globs, returning a list of filenames 549 @param fileNameList A list of files and glob patterns 551 N.b. globs obey Posix semantics, so a pattern that matches nothing is returned unchanged 554 for globPattern
in fileNameList:
555 files = glob(globPattern)
558 self.log.warn(
"%s doesn't match any file" % globPattern)
561 filenameList.extend(files)
566 """!Examine and ingest a single file 568 @param infile: File to process 569 @param args: Parsed command-line arguments 570 @return parsed information from FITS HDUs or None 573 self.log.info(
"Skipping declared bad file %s" % infile)
576 fileInfo, hduInfoList = self.parse.getInfo(infile)
577 except Exception
as e:
578 if not self.config.allowError:
579 raise RuntimeError(f
"Error parsing {infile}")
from e
580 self.log.warn(
"Error parsing %s (%s); skipping" % (infile, e))
582 if self.
isBadId(fileInfo, args.badId.idList):
583 self.log.info(
"Skipping declared bad file %s: %s" % (infile, fileInfo))
585 if registry
is not None and self.register.check(registry, fileInfo):
586 if args.ignoreIngested:
588 self.log.warn(
"%s: already ingested: %s" % (infile, fileInfo))
589 outfile = self.parse.getDestination(args.butler, fileInfo, infile)
590 if not self.
ingest(infile, outfile, mode=args.mode, dryrun=args.dryrun):
595 """Ingest all specified files and add them to the registry""" 598 context = self.register.openRegistry(root, create=args.create, dryrun=args.dryrun)
599 with context
as registry:
600 for pos
in range(len(filenameList)):
601 infile = filenameList[pos]
603 hduInfoList = self.
runFile(infile, registry, args)
604 except Exception
as exc:
605 self.log.warn(
"Failed to ingest file %s: %s", infile, exc)
606 if not self.config.allowError:
607 raise IngestError(f
"Failed to ingest file {infile}", infile, pos)
from exc
609 if hduInfoList
is None:
611 for info
in hduInfoList:
613 self.register.addRow(registry, info, dryrun=args.dryrun, create=args.create)
614 except Exception
as exc:
615 raise IngestError(f
"Failed to register file {infile}", infile, pos)
from exc
618 """Ingest specified file or list of files and add them to the registry. 620 This method can only be called if `prepareTask` was used. 624 fileList : `str` or `list` [`str`] 625 Pathname or list of pathnames of files to ingest. 627 if not hasattr(self,
"_args"):
628 raise RuntimeError(
"Task not created with prepareTask")
629 if isinstance(fileList, str):
630 fileList = [fileList]
631 self._args.files = fileList
636 """Can I copy a file? Raise an exception if space constraints are not met. 638 @param fromPath Path from which the file is being copied 639 @param toPath Path to which the file is being copied 641 req = os.stat(fromPath).st_size
642 st = os.statvfs(os.path.dirname(toPath))
643 avail = st.f_bavail * st.f_frsize
645 raise RuntimeError(
"Insufficient space: %d vs %d" % (req, avail))
def ingest(self, infile, outfile, mode="move", dryrun=False)
def ingestFiles(self, fileList)
def __init__(self, message, pathname, position)
def translate_filter(self, md)
def expandFiles(self, fileNameList)
Expand a set of filenames and globs, returning a list of filenames.
def translate_date(self, md)
def __exit__(self, excType, excValue, traceback)
def getInfo(self, filename)
def getInfoFromMetadata(self, md, info=None)
def getDestination(self, butler, info, filename)
def runFile(self, infile, registry, args)
Examine and ingest a single file.
def isBadFile(self, filename, badFileList)
def __init__(self, registryName, createTableFunc, forceCreateTables, permissions)
def createTable(self, conn, table=None, forceCreateTables=False)
def assertCanCopy(fromPath, toPath)
def check(self, conn, info, table=None)
def __init__(self, args, kwargs)
def openRegistry(self, directory, create=False, dryrun=False, name="registry.sqlite3")
def prepareTask(cls, root=None, dryrun=False, mode="move", create=False, ignoreIngested=False)
def __init__(self, args, kwargs)
def addRow(self, conn, info, dryrun=False, create=False, table=None)
def isBadId(self, info, badIdList)