lsst.obs.base  16.0-11-gfbb8ea7+1
mapping.py
#
# LSST Data Management System
# Copyright 2008, 2009, 2010 LSST Corporation.
#
# This product includes software developed by the
# LSST Project (http://www.lsst.org/).
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the LSST License Statement and
# the GNU General Public License along with this program.  If not,
# see <http://www.lsstcorp.org/LegalNotices/>.
#

from collections import OrderedDict
import os
import re
from lsst.daf.base import PropertySet
from lsst.daf.persistence import ButlerLocation, NoResults
from lsst.daf.persistence.policy import Policy
import lsst.pex.policy as pexPolicy

__all__ = ["Mapping", "ImageMapping", "ExposureMapping", "CalibrationMapping", "DatasetMapping"]


class Mapping(object):

    """Mapping is a base class for all mappings.  Mappings are used by
    the Mapper to map (determine a path to some data given some
    identifiers) and standardize (convert data into some standard
    format or type) data, and to query the associated registry to see
    what data is available.

    Subclasses must specify self.storage or else override self.map().

    Public methods: lookup, have, need, getKeys, map

    Mappings are specified mainly by policy.  A Mapping policy should
    consist of:

    template (string): a Python string providing the filename for that
    particular dataset type based on some data identifiers.  In the
    case of redundancy in the path (e.g., file uniquely specified by
    the exposure number, but filter in the path), the
    redundant/dependent identifiers can be looked up in the registry.

    python (string): the Python type for the retrieved data (e.g.
    lsst.afw.image.ExposureF)

    persistable (string): the Persistable registration for the on-disk data
    (e.g. ImageU)

    storage (string, optional): Storage type for this dataset type (e.g.
    "BoostStorage")

    level (string, optional): the level in the camera hierarchy at which the
    data is stored (Amp, Ccd or skyTile), if relevant

    tables (string, optional): a whitespace-delimited list of tables in the
    registry that can be NATURAL JOIN-ed to look up additional
    information.

    Parameters
    ----------
    datasetType : `str`
        Butler dataset type to be mapped.
    policy : `daf_persistence.Policy` or `pexPolicy.Policy`
        Mapping Policy.  (pexPolicy only for backward compatibility)
    registry : `lsst.obs.base.Registry`
        Registry for metadata lookups.
    rootStorage : Storage subclass instance
        Interface to persisted repository data.
    provided : `list` of `str`
        Keys provided by the mapper.
    """

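    # A minimal sketch of a Mapping policy for a hypothetical "calexp"
    # dataset type, in daf_persistence YAML form.  The keys match the policy
    # entries documented above, but the values are illustrative assumptions,
    # not taken from a real obs package:
    #
    #   calexp:
    #     template: "calexp/v%(visit)d-f%(filter)s/c%(ccd)02d.fits"
    #     python: lsst.afw.image.ExposureF
    #     persistable: ExposureF
    #     storage: FitsStorage
    #     level: Ccd
    #     tables: raw raw_visit
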
    def __init__(self, datasetType, policy, registry, rootStorage, provided=None):

        if policy is None:
            raise RuntimeError("No policy provided for mapping")

        if isinstance(policy, pexPolicy.Policy):
            policy = Policy(policy)

        self.datasetType = datasetType
        self.registry = registry
        self.rootStorage = rootStorage

        self._template = policy['template']  # Template path
        # In most cases the template cannot be empty: it is accessed via a
        # property that raises while `not self._template`.  Here we *do*
        # allow it to be empty so that the key dict can still be fetched and
        # the mapping constructed; the property will raise later if an empty
        # template is actually used.  A little odd, but it allows this
        # template check to be introduced without a major refactor.
        if self._template:
            self.keyDict = dict([
                (k, _formatMap(v, k, datasetType))
                for k, v in
                re.findall(r'\%\((\w+)\).*?([diouxXeEfFgGcrs])', self.template)
            ])
        else:
            self.keyDict = {}
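        # Worked example (illustrative template, not from a real policy):
        # for "raw/v%(visit)d/f%(filter)s.fits" the findall above yields the
        # (key, format-character) pairs ("visit", "d") and ("filter", "s"),
        # so keyDict becomes {"visit": int, "filter": str}.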
        if provided is not None:
            for p in provided:
                if p in self.keyDict:
                    del self.keyDict[p]
        self.python = policy['python']  # Python type
        self.persistable = policy['persistable']  # Persistable type
        self.storage = policy['storage']
        if 'level' in policy:
            self.level = policy['level']  # Level in camera hierarchy
        if 'tables' in policy:
            self.tables = policy.asArray('tables')
        else:
            self.tables = None
        self.range = None
        self.columns = None
        self.obsTimeName = policy['obsTimeName'] if 'obsTimeName' in policy else None
        self.recipe = policy['recipe'] if 'recipe' in policy else 'default'

    @property
    def template(self):
        if self._template:  # template must not be an empty string or None
            return self._template
        else:
            raise RuntimeError("Template is not defined for the {} dataset type, ".format(self.datasetType) +
                               "it must be set before it can be used.")

    def keys(self):
        """Return the dict of keys and value types required for this mapping."""
        return self.keyDict

    def map(self, mapper, dataId, write=False):
        """Standard implementation of map function.

        Parameters
        ----------
        mapper : `lsst.daf.persistence.Mapper`
            Object to be mapped.
        dataId : `dict`
            Dataset identifier.

        Returns
        -------
        lsst.daf.persistence.ButlerLocation
            Location of object that was mapped.
        """
        actualId = self.need(iter(self.keyDict.keys()), dataId)
        usedDataId = {key: actualId[key] for key in self.keyDict.keys()}
        path = mapper._mapActualToPath(self.template, actualId)
        if os.path.isabs(path):
            raise RuntimeError("Mapped path should not be absolute.")
        if not write:
            # This allows mapped files to be compressed, ending in .gz or
            # .fz, without any indication from the policy that the file
            # should be compressed, easily allowing repositories to contain
            # a mixture of compressed and uncompressed files.
            # If needed we can add a policy flag to allow compressed files or
            # not, and perhaps a list of allowed extensions that may exist at
            # the end of the template.
            for ext in (None, '.gz', '.fz'):
                if ext and path.endswith(ext):
                    continue  # if the path already ends with the extension
                extPath = path + ext if ext else path
                newPath = self.rootStorage.instanceSearch(extPath)
                if newPath:
                    path = newPath
                    break
        assert path, "Fully-qualified filename is empty."

        addFunc = "add_" + self.datasetType  # Name of method for additionalData
        if hasattr(mapper, addFunc):
            addFunc = getattr(mapper, addFunc)
            additionalData = addFunc(self.datasetType, actualId)
            assert isinstance(additionalData, PropertySet), \
                "Bad type for returned data: %s" % (type(additionalData),)
        else:
            additionalData = None

        return ButlerLocation(pythonType=self.python, cppType=self.persistable, storageName=self.storage,
                              locationList=path, dataId=actualId.copy(), mapper=mapper,
                              storage=self.rootStorage, usedDataId=usedDataId, datasetType=self.datasetType,
                              additionalData=additionalData)

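    # Usage sketch (hypothetical mapper and data ID, for illustration only):
    #
    #   loc = mapping.map(mapper, {"visit": 1234, "ccd": 5})
    #
    # returns a ButlerLocation whose locationList holds the resolved path,
    # possibly with a .gz/.fz extension if a compressed file was found on
    # read.
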
    def lookup(self, properties, dataId):
        """Look up properties in a metadata registry given a partial
        dataset identifier.

        Parameters
        ----------
        properties : `list` of `str`
            What to look up.
        dataId : `dict`
            Dataset identifier.

        Returns
        -------
        `list` of `tuple`
            Values of properties.
        """
        if self.registry is None:
            raise RuntimeError("No registry for lookup")

        skyMapKeys = ("tract", "patch")

        where = []
        values = []

        # Prepare to remove skymap entries from the properties list.  These
        # must be in the data ID, so we store which ones we're removing and
        # create an OrderedDict that tells us where to re-insert them.  That
        # maps the name of the property to either its index in the properties
        # list *after* the skymap ones have been removed (for entries that
        # aren't skymap ones) or the value from the data ID (for those that
        # are).
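        # Illustrative example (hypothetical values): with
        # properties = ["filter", "tract", "expTime"] and
        # dataId = {"visit": 1234, "tract": 42}, the loop below produces
        # removed = {"tract"} and
        # substitutions = {"filter": 0, "tract": 42, "expTime": 1}:
        # "filter" and "expTime" keep their post-removal indexes, while
        # "tract" carries its value straight from the data ID.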
        removed = set()
        substitutions = OrderedDict()
        index = 0
        properties = list(properties)  # don't modify the original list
        for p in properties:
            if p in skyMapKeys:
                try:
                    substitutions[p] = dataId[p]
                    removed.add(p)
                except KeyError:
                    raise RuntimeError(
                        "Cannot look up skymap key '%s'; it must be explicitly included in the data ID" % p
                    )
            else:
                substitutions[p] = index
                index += 1
        # Can't actually remove while iterating above, so we do it here.
        for p in removed:
            properties.remove(p)

        fastPath = True
        for p in properties:
            if p not in ('filter', 'expTime', 'taiObs'):
                fastPath = False
                break
        if fastPath and 'visit' in dataId and "raw" in self.tables:
            lookupDataId = {'visit': dataId['visit']}
            result = self.registry.lookup(properties, 'raw_visit', lookupDataId, template=self.template)
        else:
            if dataId is not None:
                for k, v in dataId.items():
                    if self.columns and k not in self.columns:
                        continue
                    if k == self.obsTimeName:
                        continue
                    if k in skyMapKeys:
                        continue
                    where.append((k, '?'))
                    values.append(v)
            lookupDataId = {k[0]: v for k, v in zip(where, values)}
            if self.range:
                # format of self.range is ('?', isBetween-lowKey, isBetween-highKey);
                # here we transform that to {(lowKey, highKey): value}
                lookupDataId[(self.range[1], self.range[2])] = dataId[self.obsTimeName]
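                # For example (hypothetical values): with
                # self.range == ('?', 'validStart', 'validEnd') and
                # obsTimeName == 'taiObs', a data ID carrying
                # taiObs='2014-07-01' produces
                # lookupDataId[('validStart', 'validEnd')] = '2014-07-01'.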
            result = self.registry.lookup(properties, self.tables, lookupDataId, template=self.template)
        if not removed:
            return result
        # Iterate over the query results, re-inserting the skymap entries.
        result = [tuple(v if k in removed else item[v] for k, v in substitutions.items())
                  for item in result]
        return result

    def have(self, properties, dataId):
        """Return whether the provided data identifier has all
        the properties in the provided list.

        Parameters
        ----------
        properties : `list` of `str`
            Properties required.
        dataId : `dict`
            Dataset identifier.

        Returns
        -------
        `bool`
            True if all properties are present.
        """
        for prop in properties:
            if prop not in dataId:
                return False
        return True

    def need(self, properties, dataId):
        """Ensure all properties in the provided list are present in
        the data identifier, looking them up as needed.  This is only
        possible when the data ID identifies a single exposure.

        Parameters
        ----------
        properties : `list` of `str`
            Properties required.
        dataId : `dict`
            Partial dataset identifier.

        Returns
        -------
        `dict`
            Copy of dataset identifier with enhanced values.
        """
        newId = dataId.copy()
        newProps = []  # Properties we don't already have
        for prop in properties:
            if prop not in newId:
                newProps.append(prop)
        if len(newProps) == 0:
            return newId

        lookups = self.lookup(newProps, newId)
        if len(lookups) != 1:
            raise NoResults("No unique lookup for %s from %s: %d matches" %
                            (newProps, newId, len(lookups)),
                            self.datasetType, dataId)
        for i, prop in enumerate(newProps):
            newId[prop] = lookups[0][i]
        return newId
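
    # Usage sketch (hypothetical mapping and data ID, for illustration only):
    #
    #   fullId = mapping.need(["filter", "taiObs"], {"visit": 1234})
    #
    # returns a copy of the data ID augmented with the "filter" and "taiObs"
    # values looked up in the registry, or raises NoResults if the lookup is
    # not unique.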


def _formatMap(ch, k, datasetType):
    """Convert a format character into a Python type."""
    if ch in "diouxX":
        return int
    elif ch in "eEfFgG":
        return float
    elif ch in "crs":
        return str
    else:
        raise RuntimeError("Unexpected format specifier %s"
                           " for field %s in template for dataset %s" %
                           (ch, k, datasetType))


class ImageMapping(Mapping):
    """ImageMapping is a Mapping subclass for non-camera images.

    Parameters
    ----------
    datasetType : `str`
        Butler dataset type to be mapped.
    policy : `daf_persistence.Policy` or `pexPolicy.Policy`
        Mapping Policy.  (pexPolicy only for backward compatibility)
    registry : `lsst.obs.base.Registry`
        Registry for metadata lookups.
    root : `str`
        Path of root directory.
    """

    def __init__(self, datasetType, policy, registry, root, **kwargs):
        if isinstance(policy, pexPolicy.Policy):
            policy = Policy(policy)
        Mapping.__init__(self, datasetType, policy, registry, root, **kwargs)
        self.columns = policy.asArray('columns') if 'columns' in policy else None


class ExposureMapping(Mapping):
    """ExposureMapping is a Mapping subclass for normal exposures.

    Parameters
    ----------
    datasetType : `str`
        Butler dataset type to be mapped.
    policy : `daf_persistence.Policy` or `pexPolicy.Policy`
        Mapping Policy.  (pexPolicy only for backward compatibility)
    registry : `lsst.obs.base.Registry`
        Registry for metadata lookups.
    root : `str`
        Path of root directory.
    """

    def __init__(self, datasetType, policy, registry, root, **kwargs):
        if isinstance(policy, pexPolicy.Policy):
            policy = Policy(policy)
        Mapping.__init__(self, datasetType, policy, registry, root, **kwargs)
        self.columns = policy.asArray('columns') if 'columns' in policy else None

    def standardize(self, mapper, item, dataId):
        return mapper._standardizeExposure(self, item, dataId)


class CalibrationMapping(Mapping):
    """CalibrationMapping is a Mapping subclass for calibration-type products.

    The difference is that data properties in the query or template
    can be looked up using a reference Mapping in addition to this one.

    CalibrationMapping Policies can contain the following:

    reference (string, optional)
        a list of tables for finding missing dataset
        identifier components (including the observation time, if a validity
        range is required) in the exposure registry; note that the "tables"
        entry refers to the calibration registry

    refCols (string, optional)
        a list of dataset properties required from the
        reference tables for lookups in the calibration registry

    validRange (bool)
        true if the calibration dataset has a validity range
        specified by a column in the tables of the reference dataset in the
        exposure registry and two columns in the tables of this calibration
        dataset in the calibration registry

    obsTimeName (string, optional)
        the name of the column in the reference
        dataset tables containing the observation time (default "taiObs")

    validStartName (string, optional)
        the name of the column in the
        calibration dataset tables containing the start of the validity range
        (default "validStart")

    validEndName (string, optional)
        the name of the column in the
        calibration dataset tables containing the end of the validity range
        (default "validEnd")

    Parameters
    ----------
    datasetType : `str`
        Butler dataset type to be mapped.
    policy : `daf_persistence.Policy` or `pexPolicy.Policy`
        Mapping Policy.  (pexPolicy only for backward compatibility)
    registry : `lsst.obs.base.Registry`
        Registry for metadata lookups.
    calibRegistry : `lsst.obs.base.Registry`
        Registry for calibration metadata lookups.
    calibRoot : `str`
        Path of calibration root directory.
    dataRoot : `str`
        Path of data root directory; used for outputs only.
    """

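    # A minimal sketch of a CalibrationMapping policy for a hypothetical
    # "bias" dataset type (illustrative values, not from a real obs package):
    #
    #   bias:
    #     template: "bias/v%(visit)d/bias-c%(ccd)02d.fits"
    #     python: lsst.afw.image.ExposureF
    #     persistable: ExposureF
    #     storage: FitsStorage
    #     tables: bias
    #     reference: raw_visit
    #     refCols: visit
    #     validRange: true
    #     obsTimeName: taiObs
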
    def __init__(self, datasetType, policy, registry, calibRegistry, calibRoot, dataRoot=None, **kwargs):
        if isinstance(policy, pexPolicy.Policy):
            policy = Policy(policy)
        Mapping.__init__(self, datasetType, policy, calibRegistry, calibRoot, **kwargs)
        self.reference = policy.asArray("reference") if "reference" in policy else None
        self.refCols = policy.asArray("refCols") if "refCols" in policy else None
        self.refRegistry = registry
        self.dataRoot = dataRoot
        if "validRange" in policy and policy["validRange"]:
            self.range = ("?", policy["validStartName"], policy["validEndName"])
        if "columns" in policy:
            self.columns = policy.asArray("columns")
        if "filter" in policy:
            self.setFilter = policy["filter"]
        self.metadataKeys = None
        if "metadataKey" in policy:
            self.metadataKeys = policy.asArray("metadataKey")

    def map(self, mapper, dataId, write=False):
        location = Mapping.map(self, mapper, dataId, write=write)
        # Want outputs to be in the output directory
        if write and self.dataRoot:
            location.storage = self.dataRoot
        return location

    def lookup(self, properties, dataId):
        """Look up properties in a metadata registry given a partial
        dataset identifier.

        Parameters
        ----------
        properties : `list` of `str`
            Properties to look up.
        dataId : `dict`
            Dataset identifier.

        Returns
        -------
        `list` of `tuple`
            Values of properties.
        """

        # Either look up taiObs in the reference tables and then everything
        # in the calibration registry, or look up everything in the registry
        # directly.
        newId = dataId.copy()
        if self.reference is not None:
            where = []
            values = []
            for k, v in dataId.items():
                if self.refCols and k not in self.refCols:
                    continue
                where.append(k)
                values.append(v)

            # Columns we need from the regular registry
            if self.columns is not None:
                columns = set(self.columns)
                for k in dataId.keys():
                    columns.discard(k)
            else:
                columns = set(properties)

            if not columns:
                # Nothing to look up in the reference registry; continue with
                # the calibration registry.
                return Mapping.lookup(self, properties, newId)

            lookupDataId = dict(zip(where, values))
            lookups = self.refRegistry.lookup(columns, self.reference, lookupDataId)
            if len(lookups) != 1:
                raise RuntimeError("No unique lookup for %s from %s: %d matches" %
                                   (columns, dataId, len(lookups)))
            if columns == set(properties):
                # Have everything we need
                return lookups
            for i, prop in enumerate(columns):
                newId[prop] = lookups[0][i]
        return Mapping.lookup(self, properties, newId)

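    # Illustrative flow (hypothetical values): for a "bias" mapping with
    # reference == ['raw_visit'] and dataId == {"visit": 1234, "ccd": 5},
    # lookup() first queries raw_visit for whichever needed columns are
    # missing from the data ID (e.g. taiObs), folds the unique result into
    # newId, and then defers to Mapping.lookup against the calibration
    # registry, where self.range turns taiObs into a validStart/validEnd
    # range constraint.
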
    def standardize(self, mapper, item, dataId):
        return mapper._standardizeExposure(self, item, dataId, filter=self.setFilter)


class DatasetMapping(Mapping):
    """DatasetMapping is a Mapping subclass for non-Exposure datasets that can
    be retrieved by the standard daf_persistence mechanism.

    The differences are that the Storage type must be specified and no
    Exposure standardization is performed.

    The "storage" entry in the Policy is mandatory; the "tables" entry is
    optional; no "level" entry is allowed.

    Parameters
    ----------
    datasetType : `str`
        Butler dataset type to be mapped.
    policy : `daf_persistence.Policy` or `pexPolicy.Policy`
        Mapping Policy.  (pexPolicy only for backward compatibility)
    registry : `lsst.obs.base.Registry`
        Registry for metadata lookups.
    root : `str`
        Path of root directory.
    """

    def __init__(self, datasetType, policy, registry, root, **kwargs):
        if isinstance(policy, pexPolicy.Policy):
            policy = Policy(policy)
        Mapping.__init__(self, datasetType, policy, registry, root, **kwargs)
        self.storage = policy["storage"]  # Storage type