lsst.obs.base  14.0-29-g0478fed+5
mapping.py
#
# LSST Data Management System
# Copyright 2008, 2009, 2010 LSST Corporation.
#
# This product includes software developed by the
# LSST Project (http://www.lsst.org/).
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the LSST License Statement and
# the GNU General Public License along with this program.  If not,
# see <http://www.lsstcorp.org/LegalNotices/>.
#

from builtins import zip
from builtins import object
from collections import OrderedDict
import os
import re
from lsst.daf.base import PropertySet
from lsst.daf.persistence import ButlerLocation, NoResults
from lsst.daf.persistence.policy import Policy
import lsst.pex.policy as pexPolicy

__all__ = ["Mapping", "ImageMapping", "ExposureMapping", "CalibrationMapping", "DatasetMapping"]


class Mapping(object):

    """Mapping is a base class for all mappings.  Mappings are used by
    the Mapper to map (determine a path to some data given some
    identifiers) and standardize (convert data into some standard
    format or type) data, and to query the associated registry to see
    what data is available.

    Subclasses must specify self.storage or else override self.map().

    Public methods: lookup, have, need, keys, map

    Mappings are specified mainly by policy.  A Mapping policy should
    consist of:

    template (string): a Python string providing the filename for that
    particular dataset type based on some data identifiers.  In the
    case of redundancy in the path (e.g., file uniquely specified by
    the exposure number, but filter in the path), the
    redundant/dependent identifiers can be looked up in the registry.

    python (string): the Python type for the retrieved data (e.g.
    lsst.afw.image.ExposureF)

    persistable (string): the Persistable registration for the on-disk data
    (e.g. ImageU)

    storage (string, optional): Storage type for this dataset type (e.g.
    "BoostStorage")

    level (string, optional): the level in the camera hierarchy at which the
    data is stored (Amp, Ccd or skyTile), if relevant

    tables (string, optional): a whitespace-delimited list of tables in the
    registry that can be NATURAL JOIN-ed to look up additional
    information.

    Parameters
    ----------
    datasetType : `str`
        Butler dataset type to be mapped.
    policy : `daf_persistence.Policy` or `pexPolicy.Policy`
        Mapping Policy.  (pexPolicy only for backward compatibility)
    registry : `lsst.obs.base.Registry`
        Registry for metadata lookups.
    rootStorage : Storage subclass instance
        Interface to persisted repository data.
    provided : `list` of `str`
        Keys provided by the mapper.
    """
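
    # A minimal sketch of a policy fragment with the entries described above,
    # as it might appear in an obs package's YAML policy (the dataset type,
    # template, and key names here are illustrative, not from a real camera):
    #
    #     calexp:
    #         template: "calexp/v%(visit)d-f%(filter)s/c%(ccd)02d.fits"
    #         python: "lsst.afw.image.ExposureF"
    #         persistable: "ExposureF"
    #         storage: "FitsStorage"
    #         tables: "raw raw_visit"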

    def __init__(self, datasetType, policy, registry, rootStorage, provided=None):

        if policy is None:
            raise RuntimeError("No policy provided for mapping")

        if isinstance(policy, pexPolicy.Policy):
            policy = Policy(policy)

        self.datasetType = datasetType
        self.registry = registry
        self.rootStorage = rootStorage

        self._template = policy['template']  # Template path
        # In most cases the template cannot be used if it is empty, so it is
        # accessed via a property that raises if it is used while
        # `not self._template`.  Here we *do* allow it to be empty, for the
        # purpose of fetching the key dict so that the mapping can be
        # constructed and can raise later if the template is actually used
        # while invalid.  This is a little odd, but it allows the template
        # check to be introduced without a major refactor.
        if self._template:
            self.keyDict = dict([
                (k, _formatMap(v, k, datasetType))
                for k, v in
                re.findall(r'\%\((\w+)\).*?([diouxXeEfFgGcrs])', self.template)
            ])
        else:
            self.keyDict = {}
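        # For illustration (the template is hypothetical): a template such as
        # "raw/v%(visit)d_f%(filter)s.fits" yields
        # keyDict == {'visit': int, 'filter': str}.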
        if provided is not None:
            for p in provided:
                if p in self.keyDict:
                    del self.keyDict[p]
        self.python = policy['python']  # Python type
        self.persistable = policy['persistable']  # Persistable type
        self.storage = policy['storage']
        if 'level' in policy:
            self.level = policy['level']  # Level in camera hierarchy
        if 'tables' in policy:
            self.tables = policy.asArray('tables')
        else:
            self.tables = None
        self.range = None
        self.columns = None
        self.obsTimeName = policy['obsTimeName'] if 'obsTimeName' in policy else None
        self.recipe = policy['recipe'] if 'recipe' in policy else 'default'

    @property
    def template(self):
        if self._template:  # template must not be an empty string or None
            return self._template
        else:
            raise RuntimeError("Template is not defined for the {} dataset type; "
                               "it must be set before it can be used.".format(self.datasetType))

    def keys(self):
        """Return the dict of keys and value types required for this mapping."""
        return self.keyDict

    def map(self, mapper, dataId, write=False):
        """Standard implementation of map function.

        Parameters
        ----------
        mapper : `lsst.daf.persistence.Mapper`
            Object to be mapped.
        dataId : `dict`
            Dataset identifier.
        write : `bool`, optional
            True if this mapping is being done for a write operation, in which
            case no search for an existing (possibly compressed) file is made.

        Returns
        -------
        `lsst.daf.persistence.ButlerLocation`
            Location of object that was mapped.
        """
        actualId = self.need(iter(self.keyDict.keys()), dataId)
        usedDataId = {key: actualId[key] for key in self.keyDict.keys()}
        path = mapper._mapActualToPath(self.template, actualId)
        if os.path.isabs(path):
            raise RuntimeError("Mapped path should not be absolute.")
        if not write:
            # This allows mapped files to be compressed, ending in .gz or .fz,
            # without any indication from the policy that the file should be
            # compressed, easily allowing repositories to contain a mixture of
            # compressed and uncompressed files.
            # If needed we can add a policy flag to allow compressed files or
            # not, and perhaps a list of allowed extensions that may exist at
            # the end of the template.
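            # For example (the path is hypothetical): if the template yields
            # "raw/v1234_fr.fits", the loop below probes "raw/v1234_fr.fits",
            # "raw/v1234_fr.fits.gz", and "raw/v1234_fr.fits.fz", keeping the
            # first one found in the repository.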
            for ext in (None, '.gz', '.fz'):
                if ext and path.endswith(ext):
                    continue  # if the path already ends with the extension
                extPath = path + ext if ext else path
                newPath = self.rootStorage.instanceSearch(extPath)
                if newPath:
                    path = newPath
                    break
        assert path, "Fully-qualified filename is empty."

        addFunc = "add_" + self.datasetType  # Name of method for additionalData
        if hasattr(mapper, addFunc):
            addFunc = getattr(mapper, addFunc)
            additionalData = addFunc(self.datasetType, actualId)
            assert isinstance(additionalData, PropertySet), \
                "Bad type for returned data: %s" % (type(additionalData),)
        else:
            additionalData = None

        return ButlerLocation(pythonType=self.python, cppType=self.persistable, storageName=self.storage,
                              locationList=path, dataId=actualId.copy(), mapper=mapper,
                              storage=self.rootStorage, usedDataId=usedDataId,
                              datasetType=self.datasetType, additionalData=additionalData)

    def lookup(self, properties, dataId):
        """Look up properties in a metadata registry given a partial
        dataset identifier.

        Parameters
        ----------
        properties : `list` of `str`
            What to look up.
        dataId : `dict`
            Dataset identifier.

        Returns
        -------
        `list` of `tuple`
            Values of properties.
        """
        if self.registry is None:
            raise RuntimeError("No registry for lookup")

        skyMapKeys = ("tract", "patch")

        where = []
        values = []

        # Prepare to remove skymap entries from the properties list.  These
        # must be in the data ID, so we store which ones we're removing and
        # create an OrderedDict that tells us where to re-insert them.  That
        # maps the name of the property to either its index in the properties
        # list *after* the skymap ones have been removed (for entries that
        # aren't skymap ones) or the value from the data ID (for those that
        # are).
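        # For example (values are hypothetical): with
        # properties == ['tract', 'filter', 'patch'] and a data ID containing
        # tract=1 and patch='2,3', this yields
        # substitutions == OrderedDict([('tract', 1), ('filter', 0), ('patch', '2,3')])
        # and removed == {'tract', 'patch'}, so only 'filter' is queried.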
        removed = set()
        substitutions = OrderedDict()
        index = 0
        properties = list(properties)  # don't modify the original list
        for p in properties:
            if p in skyMapKeys:
                try:
                    substitutions[p] = dataId[p]
                    removed.add(p)
                except KeyError:
                    raise RuntimeError(
                        "Cannot look up skymap key '%s'; it must be explicitly included in the data ID" % p
                    )
            else:
                substitutions[p] = index
                index += 1
        # Can't actually remove while iterating above, so we do it here.
        for p in removed:
            properties.remove(p)

        fastPath = True
        for p in properties:
            if p not in ('filter', 'expTime', 'taiObs'):
                fastPath = False
                break
        if fastPath and 'visit' in dataId and "raw" in self.tables:
            lookupDataId = {'visit': dataId['visit']}
            result = self.registry.lookup(properties, 'raw_visit', lookupDataId, template=self.template)
        else:
            if dataId is not None:
                for k, v in dataId.items():
                    if self.columns and k not in self.columns:
                        continue
                    if k == self.obsTimeName:
                        continue
                    if k in skyMapKeys:
                        continue
                    where.append((k, '?'))
                    values.append(v)
            lookupDataId = {k[0]: v for k, v in zip(where, values)}
            if self.range:
                # format of self.range is ('?', isBetween-lowKey, isBetween-highKey)
                # here we transform that to {(lowKey, highKey): value}
                lookupDataId[(self.range[1], self.range[2])] = dataId[self.obsTimeName]
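                # e.g. with the default calibration column names this becomes
                # {('validStart', 'validEnd'): dataId['taiObs']}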
            result = self.registry.lookup(properties, self.tables, lookupDataId, template=self.template)
        if not removed:
            return result
        # Iterate over the query results, re-inserting the skymap entries.
        result = [tuple(v if k in removed else item[v] for k, v in substitutions.items())
                  for item in result]
        return result

    def have(self, properties, dataId):
        """Returns whether the provided data identifier has all
        the properties in the provided list.

        Parameters
        ----------
        properties : `list` of `str`
            Properties required.
        dataId : `dict`
            Dataset identifier.

        Returns
        -------
        `bool`
            True if all properties are present.
        """
        for prop in properties:
            if prop not in dataId:
                return False
        return True

    def need(self, properties, dataId):
        """Ensures all properties in the provided list are present in
        the data identifier, looking them up as needed.  This is only
        possible for the case where the data identifies a single
        exposure.

        Parameters
        ----------
        properties : `list` of `str`
            Properties required.
        dataId : `dict`
            Partial dataset identifier.

        Returns
        -------
        `dict`
            Copy of dataset identifier with enhanced values.
        """
        newId = dataId.copy()
        newProps = []  # Properties we don't already have
        for prop in properties:
            if prop not in newId:
                newProps.append(prop)
        if len(newProps) == 0:
            return newId

        lookups = self.lookup(newProps, newId)
        if len(lookups) != 1:
            raise NoResults("No unique lookup for %s from %s: %d matches" %
                            (newProps, newId, len(lookups)),
                            self.datasetType, dataId)
        for i, prop in enumerate(newProps):
            newId[prop] = lookups[0][i]
        return newId
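
    # A minimal usage sketch for have/need (keys and values are hypothetical;
    # assumes the registry can resolve the missing key to a unique exposure):
    #
    #     mapping.have(['visit', 'filter'], {'visit': 1234})    # -> False
    #     fullId = mapping.need(['visit', 'filter'], {'visit': 1234})
    #     fullId['filter']                                      # e.g. 'r'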


def _formatMap(ch, k, datasetType):
    """Convert a format character into a Python type."""
    if ch in "diouxX":
        return int
    elif ch in "eEfFgG":
        return float
    elif ch in "crs":
        return str
    else:
        raise RuntimeError("Unexpected format specifier %s"
                           " for field %s in template for dataset %s" %
                           (ch, k, datasetType))
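
# For illustration: _formatMap('d', 'visit', 'raw') returns int, and
# _formatMap('s', 'filter', 'raw') returns str (the field and dataset names
# are hypothetical and are used only in the error message).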


class ImageMapping(Mapping):
    """ImageMapping is a Mapping subclass for non-camera images.

    Parameters
    ----------
    datasetType : `str`
        Butler dataset type to be mapped.
    policy : `daf_persistence.Policy` or `pexPolicy.Policy`
        Mapping Policy.  (pexPolicy only for backward compatibility)
    registry : `lsst.obs.base.Registry`
        Registry for metadata lookups.
    root : `str`
        Path of root directory.
    """

    def __init__(self, datasetType, policy, registry, root, **kwargs):
        if isinstance(policy, pexPolicy.Policy):
            policy = Policy(policy)
        Mapping.__init__(self, datasetType, policy, registry, root, **kwargs)
        self.columns = policy.asArray('columns') if 'columns' in policy else None


class ExposureMapping(Mapping):
    """ExposureMapping is a Mapping subclass for normal exposures.

    Parameters
    ----------
    datasetType : `str`
        Butler dataset type to be mapped.
    policy : `daf_persistence.Policy` or `pexPolicy.Policy`
        Mapping Policy.  (pexPolicy only for backward compatibility)
    registry : `lsst.obs.base.Registry`
        Registry for metadata lookups.
    root : `str`
        Path of root directory.
    """

    def __init__(self, datasetType, policy, registry, root, **kwargs):
        if isinstance(policy, pexPolicy.Policy):
            policy = Policy(policy)
        Mapping.__init__(self, datasetType, policy, registry, root, **kwargs)
        self.columns = policy.asArray('columns') if 'columns' in policy else None

    def standardize(self, mapper, item, dataId):
        return mapper._standardizeExposure(self, item, dataId)


class CalibrationMapping(Mapping):
    """CalibrationMapping is a Mapping subclass for calibration-type products.

    The difference is that data properties in the query or template
    can be looked up using a reference Mapping in addition to this one.

    CalibrationMapping Policies can contain the following:

    reference (string, optional)
        a list of tables for finding missing dataset identifier components
        (including the observation time, if a validity range is required) in
        the exposure registry; note that the "tables" entry refers to the
        calibration registry

    refCols (string, optional)
        a list of dataset properties required from the reference tables for
        lookups in the calibration registry

    validRange (bool)
        true if the calibration dataset has a validity range specified by a
        column in the tables of the reference dataset in the exposure
        registry and two columns in the tables of this calibration dataset
        in the calibration registry

    obsTimeName (string, optional)
        the name of the column in the reference dataset tables containing the
        observation time (default "taiObs")

    validStartName (string, optional)
        the name of the column in the calibration dataset tables containing
        the start of the validity range (default "validStart")

    validEndName (string, optional)
        the name of the column in the calibration dataset tables containing
        the end of the validity range (default "validEnd")

    Parameters
    ----------
    datasetType : `str`
        Butler dataset type to be mapped.
    policy : `daf_persistence.Policy` or `pexPolicy.Policy`
        Mapping Policy.  (pexPolicy only for backward compatibility)
    registry : `lsst.obs.base.Registry`
        Registry for metadata lookups.
    calibRegistry : `lsst.obs.base.Registry`
        Registry for calibration metadata lookups.
    calibRoot : `str`
        Path of calibration root directory.
    dataRoot : `str`
        Path of data root directory; used for outputs only.
    """
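
    # A sketch of a plausible calibration policy fragment using the entries
    # described above (the dataset type, template, tables, and columns are
    # illustrative only):
    #
    #     bias:
    #         template: "BIAS/%(calibDate)s/bias-c%(ccd)02d.fits"
    #         python: "lsst.afw.image.ExposureF"
    #         persistable: "ExposureF"
    #         storage: "FitsStorage"
    #         tables: "bias"
    #         reference: "raw_visit"
    #         refCols: "visit"
    #         validRange: true
    #         obsTimeName: "taiObs"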

    def __init__(self, datasetType, policy, registry, calibRegistry, calibRoot, dataRoot=None, **kwargs):
        if isinstance(policy, pexPolicy.Policy):
            policy = Policy(policy)
        Mapping.__init__(self, datasetType, policy, calibRegistry, calibRoot, **kwargs)
        self.reference = policy.asArray("reference") if "reference" in policy else None
        self.refCols = policy.asArray("refCols") if "refCols" in policy else None
        self.refRegistry = registry
        self.dataRoot = dataRoot
        if "validRange" in policy and policy["validRange"]:
            self.range = ("?", policy["validStartName"], policy["validEndName"])
        if "columns" in policy:
            self.columns = policy.asArray("columns")
        # Default to None so that standardize() can always pass setFilter,
        # even when the policy has no "filter" entry.
        self.setFilter = policy["filter"] if "filter" in policy else None
        self.metadataKeys = None
        if "metadataKey" in policy:
            self.metadataKeys = policy.asArray("metadataKey")

    def map(self, mapper, dataId, write=False):
        location = Mapping.map(self, mapper, dataId, write=write)
        # Want outputs to be in the output directory
        if write and self.dataRoot:
            location.storage = self.dataRoot
        return location

    def lookup(self, properties, dataId):
        """Look up properties in a metadata registry given a partial
        dataset identifier.

        Parameters
        ----------
        properties : `list` of `str`
            Properties to look up.
        dataId : `dict`
            Dataset identifier.

        Returns
        -------
        `list` of `tuple`
            Values of properties.
        """

        # Either look up taiObs in the reference tables and then everything
        # else in the calibration registry, or look up everything in the
        # calibration registry directly.
        newId = dataId.copy()
        if self.reference is not None:
            where = []
            values = []
            for k, v in dataId.items():
                if self.refCols and k not in self.refCols:
                    continue
                where.append(k)
                values.append(v)

            # Columns we need from the regular registry
            if self.columns is not None:
                columns = set(self.columns)
                for k in dataId.keys():
                    columns.discard(k)
            else:
                columns = set(properties)

            if not columns:
                # Nothing to look up in the reference registry; continue with
                # the calibration registry.
                return Mapping.lookup(self, properties, newId)

            lookupDataId = dict(zip(where, values))
            lookups = self.refRegistry.lookup(columns, self.reference, lookupDataId)
            if len(lookups) != 1:
                raise RuntimeError("No unique lookup for %s from %s: %d matches" %
                                   (columns, dataId, len(lookups)))
            if columns == set(properties):
                # Have everything we need
                return lookups
            for i, prop in enumerate(columns):
                newId[prop] = lookups[0][i]
        return Mapping.lookup(self, properties, newId)

    def standardize(self, mapper, item, dataId):
        return mapper._standardizeExposure(self, item, dataId, filter=self.setFilter)


class DatasetMapping(Mapping):
    """DatasetMapping is a Mapping subclass for non-Exposure datasets that can
    be retrieved by the standard daf_persistence mechanism.

    The differences are that the Storage type must be specified and no
    Exposure standardization is performed.

    The "storage" entry in the Policy is mandatory; the "tables" entry is
    optional; no "level" entry is allowed.

    Parameters
    ----------
    datasetType : `str`
        Butler dataset type to be mapped.
    policy : `daf_persistence.Policy` or `pexPolicy.Policy`
        Mapping Policy.  (pexPolicy only for backward compatibility)
    registry : `lsst.obs.base.Registry`
        Registry for metadata lookups.
    root : `str`
        Path of root directory.
    """

    def __init__(self, datasetType, policy, registry, root, **kwargs):
        if isinstance(policy, pexPolicy.Policy):
            policy = Policy(policy)
        Mapping.__init__(self, datasetType, policy, registry, root, **kwargs)
        self.storage = policy["storage"]  # Storage type