mapping.py
#
# LSST Data Management System
# Copyright 2008, 2009, 2010 LSST Corporation.
#
# This product includes software developed by the
# LSST Project (http://www.lsst.org/).
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the LSST License Statement and
# the GNU General Public License along with this program.  If not,
# see <http://www.lsstcorp.org/LegalNotices/>.
#

from builtins import zip
from builtins import object
from collections import OrderedDict
import os
import re
from lsst.daf.persistence import ButlerLocation, NoResults
from lsst.daf.persistence.policy import Policy
import lsst.pex.policy as pexPolicy

32 """This module defines the Mapping base class."""
33 
34 
class Mapping(object):

    """Mapping is a base class for all mappings.  Mappings are used by
    the Mapper to map (determine a path to some data given some
    identifiers) and standardize (convert data into some standard
    format or type) data, and to query the associated registry to see
    what data is available.

    Subclasses must specify self.storage or else override self.map().

    Public methods: lookup, have, need, keys, map

    Mappings are specified mainly by policy.  A Mapping policy should
    consist of:

    template (string): a Python string providing the filename for that
    particular dataset type based on some data identifiers.  In the
    case of redundancy in the path (e.g., a file uniquely specified by
    the exposure number, but with the filter also in the path), the
    redundant/dependent identifiers can be looked up in the registry.

    python (string): the Python type for the retrieved data (e.g.
    lsst.afw.image.ExposureF)

    persistable (string): the Persistable registration for the on-disk data
    (e.g. ImageU)

    storage (string, optional): Storage type for this dataset type (e.g.
    "BoostStorage")

    level (string, optional): the level in the camera hierarchy at which the
    data is stored (Amp, Ccd or skyTile), if relevant

    tables (string, optional): a whitespace-delimited list of tables in the
    registry that can be NATURAL JOIN-ed to look up additional
    information.
    """

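    # A minimal sketch of such a policy fragment, in the YAML form used by
    # daf_persistence Policy files (hypothetical dataset type, template, and
    # values, for illustration only; see a real obs_* package policy for the
    # authoritative layout):
    #
    #     calexp:
    #         template: "calexp/v%(visit)d_f%(filter)s.fits"
    #         python: lsst.afw.image.ExposureF
    #         persistable: ExposureF
    #         storage: FitsStorage
    #         level: Ccd
    #         tables: raw raw_visit
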
    def __init__(self, datasetType, policy, registry, rootStorage, provided=None):
        """Constructor for Mapping class.
        @param datasetType (string)
        @param policy      (daf_persistence.Policy, or pexPolicy.Policy (only for backward compatibility))
                           Mapping Policy
        @param registry    (lsst.obs.base.Registry) Registry for metadata lookups
        @param rootStorage (Storage subclass instance) Interface to persisted repository data
        @param provided    (list of strings) Keys provided by the mapper
        """

        if policy is None:
            raise RuntimeError("No policy provided for mapping")

        if isinstance(policy, pexPolicy.Policy):
            policy = Policy(policy)

        self.datasetType = datasetType
        self.registry = registry
        self.rootStorage = rootStorage

        self._template = policy['template']  # Template path
        # In most cases the template cannot be used if it is empty, and it is accessed via a
        # property that raises if it is used while `not self._template`. Here we *do* allow it to
        # be empty, so that the key dict can be fetched and the mapping constructed; the property
        # will raise later if an empty template is actually used. This is a little odd, but it
        # allows the template check to be introduced without a major refactor.
        if self._template:
            self.keyDict = dict([
                (k, _formatMap(v, k, datasetType))
                for k, v in
                re.findall(r'\%\((\w+)\).*?([diouxXeEfFgGcrs])', self.template)
            ])
        else:
            self.keyDict = {}
        if provided is not None:
            for p in provided:
                if p in self.keyDict:
                    del self.keyDict[p]
        self.python = policy['python']  # Python type
        self.persistable = policy['persistable']  # Persistable type
        self.storage = policy['storage']
        if 'level' in policy:
            self.level = policy['level']  # Level in camera hierarchy
        if 'tables' in policy:
            self.tables = policy.asArray('tables')
        else:
            self.tables = None
        self.range = None
        self.columns = None
        self.obsTimeName = policy['obsTimeName'] if 'obsTimeName' in policy else None

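    # To illustrate the key extraction above (hypothetical template, not from
    # any real policy): for template "raw/v%(visit)d_f%(filter)s.fits", the
    # re.findall() call yields [('visit', 'd'), ('filter', 's')], so keyDict
    # becomes {'visit': int, 'filter': str} via _formatMap.
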
    @property
    def template(self):
        if self._template:  # template must not be an empty string or None
            return self._template
        else:
            raise RuntimeError("Template is not defined for the {} dataset type; ".format(self.datasetType) +
                               "it must be set before it can be used.")

    def keys(self):
        """Return the dict of keys and value types required for this mapping."""
        return self.keyDict

    def map(self, mapper, dataId, write=False):
        """Standard implementation of map function.
        @param mapper (lsst.daf.persistence.Mapper)
        @param dataId (dict) Dataset identifier
        @return (lsst.daf.persistence.ButlerLocation)"""
        actualId = self.need(iter(self.keyDict.keys()), dataId)
        usedDataId = {key: actualId[key] for key in self.keyDict.keys()}
        path = mapper._mapActualToPath(self.template, actualId)
        if os.path.isabs(path):
            raise RuntimeError("Mapped path should not be absolute.")
        if not write:
            # This allows mapped files to be compressed, ending in .gz or .fz, without any
            # indication from the policy that the file should be compressed, easily allowing
            # repositories to contain a combination of compressed and uncompressed files.
            # If needed we can add a policy flag to allow compressed files or not, and perhaps a
            # list of allowed extensions that may exist at the end of the template.
            for ext in (None, '.gz', '.fz'):
                if ext and path.endswith(ext):
                    continue  # the path already ends with the extension
                extPath = path + ext if ext else path
                newPath = self.rootStorage.instanceSearch(extPath)
                if newPath:
                    path = newPath
                    break
        assert path, "Fully-qualified filename is empty."

        addFunc = "add_" + self.datasetType  # Name of method for additionalData
        if hasattr(mapper, addFunc):
            addFunc = getattr(mapper, addFunc)
            additionalData = addFunc(actualId)
            assert isinstance(additionalData, dict), "Bad type for returned data"
        else:
            additionalData = actualId.copy()

        return ButlerLocation(pythonType=self.python, cppType=self.persistable, storageName=self.storage,
                              locationList=path, dataId=additionalData, mapper=mapper,
                              storage=self.rootStorage, usedDataId=usedDataId, datasetType=self.datasetType)

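    # Usage sketch (hypothetical mapper and data ID; `mapper.mappings` is an
    # assumed attribute layout, not guaranteed by this module):
    #
    #     mapping = mapper.mappings['raw']
    #     loc = mapping.map(mapper, {'visit': 1234, 'ccd': 56})
    #     loc.locationList  # path(s) rendered from the template
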
    def lookup(self, properties, dataId):
        """Look up properties in a metadata registry given a partial
        dataset identifier.
        @param properties (list of strings)
        @param dataId     (dict) Dataset identifier
        @return (list of tuples) values of properties"""

        if self.registry is None:
            raise RuntimeError("No registry for lookup")

        skyMapKeys = ("tract", "patch")

        where = []
        values = []

        # Prepare to remove skymap entries from the properties list.  These must
        # be in the data ID, so we store which ones we're removing and create
        # an OrderedDict that tells us where to re-insert them.  That maps the
        # name of the property to either its index in the properties list
        # *after* the skymap ones have been removed (for entries that aren't
        # skymap ones) or the value from the data ID (for those that are).
        removed = set()
        substitutions = OrderedDict()
        index = 0
        properties = list(properties)  # don't modify the original list
        for p in properties:
            if p in skyMapKeys:
                try:
                    substitutions[p] = dataId[p]
                    removed.add(p)
                except KeyError:
                    raise RuntimeError(
                        "Cannot look up skymap key '%s'; it must be explicitly included in the data ID" % p
                    )
            else:
                substitutions[p] = index
                index += 1
        # Can't actually remove while iterating above, so we do it here.
        for p in removed:
            properties.remove(p)

        fastPath = True
        for p in properties:
            if p not in ('filter', 'expTime', 'taiObs'):
                fastPath = False
                break
        if fastPath and 'visit' in dataId and "raw" in self.tables:
            lookupDataId = {'visit': dataId['visit']}
            result = self.registry.lookup(properties, 'raw_visit', lookupDataId, template=self.template)
        else:
            if dataId is not None:
                for k, v in dataId.items():
                    if self.columns and k not in self.columns:
                        continue
                    if k == self.obsTimeName:
                        continue
                    if k in skyMapKeys:
                        continue
                    where.append((k, '?'))
                    values.append(v)
            lookupDataId = {k[0]: v for k, v in zip(where, values)}
            if self.range:
                # format of self.range is ('?', isBetween-lowKey, isBetween-highKey)
                # here we transform that to {(lowKey, highKey): value}
                lookupDataId[(self.range[1], self.range[2])] = dataId[self.obsTimeName]
            result = self.registry.lookup(properties, self.tables, lookupDataId, template=self.template)
        if not removed:
            return result
        # Iterate over the query results, re-inserting the skymap entries.
        result = [tuple(v if k in removed else item[v] for k, v in substitutions.items())
                  for item in result]
        return result

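    # Worked illustration of the skymap re-insertion above (hypothetical
    # values): with properties = ['tract', 'filter', 'patch'] and
    # dataId = {'tract': 8766, 'patch': '2,3'}, we get removed =
    # {'tract', 'patch'} and substitutions = OrderedDict([('tract', 8766),
    # ('filter', 0), ('patch', '2,3')]), so only 'filter' is queried; a
    # registry row ('r',) is then expanded back to (8766, 'r', '2,3').
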
    def have(self, properties, dataId):
        """Returns whether the provided data identifier has all
        the properties in the provided list.
        @param properties (list of strings) Properties required
        @param dataId     (dict) Dataset identifier
        @return (bool) True if all properties are present"""
        for prop in properties:
            if prop not in dataId:
                return False
        return True

    def need(self, properties, dataId):
        """Ensures all properties in the provided list are present in
        the data identifier, looking them up as needed.  This is only
        possible for the case where the data identifies a single
        exposure.
        @param properties (list of strings) Properties required
        @param dataId     (dict) Partial dataset identifier
        @return (dict) copy of dataset identifier with enhanced values
        """
        newId = dataId.copy()
        newProps = []  # Properties we don't already have
        for prop in properties:
            if prop not in newId:
                newProps.append(prop)
        if len(newProps) == 0:
            return newId

        lookups = self.lookup(newProps, newId)
        if len(lookups) != 1:
            raise NoResults("No unique lookup for %s from %s: %d matches" %
                            (newProps, newId, len(lookups)),
                            self.datasetType, dataId)
        for i, prop in enumerate(newProps):
            newId[prop] = lookups[0][i]
        return newId

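    # Usage sketch (hypothetical data ID and keys): if the registry can
    # uniquely resolve the missing keys,
    #
    #     fullId = mapping.need(['filter', 'taiObs'], {'visit': 1234})
    #
    # returns a copy of the input with 'filter' and 'taiObs' filled in;
    # NoResults is raised if zero or multiple registry rows match.
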

def _formatMap(ch, k, datasetType):
    """Convert a format character into a Python type."""
    if ch in "diouxX":
        return int
    elif ch in "eEfFgG":
        return float
    elif ch in "crs":
        return str
    else:
        raise RuntimeError("Unexpected format specifier %s"
                           " for field %s in template for dataset %s" %
                           (ch, k, datasetType))

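# For example (illustrative values): _formatMap('d', 'visit', 'raw') returns
# int, _formatMap('s', 'filter', 'raw') returns str, and an unrecognized
# format character raises RuntimeError.
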

class ImageMapping(Mapping):
    """ImageMapping is a Mapping subclass for non-camera images."""

    def __init__(self, datasetType, policy, registry, root, **kwargs):
        """Constructor for Mapping class.
        @param datasetType (string)
        @param policy      (daf_persistence.Policy, or pexPolicy.Policy (only for backward compatibility))
                           Mapping Policy
        @param registry    (lsst.obs.base.Registry) Registry for metadata lookups
        @param root        (string) Path of root directory"""
        if isinstance(policy, pexPolicy.Policy):
            policy = Policy(policy)
        Mapping.__init__(self, datasetType, policy, registry, root, **kwargs)
        self.columns = policy.asArray('columns') if 'columns' in policy else None


class ExposureMapping(Mapping):
    """ExposureMapping is a Mapping subclass for normal exposures."""

    def __init__(self, datasetType, policy, registry, root, **kwargs):
        """Constructor for Mapping class.
        @param datasetType (string)
        @param policy      (daf_persistence.Policy, or pexPolicy.Policy (only for backward compatibility))
                           Mapping Policy
        @param registry    (lsst.obs.base.Registry) Registry for metadata lookups
        @param root        (string) Path of root directory"""
        if isinstance(policy, pexPolicy.Policy):
            policy = Policy(policy)
        Mapping.__init__(self, datasetType, policy, registry, root, **kwargs)
        self.columns = policy.asArray('columns') if 'columns' in policy else None

    def standardize(self, mapper, item, dataId):
        return mapper._standardizeExposure(self, item, dataId)


class CalibrationMapping(Mapping):
    """CalibrationMapping is a Mapping subclass for calibration-type products.

    The difference is that data properties in the query or template
    can be looked up using a reference Mapping in addition to this one.

    CalibrationMapping Policies can contain the following:

    reference (string, optional): a list of tables for finding missing dataset
    identifier components (including the observation time, if a validity range
    is required) in the exposure registry; note that the "tables" entry refers
    to the calibration registry

    refCols (string, optional): a list of dataset properties required from the
    reference tables for lookups in the calibration registry

    validRange (bool): true if the calibration dataset has a validity range
    specified by a column in the tables of the reference dataset (in the
    exposure registry) and two columns in the tables of this calibration
    dataset (in the calibration registry)

    obsTimeName (string, optional): the name of the column in the reference
    dataset tables containing the observation time (default "taiObs")

    validStartName (string, optional): the name of the column in the
    calibration dataset tables containing the start of the validity range
    (default "validStart")

    validEndName (string, optional): the name of the column in the
    calibration dataset tables containing the end of the validity range
    (default "validEnd")
    """

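    # A minimal sketch of a CalibrationMapping policy fragment (hypothetical
    # dataset type, template, and column names, for illustration only):
    #
    #     bias:
    #         template: "bias/%(calibDate)s/bias-%(ccd)02d.fits"
    #         python: lsst.afw.image.ExposureF
    #         persistable: ExposureF
    #         storage: FitsStorage
    #         tables: bias
    #         reference: raw_visit
    #         refCols: visit
    #         validRange: true
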
    def __init__(self, datasetType, policy, registry, calibRegistry, calibRoot, dataRoot=None, **kwargs):
        """Constructor for Mapping class.
        @param datasetType   (string)
        @param policy        (daf_persistence.Policy, or pexPolicy.Policy (only for backward compatibility))
                             Mapping Policy
        @param registry      (lsst.obs.base.Registry) Registry for metadata lookups
        @param calibRegistry (lsst.obs.base.Registry) Registry for calibration metadata lookups
        @param calibRoot     (string) Path of calibration root directory
        @param dataRoot      (string) Path of data root directory; used for outputs only
        """
        if isinstance(policy, pexPolicy.Policy):
            policy = Policy(policy)
        Mapping.__init__(self, datasetType, policy, calibRegistry, calibRoot, **kwargs)
        self.reference = policy.asArray("reference") if "reference" in policy else None
        self.refCols = policy.asArray("refCols") if "refCols" in policy else None
        self.refRegistry = registry
        self.dataRoot = dataRoot
        if "validRange" in policy and policy["validRange"]:
            self.range = ("?", policy["validStartName"], policy["validEndName"])
        if "columns" in policy:
            self.columns = policy.asArray("columns")
        if "filter" in policy:
            self.setFilter = policy["filter"]
        self.metadataKeys = None
        if "metadataKey" in policy:
            self.metadataKeys = policy.asArray("metadataKey")

    def map(self, mapper, dataId, write=False):
        location = Mapping.map(self, mapper, dataId, write=write)
        # Want outputs to be in the output directory
        if write and self.dataRoot:
            location.storage = self.dataRoot
        return location

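    # Sketch (hypothetical objects): reads search the calibration root, while
    # writes of the same dataset type land in the data repository when
    # dataRoot is set:
    #
    #     readLoc = calibMapping.map(mapper, dataId)               # storage -> calibRoot
    #     writeLoc = calibMapping.map(mapper, dataId, write=True)  # storage -> dataRoot
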
    def lookup(self, properties, dataId):
        """Look up properties in a metadata registry given a partial
        dataset identifier.
        @param properties (list of strings)
        @param dataId     (dict) Dataset identifier
        @return (list of tuples) values of properties"""

        # Either look up the observation time in the reference tables and then
        # everything else in the calibration registry, or look up everything in
        # the calibration registry directly.

        newId = dataId.copy()
        if self.reference is not None:
            where = []
            values = []
            for k, v in dataId.items():
                if self.refCols and k not in self.refCols:
                    continue
                where.append(k)
                values.append(v)

            # Columns we need from the regular registry
            if self.columns is not None:
                columns = set(self.columns)
                for k in dataId.keys():
                    columns.discard(k)
            else:
                columns = set(properties)

            if not columns:
                # Nothing to look up in the reference registry; continue with the calib registry
                return Mapping.lookup(self, properties, newId)

            lookupDataId = dict(zip(where, values))
            lookups = self.refRegistry.lookup(columns, self.reference, lookupDataId)
            if len(lookups) != 1:
                raise RuntimeError("No unique lookup for %s from %s: %d matches" %
                                   (columns, dataId, len(lookups)))
            if columns == set(properties):
                # Have everything we need
                return lookups
            for i, prop in enumerate(columns):
                newId[prop] = lookups[0][i]
        return Mapping.lookup(self, properties, newId)

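    # Flow sketch (hypothetical values): for dataId = {'visit': 1234, 'ccd': 3}
    # with reference tables supplying 'taiObs', the reference registry is
    # queried first to fill newId['taiObs']; the validity-range clause
    # {(validStartName, validEndName): taiObs} built in Mapping.lookup then
    # selects the calibration row whose range brackets the observation time.
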
    def standardize(self, mapper, item, dataId):
        return mapper._standardizeExposure(self, item, dataId, filter=self.setFilter)


class DatasetMapping(Mapping):
    """DatasetMapping is a Mapping subclass for non-Exposure datasets that can
    be retrieved by the standard daf_persistence mechanism.

    The differences are that the Storage type must be specified and no
    Exposure standardization is performed.

    The "storage" entry in the Policy is mandatory; the "tables" entry is
    optional; no "level" entry is allowed.
    """

    def __init__(self, datasetType, policy, registry, root, **kwargs):
        """Constructor for DatasetMapping class.
        @param datasetType (string)
        @param policy      (daf_persistence.Policy, or pexPolicy.Policy (only for backward compatibility))
                           Mapping Policy
        @param registry    (lsst.obs.base.Registry) Registry for metadata lookups
        @param root        (string) Path of root directory"""
        if isinstance(policy, pexPolicy.Policy):
            policy = Policy(policy)
        Mapping.__init__(self, datasetType, policy, registry, root, **kwargs)
        self.storage = policy["storage"]  # Storage type
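
# A minimal sketch of a DatasetMapping policy fragment (hypothetical dataset
# type and template; "storage" is mandatory here and "level" is not allowed):
#
#     processCcd_metadata:
#         template: "metadata/v%(visit)d_c%(ccd)02d.boost"
#         python: lsst.daf.base.PropertySet
#         persistable: PropertySet
#         storage: BoostStorage
#         tables: raw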