lsst.obs.base  13.0-55-gb064ced+8
mapping.py
#
# LSST Data Management System
# Copyright 2008, 2009, 2010 LSST Corporation.
#
# This product includes software developed by the
# LSST Project (http://www.lsst.org/).
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the LSST License Statement and
# the GNU General Public License along with this program.  If not,
# see <http://www.lsstcorp.org/LegalNotices/>.
#

from builtins import zip
from builtins import object
from collections import OrderedDict
import os
import re
from lsst.daf.base import PropertySet
from lsst.daf.persistence import ButlerLocation, NoResults
from lsst.daf.persistence.policy import Policy
import lsst.pex.policy as pexPolicy

33 """This module defines the Mapping base class."""
34 
35 
class Mapping(object):

    """Mapping is a base class for all mappings.  Mappings are used by
    the Mapper to map (determine a path to some data given some
    identifiers) and standardize (convert data into some standard
    format or type) data, and to query the associated registry to see
    what data is available.

    Subclasses must specify self.storage or else override self.map().

    Public methods: lookup, have, need, keys, map

    Mappings are specified mainly by policy.  A Mapping policy should
    consist of:

    template (string): a Python string providing the filename for that
    particular dataset type based on some data identifiers.  In the
    case of redundancy in the path (e.g., a file uniquely specified by
    the exposure number, but with the filter also in the path), the
    redundant/dependent identifiers can be looked up in the registry.

    python (string): the Python type for the retrieved data (e.g.
    lsst.afw.image.ExposureF)

    persistable (string): the Persistable registration for the on-disk data
    (e.g. ImageU)

    storage (string, optional): Storage type for this dataset type (e.g.
    "BoostStorage")

    level (string, optional): the level in the camera hierarchy at which the
    data is stored (Amp, Ccd or skyTile), if relevant

    tables (string, optional): a whitespace-delimited list of tables in the
    registry that can be NATURAL JOIN-ed to look up additional
    information.
    """

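    # For illustration only: a hypothetical policy fragment, shown in a
    # YAML-like form, that such a Mapping could be built from (the dataset
    # type name and template path are invented, not part of this module):
    #
    #     calexp: {
    #         template:    "calexp/v%(visit)d-f%(filter)s.fits"
    #         python:      "lsst.afw.image.ExposureF"
    #         persistable: "ExposureF"
    #         storage:     "FitsStorage"
    #         level:       "Ccd"
    #         tables:      "raw raw_visit"
    #     }
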
    def __init__(self, datasetType, policy, registry, rootStorage, provided=None):
        """Constructor for Mapping class.
        @param datasetType (string)
        @param policy      (daf_persistence.Policy, or pexPolicy.Policy (only for backward compatibility))
                           Mapping Policy
        @param registry    (lsst.obs.base.Registry) Registry for metadata lookups
        @param rootStorage (Storage subclass instance) Interface to persisted repository data
        @param provided    (list of strings) Keys provided by the mapper
        """

        if policy is None:
            raise RuntimeError("No policy provided for mapping")

        if isinstance(policy, pexPolicy.Policy):
            policy = Policy(policy)

        self.datasetType = datasetType
        self.registry = registry
        self.rootStorage = rootStorage

        self._template = policy['template']  # Template path
        # In most cases the template cannot be used if it is empty, and it is
        # accessed via a property that raises while `not self._template`.
        # Here we *do* allow an empty template, so that the key dict can be
        # fetched and the mapping constructed; the property then raises only
        # if the template is actually used.  This is a little odd, but it
        # lets the template check be introduced without a major refactor.
        if self._template:
            self.keyDict = dict([
                (k, _formatMap(v, k, datasetType))
                for k, v in
                re.findall(r'\%\((\w+)\).*?([diouxXeEfFgGcrs])', self.template)
            ])
        else:
            self.keyDict = {}
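        # Illustrative example (names invented): for a template like
        # "raw/v%(visit)d_f%(filter)s.fits", the findall above yields
        # keyDict == {"visit": int, "filter": str}.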
        if provided is not None:
            for p in provided:
                if p in self.keyDict:
                    del self.keyDict[p]
        self.python = policy['python']            # Python type
        self.persistable = policy['persistable']  # Persistable type
        self.storage = policy['storage']
        if 'level' in policy:
            self.level = policy['level']  # Level in camera hierarchy
        if 'tables' in policy:
            self.tables = policy.asArray('tables')
        else:
            self.tables = None
        self.range = None
        self.columns = None
        self.obsTimeName = policy['obsTimeName'] if 'obsTimeName' in policy else None
        self.recipe = policy['recipe'] if 'recipe' in policy else 'default'

    @property
    def template(self):
        if self._template:  # template must not be an empty string or None
            return self._template
        else:
            raise RuntimeError("Template is not defined for the {} dataset type, ".format(self.datasetType) +
                               "it must be set before it can be used.")

    def keys(self):
        """Return the dict of keys and value types required for this mapping."""
        return self.keyDict

    def map(self, mapper, dataId, write=False):
        """Standard implementation of map function.
        @param mapper (lsst.daf.persistence.Mapper)
        @param dataId (dict) Dataset identifier
        @param write  (bool) True if mapping for an output (write) rather than an input (read)
        @return (lsst.daf.persistence.ButlerLocation)"""
        actualId = self.need(iter(self.keyDict.keys()), dataId)
        usedDataId = {key: actualId[key] for key in self.keyDict.keys()}
        path = mapper._mapActualToPath(self.template, actualId)
        if os.path.isabs(path):
            raise RuntimeError("Mapped path should not be absolute.")
        if not write:
            # This allows mapped files to be compressed, ending in .gz or .fz, without any indication from
            # the policy that the file should be compressed, easily allowing repositories to contain a
            # combination of compressed and not-compressed files.
            # If needed we can add a policy flag to allow compressed files or not, and perhaps a list of
            # allowed extensions that may exist at the end of the template.
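            # For example (hypothetical paths): a template that renders to
            # "raw/v1_f0.fits" will also match "raw/v1_f0.fits.gz" or
            # "raw/v1_f0.fits.fz" if that is what exists on disk.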
            for ext in (None, '.gz', '.fz'):
                if ext and path.endswith(ext):
                    continue  # if the path already ends with the extension
                extPath = path + ext if ext else path
                newPath = self.rootStorage.instanceSearch(extPath)
                if newPath:
                    path = newPath
                    break
        assert path, "Fully-qualified filename is empty."

        addFunc = "add_" + self.datasetType  # Name of method for additionalData
        if hasattr(mapper, addFunc):
            addFunc = getattr(mapper, addFunc)
            additionalData = addFunc(self.datasetType, actualId)
            assert isinstance(additionalData, PropertySet), \
                "Bad type for returned data: %s" % (type(additionalData),)
        else:
            additionalData = None

        return ButlerLocation(pythonType=self.python, cppType=self.persistable, storageName=self.storage,
                              locationList=path, dataId=actualId.copy(), mapper=mapper,
                              storage=self.rootStorage, usedDataId=usedDataId, datasetType=self.datasetType,
                              additionalData=additionalData)

    def lookup(self, properties, dataId):
        """Look up properties in a metadata registry given a partial
        dataset identifier.
        @param properties (list of strings)
        @param dataId     (dict) Dataset identifier
        @return (list of tuples) values of properties"""

        if self.registry is None:
            raise RuntimeError("No registry for lookup")

        skyMapKeys = ("tract", "patch")

        where = []
        values = []

        # Prepare to remove skymap entries from properties list.  These must
        # be in the data ID, so we store which ones we're removing and create
        # an OrderedDict that tells us where to re-insert them.  That maps the
        # name of the property to either its index in the properties list
        # *after* the skymap ones have been removed (for entries that aren't
        # skymap ones) or the value from the data ID (for those that are).
        removed = set()
        substitutions = OrderedDict()
        index = 0
        properties = list(properties)  # don't modify the original list
        for p in properties:
            if p in skyMapKeys:
                try:
                    substitutions[p] = dataId[p]
                    removed.add(p)
                except KeyError:
                    raise RuntimeError(
                        "Cannot look up skymap key '%s'; it must be explicitly included in the data ID" % p
                    )
            else:
                substitutions[p] = index
                index += 1
        # Can't actually remove while iterating above, so we do it here.
        for p in removed:
            properties.remove(p)

        fastPath = True
        for p in properties:
            if p not in ('filter', 'expTime', 'taiObs'):
                fastPath = False
                break
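        # Fast path: when only visit-level properties (filter, expTime,
        # taiObs) are wanted and the visit is already known, a single query
        # against the raw_visit table suffices.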
        if fastPath and 'visit' in dataId and "raw" in self.tables:
            lookupDataId = {'visit': dataId['visit']}
            result = self.registry.lookup(properties, 'raw_visit', lookupDataId, template=self.template)
        else:
            if dataId is not None:
                for k, v in dataId.items():
                    if self.columns and k not in self.columns:
                        continue
                    if k == self.obsTimeName:
                        continue
                    if k in skyMapKeys:
                        continue
                    where.append((k, '?'))
                    values.append(v)
            lookupDataId = {k[0]: v for k, v in zip(where, values)}
            if self.range:
                # format of self.range is ('?', isBetween-lowKey, isBetween-highKey)
                # here we transform that to {(lowKey, highKey): value}
                lookupDataId[(self.range[1], self.range[2])] = dataId[self.obsTimeName]
            result = self.registry.lookup(properties, self.tables, lookupDataId, template=self.template)
        if not removed:
            return result
        # Iterate over the query results, re-inserting the skymap entries.
        result = [tuple(v if k in removed else item[v] for k, v in substitutions.items())
                  for item in result]
        return result

    def have(self, properties, dataId):
        """Returns whether the provided data identifier has all
        the properties in the provided list.
        @param properties (list of strings) Properties required
        @param dataId     (dict) Dataset identifier
        @return (bool) True if all properties are present"""
        for prop in properties:
            if prop not in dataId:
                return False
        return True

    def need(self, properties, dataId):
        """Ensures all properties in the provided list are present in
        the data identifier, looking them up as needed.  This is only
        possible for the case where the data identifies a single
        exposure.
        @param properties (list of strings) Properties required
        @param dataId     (dict) Partial dataset identifier
        @return (dict) copy of dataset identifier with enhanced values
        """
        newId = dataId.copy()
        newProps = []  # Properties we don't already have
        for prop in properties:
            if prop not in newId:
                newProps.append(prop)
        if len(newProps) == 0:
            return newId

        lookups = self.lookup(newProps, newId)
        if len(lookups) != 1:
            raise NoResults("No unique lookup for %s from %s: %d matches" %
                            (newProps, newId, len(lookups)),
                            self.datasetType, dataId)
        for i, prop in enumerate(newProps):
            newId[prop] = lookups[0][i]
        return newId

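
# Illustrative usage sketch for Mapping.have/Mapping.need (the mapping and
# data ID here are hypothetical, not part of this module): given a mapping
# whose template needs "visit" and "filter",
#
#     mapping.have(["visit", "filter"], {"visit": 1})   # -> False
#     mapping.need(["visit", "filter"], {"visit": 1})
#
# need() queries the registry for the missing "filter" value and returns a
# completed ID such as {"visit": 1, "filter": "g"}, raising NoResults unless
# exactly one match is found.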

def _formatMap(ch, k, datasetType):
    """Convert a format character into a Python type."""
    if ch in "diouxX":
        return int
    elif ch in "eEfFgG":
        return float
    elif ch in "crs":
        return str
    else:
        raise RuntimeError("Unexpected format specifier %s"
                           " for field %s in template for dataset %s" %
                           (ch, k, datasetType))
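
# For example, _formatMap('d', 'visit', 'raw') returns int, matching the
# "%(visit)d" conversion in a template for a hypothetical "raw" dataset type.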


class ImageMapping(Mapping):
    """ImageMapping is a Mapping subclass for non-camera images."""

    def __init__(self, datasetType, policy, registry, root, **kwargs):
        """Constructor for Mapping class.
        @param datasetType (string)
        @param policy      (daf_persistence.Policy, or pexPolicy.Policy (only for backward compatibility))
                           Mapping Policy
        @param registry    (lsst.obs.base.Registry) Registry for metadata lookups
        @param root        (string) Path of root directory"""
        if isinstance(policy, pexPolicy.Policy):
            policy = Policy(policy)
        Mapping.__init__(self, datasetType, policy, registry, root, **kwargs)
        self.columns = policy.asArray('columns') if 'columns' in policy else None


class ExposureMapping(Mapping):
    """ExposureMapping is a Mapping subclass for normal exposures."""

    def __init__(self, datasetType, policy, registry, root, **kwargs):
        """Constructor for Mapping class.
        @param datasetType (string)
        @param policy      (daf_persistence.Policy, or pexPolicy.Policy (only for backward compatibility))
                           Mapping Policy
        @param registry    (lsst.obs.base.Registry) Registry for metadata lookups
        @param root        (string) Path of root directory"""
        if isinstance(policy, pexPolicy.Policy):
            policy = Policy(policy)
        Mapping.__init__(self, datasetType, policy, registry, root, **kwargs)
        self.columns = policy.asArray('columns') if 'columns' in policy else None

    def standardize(self, mapper, item, dataId):
        return mapper._standardizeExposure(self, item, dataId)


class CalibrationMapping(Mapping):
    """CalibrationMapping is a Mapping subclass for calibration-type products.

    The difference is that data properties in the query or template
    can be looked up using a reference Mapping in addition to this one.

    CalibrationMapping Policies can contain the following:

    reference (string, optional): a list of tables for finding missing dataset
    identifier components (including the observation time, if a validity range
    is required) in the exposure registry; note that the "tables" entry refers
    to the calibration registry

    refCols (string, optional): a list of dataset properties required from the
    reference tables for lookups in the calibration registry

    validRange (bool): true if the calibration dataset has a validity range
    specified by a column in the tables of the reference dataset in the
    exposure registry and two columns in the tables of this calibration
    dataset in the calibration registry

    obsTimeName (string, optional): the name of the column in the reference
    dataset tables containing the observation time (default "taiObs")

    validStartName (string, optional): the name of the column in the
    calibration dataset tables containing the start of the validity range
    (default "validStart")

    validEndName (string, optional): the name of the column in the
    calibration dataset tables containing the end of the validity range
    (default "validEnd")
    """
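
    # For illustration only: a hypothetical policy fragment for a calibration
    # dataset with a validity range (all names here are invented):
    #
    #     bias: {
    #         template:    "bias/bias-%(ccd)02d.fits"
    #         python:      "lsst.afw.image.ExposureF"
    #         persistable: "ExposureF"
    #         storage:     "FitsStorage"
    #         tables:      "bias"
    #         reference:   "raw_visit"
    #         validRange:  true
    #         obsTimeName: "taiObs"
    #     }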

    def __init__(self, datasetType, policy, registry, calibRegistry, calibRoot, dataRoot=None, **kwargs):
        """Constructor for Mapping class.
        @param datasetType   (string)
        @param policy        (daf_persistence.Policy, or pexPolicy.Policy (only for backward compatibility))
                             Mapping Policy
        @param registry      (lsst.obs.base.Registry) Registry for metadata lookups
        @param calibRegistry (lsst.obs.base.Registry) Registry for calibration metadata lookups
        @param calibRoot     (string) Path of calibration root directory
        @param dataRoot      (string) Path of data root directory; used for outputs only
        """
        if isinstance(policy, pexPolicy.Policy):
            policy = Policy(policy)
        Mapping.__init__(self, datasetType, policy, calibRegistry, calibRoot, **kwargs)
        self.reference = policy.asArray("reference") if "reference" in policy else None
        self.refCols = policy.asArray("refCols") if "refCols" in policy else None
        self.refRegistry = registry
        self.dataRoot = dataRoot
        if "validRange" in policy and policy["validRange"]:
            self.range = ("?", policy["validStartName"], policy["validEndName"])
        if "columns" in policy:
            self.columns = policy.asArray("columns")
        if "filter" in policy:
            self.setFilter = policy["filter"]
        self.metadataKeys = None
        if "metadataKey" in policy:
            self.metadataKeys = policy.asArray("metadataKey")

    def map(self, mapper, dataId, write=False):
        location = Mapping.map(self, mapper, dataId, write=write)
        # Want outputs to be in the output directory
        if write and self.dataRoot:
            location.storage = self.dataRoot
        return location

    def lookup(self, properties, dataId):
        """Look up properties in a metadata registry given a partial
        dataset identifier.
        @param properties (list of strings)
        @param dataId     (dict) Dataset identifier
        @return (list of tuples) values of properties"""

        # Either look up taiObs in the reference tables and then everything
        # else in the calibration registry, or look up everything directly
        # in the calibration registry.

        newId = dataId.copy()
        if self.reference is not None:
            where = []
            values = []
            for k, v in dataId.items():
                if self.refCols and k not in self.refCols:
                    continue
                where.append(k)
                values.append(v)

            # Columns we need from the regular registry
            if self.columns is not None:
                columns = set(self.columns)
                for k in dataId.keys():
                    columns.discard(k)
            else:
                columns = set(properties)

            if not columns:
                # Nothing to look up in the reference registry; continue with the calib registry
                return Mapping.lookup(self, properties, newId)

            lookupDataId = dict(zip(where, values))
            lookups = self.refRegistry.lookup(columns, self.reference, lookupDataId)
            if len(lookups) != 1:
                raise RuntimeError("No unique lookup for %s from %s: %d matches" %
                                   (columns, dataId, len(lookups)))
            if columns == set(properties):
                # Have everything we need
                return lookups
            for i, prop in enumerate(columns):
                newId[prop] = lookups[0][i]
        return Mapping.lookup(self, properties, newId)

    def standardize(self, mapper, item, dataId):
        return mapper._standardizeExposure(self, item, dataId, filter=self.setFilter)


452  """DatasetMapping is a Mapping subclass for non-Exposure datasets that can
453  be retrieved by the standard daf_persistence mechanism.
454 
455  The differences are that the Storage type must be specified and no
456  Exposure standardization is performed.
457 
458  The "storage" entry in the Policy is mandatory; the "tables" entry is
459  optional; no "level" entry is allowed. """
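
    # For illustration only: a hypothetical policy fragment (names invented)
    # for a dataset handled entirely by daf_persistence:
    #
    #     srcCatalog: {
    #         template:    "catalogs/src-v%(visit)d.fits"
    #         python:      "lsst.afw.table.SourceCatalog"
    #         persistable: "SourceCatalog"
    #         storage:     "FitsCatalogStorage"
    #     }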

    def __init__(self, datasetType, policy, registry, root, **kwargs):
        """Constructor for DatasetMapping class.
        @param datasetType (string)
        @param policy      (daf_persistence.Policy, or pexPolicy.Policy (only for backward compatibility))
                           Mapping Policy
        @param registry    (lsst.obs.base.Registry) Registry for metadata lookups
        @param root        (string) Path of root directory"""
        if isinstance(policy, pexPolicy.Policy):
            policy = Policy(policy)
        Mapping.__init__(self, datasetType, policy, registry, root, **kwargs)
        self.storage = policy["storage"]  # Storage type