lsst.obs.base 13.0-27-g9d555d8+3
mapping.py
#
# LSST Data Management System
# Copyright 2008, 2009, 2010 LSST Corporation.
#
# This product includes software developed by the
# LSST Project (http://www.lsst.org/).
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the LSST License Statement and
# the GNU General Public License along with this program.  If not,
# see <http://www.lsstcorp.org/LegalNotices/>.
#
"""This module defines the Mapping base class."""

from builtins import zip
from builtins import object
from collections import OrderedDict
import os
import re

from lsst.daf.persistence import ButlerLocation, NoResults
from lsst.daf.persistence.policy import Policy
import lsst.pex.policy as pexPolicy


class Mapping(object):

    """Mapping is a base class for all mappings.  Mappings are used by
    the Mapper to map (determine a path to some data given some
    identifiers) and standardize (convert data into some standard
    format or type) data, and to query the associated registry to see
    what data is available.

    Subclasses must specify self.storage or else override self.map().

    Public methods: lookup, have, need, keys, map

    Mappings are specified mainly by policy.  A Mapping policy should
    consist of:

    template (string): a Python string providing the filename for that
    particular dataset type based on some data identifiers.  In the
    case of redundancy in the path (e.g., a file uniquely specified by
    the exposure number, but with the filter also in the path), the
    redundant/dependent identifiers can be looked up in the registry.

    python (string): the Python type for the retrieved data (e.g.
    lsst.afw.image.ExposureF)

    persistable (string): the Persistable registration for the on-disk data
    (e.g. ImageU)

    storage (string, optional): Storage type for this dataset type (e.g.
    "BoostStorage")

    level (string, optional): the level in the camera hierarchy at which the
    data is stored (Amp, Ccd or skyTile), if relevant

    tables (string, optional): a whitespace-delimited list of tables in the
    registry that can be NATURAL JOIN-ed to look up additional
    information.
    """
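
    # The keys documented above are illustrated by this hedged, invented
    # policy fragment for a hypothetical "calexp" dataset type (YAML form);
    # the key names come from the docstring, the values are assumptions:
    #
    #   calexp:
    #     template: "calexp/v%(visit)d-f%(filter)s/c%(ccd)02d.fits"
    #     python: "lsst.afw.image.ExposureF"
    #     persistable: "ExposureF"
    #     storage: "FitsStorage"
    #     level: "Ccd"
    #     tables: "raw raw_visit"
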
    def __init__(self, datasetType, policy, registry, rootStorage, provided=None):
        """Constructor for Mapping class.
        @param datasetType (string)
        @param policy (daf_persistence.Policy, or pexPolicy.Policy (only for backward compatibility))
               Mapping Policy
        @param registry (lsst.obs.base.Registry) Registry for metadata lookups
        @param rootStorage (Storage subclass instance) Interface to persisted repository data
        @param provided (list of strings) Keys provided by the mapper
        """

        if policy is None:
            raise RuntimeError("No policy provided for mapping")

        if isinstance(policy, pexPolicy.Policy):
            policy = Policy(policy)

        self.datasetType = datasetType
        self.registry = registry
        self.rootStorage = rootStorage

        self.template = policy['template']  # Template path
        # Extract the dataId keys referenced by the template (e.g. "%(visit)d")
        # and map each to the Python type implied by its format character.
        self.keyDict = dict([
            (k, _formatMap(v, k, datasetType))
            for k, v in
            re.findall(r'\%\((\w+)\).*?([diouxXeEfFgGcrs])', self.template)
        ])
        if provided is not None:
            for p in provided:
                if p in self.keyDict:
                    del self.keyDict[p]
        self.python = policy['python']  # Python type
        self.persistable = policy['persistable']  # Persistable type
        self.storage = policy['storage']
        if 'level' in policy:
            self.level = policy['level']  # Level in camera hierarchy
        if 'tables' in policy:
            self.tables = policy.asArray('tables')
        else:
            self.tables = None
        self.range = None
        self.columns = None
        self.obsTimeName = policy['obsTimeName'] if 'obsTimeName' in policy else None

    def keys(self):
        """Return the dict of keys and value types required for this mapping."""
        return self.keyDict

    def map(self, mapper, dataId, write=False):
        """Standard implementation of map function.
        @param mapper (lsst.daf.persistence.Mapper)
        @param dataId (dict) Dataset identifier
        @return (lsst.daf.persistence.ButlerLocation)"""
        actualId = self.need(iter(self.keyDict.keys()), dataId)
        usedDataId = {key: actualId[key] for key in self.keyDict.keys()}
        path = mapper._mapActualToPath(self.template, actualId)
        if os.path.isabs(path):
            raise RuntimeError("Mapped path should not be absolute.")
        if not write:
            # This allows mapped files to be compressed, ending in .gz or .fz, without any
            # indication from the policy that the file should be compressed, easily allowing
            # repositories to contain a combination of compressed and uncompressed files.
            # If needed we can add a policy flag to allow compressed files or not, and perhaps
            # a list of allowed extensions that may exist at the end of the template.
            for ext in (None, '.gz', '.fz'):
                if ext and path.endswith(ext):
                    continue  # if the path already ends with the extension
                extPath = path + ext if ext else path
                newPath = self.rootStorage.instanceSearch(extPath)
                if newPath:
                    path = newPath
                    break
        assert path, "Fully-qualified filename is empty."

        addFunc = "add_" + self.datasetType  # Name of method for additionalData
        if hasattr(mapper, addFunc):
            addFunc = getattr(mapper, addFunc)
            additionalData = addFunc(actualId)
            assert isinstance(additionalData, dict), "Bad type for returned data"
        else:
            additionalData = actualId.copy()

        return ButlerLocation(pythonType=self.python, cppType=self.persistable, storageName=self.storage,
                              locationList=path, dataId=additionalData, mapper=mapper,
                              storage=self.rootStorage, usedDataId=usedDataId, datasetType=self.datasetType)

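    # A worked sketch of the template substitution map() performs (values
    # invented):
    #
    #   >>> template = "calexp/v%(visit)d-f%(filter)s/c%(ccd)02d.fits"
    #   >>> template % {'visit': 12345, 'filter': 'r', 'ccd': 7}
    #   'calexp/v12345-fr/c07.fits'
    #
    # map() resolves such a relative path against the root storage, also
    # checking for .gz/.fz compressed variants on read, and wraps the result
    # in a ButlerLocation.
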
    def lookup(self, properties, dataId):
        """Look up properties in a metadata registry given a partial
        dataset identifier.
        @param properties (list of strings)
        @param dataId (dict) Dataset identifier
        @return (list of tuples) values of properties"""

        if self.registry is None:
            raise RuntimeError("No registry for lookup")

        skyMapKeys = ("tract", "patch")

        where = []
        values = []

        # Prepare to remove skymap entries from properties list.  These must
        # be in the data ID, so we store which ones we're removing and create
        # an OrderedDict that tells us where to re-insert them.  That maps the
        # name of the property to either its index in the properties list
        # *after* the skymap ones have been removed (for entries that aren't
        # skymap ones) or the value from the data ID (for those that are).
        removed = set()
        substitutions = OrderedDict()
        index = 0
        properties = list(properties)  # don't modify the original list
        for p in properties:
            if p in skyMapKeys:
                try:
                    substitutions[p] = dataId[p]
                    removed.add(p)
                except KeyError:
                    raise RuntimeError(
                        "Cannot look up skymap key '%s'; it must be explicitly included in the data ID" % p
                    )
            else:
                substitutions[p] = index
                index += 1
        # Can't actually remove while iterating above, so we do it here.
        for p in removed:
            properties.remove(p)

        fastPath = True
        for p in properties:
            if p not in ('filter', 'expTime', 'taiObs'):
                fastPath = False
                break
        if fastPath and 'visit' in dataId and "raw" in self.tables:
            lookupDataId = {'visit': dataId['visit']}
            result = self.registry.lookup(properties, 'raw_visit', lookupDataId, template=self.template)
        else:
            if dataId is not None:
                for k, v in dataId.items():
                    if self.columns and k not in self.columns:
                        continue
                    if k == self.obsTimeName:
                        continue
                    if k in skyMapKeys:
                        continue
                    where.append((k, '?'))
                    values.append(v)
            lookupDataId = {k[0]: v for k, v in zip(where, values)}
            if self.range:
                # format of self.range is ('?', isBetween-lowKey, isBetween-highKey)
                # here we transform that to {(lowKey, highKey): value}
                lookupDataId[(self.range[1], self.range[2])] = dataId[self.obsTimeName]
            result = self.registry.lookup(properties, self.tables, lookupDataId, template=self.template)
        if not removed:
            return result
        # Iterate over the query results, re-inserting the skymap entries.
        result = [tuple(v if k in removed else item[v] for k, v in substitutions.items())
                  for item in result]
        return result

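    # A hedged illustration of the skymap handling above (values invented):
    # with properties=['filter', 'tract'] and dataId={'visit': 1, 'tract': 9},
    # only ['filter'] is queried from the registry, and tract=9 is spliced
    # from the data ID back into each result row, e.g. [('r',)] -> [('r', 9)].
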
    def have(self, properties, dataId):
        """Returns whether the provided data identifier has all
        the properties in the provided list.
        @param properties (list of strings) Properties required
        @param dataId (dict) Dataset identifier
        @return (bool) True if all properties are present"""
        for prop in properties:
            if prop not in dataId:
                return False
        return True

    def need(self, properties, dataId):
        """Ensures all properties in the provided list are present in
        the data identifier, looking them up as needed.  This is only
        possible for the case where the data identifies a single
        exposure.
        @param properties (list of strings) Properties required
        @param dataId (dict) Partial dataset identifier
        @return (dict) copy of dataset identifier with enhanced values
        """
        newId = dataId.copy()
        newProps = []  # Properties we don't already have
        for prop in properties:
            if prop not in newId:
                newProps.append(prop)
        if len(newProps) == 0:
            return newId

        lookups = self.lookup(newProps, newId)
        if len(lookups) != 1:
            raise NoResults("No unique lookup for %s from %s: %d matches" %
                            (newProps, newId, len(lookups)),
                            self.datasetType, dataId)
        for i, prop in enumerate(newProps):
            newId[prop] = lookups[0][i]
        return newId

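    # A hedged usage sketch (values invented): for a template that needs
    # ('visit', 'filter'), calling need() with dataId={'visit': 12345} looks
    # up the missing 'filter' in the registry and returns e.g.
    # {'visit': 12345, 'filter': 'r'}; anything but exactly one registry
    # match raises NoResults.
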

def _formatMap(ch, k, datasetType):
    """Convert a format character into a Python type."""
    if ch in "diouxX":
        return int
    elif ch in "eEfFgG":
        return float
    elif ch in "crs":
        return str
    else:
        raise RuntimeError("Unexpected format specifier %s"
                           " for field %s in template for dataset %s" %
                           (ch, k, datasetType))
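
# For example, the template field "%(visit)d" makes _formatMap return int for
# 'visit', so Mapping.keyDict records that visit values must be integers;
# "%(filter)s" likewise maps 'filter' to str.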


class ImageMapping(Mapping):
    """ImageMapping is a Mapping subclass for non-camera images."""

    def __init__(self, datasetType, policy, registry, root, **kwargs):
        """Constructor for Mapping class.
        @param datasetType (string)
        @param policy (daf_persistence.Policy, or pexPolicy.Policy (only for backward compatibility))
               Mapping Policy
        @param registry (lsst.obs.base.Registry) Registry for metadata lookups
        @param root (string) Path of root directory"""
        if isinstance(policy, pexPolicy.Policy):
            policy = Policy(policy)
        Mapping.__init__(self, datasetType, policy, registry, root, **kwargs)
        self.columns = policy.asArray('columns') if 'columns' in policy else None


class ExposureMapping(Mapping):
    """ExposureMapping is a Mapping subclass for normal exposures."""

    def __init__(self, datasetType, policy, registry, root, **kwargs):
        """Constructor for Mapping class.
        @param datasetType (string)
        @param policy (daf_persistence.Policy, or pexPolicy.Policy (only for backward compatibility))
               Mapping Policy
        @param registry (lsst.obs.base.Registry) Registry for metadata lookups
        @param root (string) Path of root directory"""
        if isinstance(policy, pexPolicy.Policy):
            policy = Policy(policy)
        Mapping.__init__(self, datasetType, policy, registry, root, **kwargs)
        self.columns = policy.asArray('columns') if 'columns' in policy else None

    def standardize(self, mapper, item, dataId):
        return mapper._standardizeExposure(self, item, dataId)


class CalibrationMapping(Mapping):
    """CalibrationMapping is a Mapping subclass for calibration-type products.

    The difference is that data properties in the query or template
    can be looked up using a reference Mapping in addition to this one.

    CalibrationMapping Policies can contain the following:

    reference (string, optional): a list of tables for finding missing dataset
    identifier components (including the observation time, if a validity range
    is required) in the exposure registry; note that the "tables" entry refers
    to the calibration registry

    refCols (string, optional): a list of dataset properties required from the
    reference tables for lookups in the calibration registry

    validRange (bool): true if the calibration dataset has a validity range
    specified by a column in the tables of the reference dataset in the
    exposure registry and two columns in the tables of this calibration
    dataset in the calibration registry

    obsTimeName (string, optional): the name of the column in the reference
    dataset tables containing the observation time (default "taiObs")

    validStartName (string, optional): the name of the column in the
    calibration dataset tables containing the start of the validity range
    (default "validStart")

    validEndName (string, optional): the name of the column in the
    calibration dataset tables containing the end of the validity range
    (default "validEnd")
    """

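    # A hedged sketch (values invented) of a CalibrationMapping policy for a
    # hypothetical "bias" dataset type, using the keys documented above:
    #
    #   bias:
    #     template: "BIAS/%(calibDate)s/bias-c%(ccd)02d.fits"
    #     python: "lsst.afw.image.ExposureF"
    #     persistable: "ExposureF"
    #     storage: "FitsStorage"
    #     tables: "bias"
    #     reference: "raw_visit"
    #     validRange: true
    #     obsTimeName: "taiObs"
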
    def __init__(self, datasetType, policy, registry, calibRegistry, calibRoot, **kwargs):
        """Constructor for Mapping class.
        @param datasetType (string)
        @param policy (daf_persistence.Policy, or pexPolicy.Policy (only for backward compatibility))
               Mapping Policy
        @param registry (lsst.obs.base.Registry) Registry for metadata lookups
        @param calibRegistry (lsst.obs.base.Registry) Registry for calibration metadata lookups
        @param calibRoot (string) Path of calibration root directory"""
        if isinstance(policy, pexPolicy.Policy):
            policy = Policy(policy)
        Mapping.__init__(self, datasetType, policy, calibRegistry, calibRoot, **kwargs)
        self.reference = policy.asArray("reference") if "reference" in policy else None
        self.refCols = policy.asArray("refCols") if "refCols" in policy else None
        self.refRegistry = registry
        if "validRange" in policy and policy["validRange"]:
            self.range = ("?", policy["validStartName"], policy["validEndName"])
        if "columns" in policy:
            self.columns = policy.asArray("columns")
        # Default to None so standardize() can always pass self.setFilter.
        self.setFilter = policy["filter"] if "filter" in policy else None
        self.metadataKeys = None
        if "metadataKey" in policy:
            self.metadataKeys = policy.asArray("metadataKey")

    def lookup(self, properties, dataId):
        """Look up properties in a metadata registry given a partial
        dataset identifier.
        @param properties (list of strings)
        @param dataId (dict) Dataset identifier
        @return (list of tuples) values of properties"""

        # Either look up taiObs in the reference tables and then everything
        # else in the calibration registry, or look up everything in the
        # calibration registry directly.

        newId = dataId.copy()
        if self.reference is not None:
            where = []
            values = []
            for k, v in dataId.items():
                if self.refCols and k not in self.refCols:
                    continue
                where.append(k)
                values.append(v)

            # Columns we need from the regular registry
            if self.columns is not None:
                columns = set(self.columns)
                for k in dataId.keys():
                    columns.discard(k)
            else:
                columns = set(properties)

            if not columns:
                # Nothing to look up in the reference registry; continue with the calib registry
                return Mapping.lookup(self, properties, newId)

            lookupDataId = dict(zip(where, values))
            lookups = self.refRegistry.lookup(columns, self.reference, lookupDataId)
            if len(lookups) != 1:
                raise RuntimeError("No unique lookup for %s from %s: %d matches" %
                                   (columns, dataId, len(lookups)))
            if columns == set(properties):
                # Have everything we need
                return lookups
            for i, prop in enumerate(columns):
                newId[prop] = lookups[0][i]
        return Mapping.lookup(self, properties, newId)

    def standardize(self, mapper, item, dataId):
        return mapper._standardizeExposure(self, item, dataId, filter=self.setFilter)


class DatasetMapping(Mapping):
    """DatasetMapping is a Mapping subclass for non-Exposure datasets that can
    be retrieved by the standard daf_persistence mechanism.

    The differences are that the Storage type must be specified and no
    Exposure standardization is performed.

    The "storage" entry in the Policy is mandatory; the "tables" entry is
    optional; no "level" entry is allowed.
    """

    def __init__(self, datasetType, policy, registry, root, **kwargs):
        """Constructor for DatasetMapping class.
        @param datasetType (string)
        @param policy (daf_persistence.Policy, or pexPolicy.Policy (only for backward compatibility))
               Mapping Policy
        @param registry (lsst.obs.base.Registry) Registry for metadata lookups
        @param root (string) Path of root directory"""
        if isinstance(policy, pexPolicy.Policy):
            policy = Policy(policy)
        Mapping.__init__(self, datasetType, policy, registry, root, **kwargs)
        self.storage = policy["storage"]  # Storage type
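

# A hedged usage sketch (the policy access path and all values are invented):
# a concrete mapper built on these classes would construct one Mapping per
# dataset type and delegate its map calls to it, roughly:
#
#   mapping = ExposureMapping("calexp", policy["exposures.calexp"],
#                             registry, rootStorage)
#   location = mapping.map(mapper, {'visit': 12345, 'ccd': 7})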