Coverage for python/lsst/obs/base/mapping.py : 9%

# This file is part of obs_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

from collections import OrderedDict
import os
import re
from lsst.daf.base import PropertySet
from lsst.daf.persistence import ButlerLocation, NoResults
from lsst.utils import doImport
from lsst.afw.image import Exposure, MaskedImage, Image, DecoratedImage

__all__ = ["Mapping", "ImageMapping", "ExposureMapping", "CalibrationMapping", "DatasetMapping"]


class Mapping(object):

    """Mapping is a base class for all mappings. Mappings are used by
    the Mapper to map (determine a path to some data given some
    identifiers) and standardize (convert data into some standard
    format or type) data, and to query the associated registry to see
    what data is available.

    Subclasses must specify self.storage or else override self.map().

    Public methods: lookup, have, need, getKeys, map

    Mappings are specified mainly by policy. A Mapping policy should
    consist of:

    template (string): a Python string providing the filename for that
    particular dataset type based on some data identifiers. In the
    case of redundancy in the path (e.g., file uniquely specified by
    the exposure number, but filter in the path), the
    redundant/dependent identifiers can be looked up in the registry.

    python (string): the Python type for the retrieved data (e.g.
    lsst.afw.image.ExposureF)

    persistable (string): the Persistable registration for the on-disk data
    (e.g. ImageU)

    storage (string, optional): Storage type for this dataset type (e.g.
    "FitsStorage")

    level (string, optional): the level in the camera hierarchy at which the
    data is stored (Amp, Ccd or skyTile), if relevant

    tables (string, optional): a whitespace-delimited list of tables in the
    registry that can be NATURAL JOIN-ed to look up additional
    information.

    Parameters
    ----------
    datasetType : `str`
        Butler dataset type to be mapped.
    policy : `daf_persistence.Policy`
        Mapping Policy.
    registry : `lsst.obs.base.Registry`
        Registry for metadata lookups.
    rootStorage : Storage subclass instance
        Interface to persisted repository data.
    provided : `list` of `str`
        Keys provided by the mapper.
    """
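
    # Illustrative example (not taken from any particular obs package): a
    # policy entry with
    #     template: "raw/v%(visit)d_f%(filter)s.fits"
    #     python: "lsst.afw.image.ExposureU"
    #     persistable: "ExposureU"
    #     storage: "FitsStorage"
    #     tables: "raw"
    # would map a data ID such as {"visit": 903334, "filter": "r"} to the
    # relative path "raw/v903334_fr.fits".  The template's %(visit)d and
    # %(filter)s conversions also determine the key dict built in __init__,
    # here {"visit": int, "filter": str}.  The dataset type, template, and
    # values above are made up for illustration only.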

    def __init__(self, datasetType, policy, registry, rootStorage, provided=None):

        if policy is None:
            raise RuntimeError("No policy provided for mapping")

        self.datasetType = datasetType
        self.registry = registry
        self.rootStorage = rootStorage

        self._template = policy['template']  # Template path
        # In most cases the template may not be empty, and it is accessed via
        # a property that raises if it is used while `not self._template`.
        # Here we *do* allow it to be empty, so that the key dict can still be
        # fetched and the mapping constructed; the mapping can then raise
        # later if the template is actually needed.  This is a little odd, but
        # it allows the template check to be introduced without a major
        # refactor.
        if self._template:
            self.keyDict = dict([
                (k, _formatMap(v, k, datasetType))
                for k, v in
                re.findall(r'\%\((\w+)\).*?([diouxXeEfFgGcrs])', self.template)
            ])
        else:
            self.keyDict = {}
        if provided is not None:
            for p in provided:
                if p in self.keyDict:
                    del self.keyDict[p]
        self.python = policy['python']  # Python type
        self.persistable = policy['persistable']  # Persistable type
        self.storage = policy['storage']
        if 'level' in policy:
            self.level = policy['level']  # Level in camera hierarchy
        if 'tables' in policy:
            self.tables = policy.asArray('tables')
        else:
            self.tables = None
        self.range = None
        self.columns = None
        self.obsTimeName = policy['obsTimeName'] if 'obsTimeName' in policy else None
        self.recipe = policy['recipe'] if 'recipe' in policy else 'default'

    @property
    def template(self):
        if self._template:  # template must not be an empty string or None
            return self._template
        else:
            raise RuntimeError(f"Template is not defined for the {self.datasetType} dataset type, "
                               "it must be set before it can be used.")

    def keys(self):
        """Return the dict of keys and value types required for this mapping."""
        return self.keyDict

    def map(self, mapper, dataId, write=False):
        """Standard implementation of map function.

        Parameters
        ----------
        mapper : `lsst.daf.persistence.Mapper`
            Object to be mapped.
        dataId : `dict`
            Dataset identifier.

        Returns
        -------
        `lsst.daf.persistence.ButlerLocation`
            Location of object that was mapped.
        """
        actualId = self.need(iter(self.keyDict.keys()), dataId)
        usedDataId = {key: actualId[key] for key in self.keyDict.keys()}
        path = mapper._mapActualToPath(self.template, actualId)
        if os.path.isabs(path):
            raise RuntimeError("Mapped path should not be absolute.")
        if not write:
            # This allows mapped files to be compressed, ending in .gz or .fz,
            # without any indication from the policy that the file should be
            # compressed, easily allowing repositories to contain a
            # combination of compressed and not-compressed files.
            # If needed we can add a policy flag to allow compressed files or
            # not, and perhaps a list of allowed extensions that may exist at
            # the end of the template.
            for ext in (None, '.gz', '.fz'):
                if ext and path.endswith(ext):
                    continue  # if the path already ends with the extension
                extPath = path + ext if ext else path
                newPath = self.rootStorage.instanceSearch(extPath)
                if newPath:
                    path = newPath
                    break
        assert path, "Fully-qualified filename is empty."

        addFunc = "add_" + self.datasetType  # Name of method for additionalData
        if hasattr(mapper, addFunc):
            addFunc = getattr(mapper, addFunc)
            additionalData = addFunc(self.datasetType, actualId)
            assert isinstance(additionalData, PropertySet), \
                "Bad type for returned data: %s" % (type(additionalData),)
        else:
            additionalData = None

        return ButlerLocation(pythonType=self.python, cppType=self.persistable, storageName=self.storage,
                              locationList=path, dataId=actualId.copy(), mapper=mapper,
                              storage=self.rootStorage, usedDataId=usedDataId, datasetType=self.datasetType,
                              additionalData=additionalData)
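
    # Sketch of a typical call (values are made up for illustration): for a
    # mapping whose template is "raw/v%(visit)d_f%(filter)s.fits",
    #
    #     mapping.map(mapper, {"visit": 903334, "filter": "r"})
    #
    # fills in any template keys missing from the data ID via need()/lookup(),
    # expands the template to the relative path "raw/v903334_fr.fits",
    # optionally swaps in a .gz/.fz variant found in rootStorage on read, and
    # returns a ButlerLocation describing where and how to load the dataset.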

    def lookup(self, properties, dataId):
        """Look up properties in a metadata registry given a partial
        dataset identifier.

        Parameters
        ----------
        properties : `list` of `str`
            What to look up.
        dataId : `dict`
            Dataset identifier.

        Returns
        -------
        `list` of `tuple`
            Values of properties.
        """
        if self.registry is None:
            raise RuntimeError("No registry for lookup")

        skyMapKeys = ("tract", "patch")

        where = []
        values = []

        # Prepare to remove skymap entries from properties list.  These must
        # be in the data ID, so we store which ones we're removing and create
        # an OrderedDict that tells us where to re-insert them.  That maps the
        # name of the property to either its index in the properties list
        # *after* the skymap ones have been removed (for entries that aren't
        # skymap ones) or the value from the data ID (for those that are).
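        # Worked example (illustrative values): with
        # properties = ["visit", "tract", "filter"] and a data ID containing
        # tract=8766, "tract" is removed before querying, leaving
        # ["visit", "filter"], and substitutions becomes
        # {"visit": 0, "tract": 8766, "filter": 1}; each result row
        # (visit, filter) is then re-expanded to (visit, 8766, filter) below.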
        removed = set()
        substitutions = OrderedDict()
        index = 0
        properties = list(properties)  # don't modify the original list
        for p in properties:
            if p in skyMapKeys:
                try:
                    substitutions[p] = dataId[p]
                    removed.add(p)
                except KeyError:
                    raise RuntimeError(
                        "Cannot look up skymap key '%s'; it must be explicitly included in the data ID" % p
                    )
            else:
                substitutions[p] = index
                index += 1
        # Can't actually remove while iterating above, so we do it here.
        for p in removed:
            properties.remove(p)

        fastPath = True
        for p in properties:
            if p not in ('filter', 'expTime', 'taiObs'):
                fastPath = False
                break
        if fastPath and 'visit' in dataId and "raw" in self.tables:
            lookupDataId = {'visit': dataId['visit']}
            result = self.registry.lookup(properties, 'raw_visit', lookupDataId, template=self.template)
        else:
            if dataId is not None:
                for k, v in dataId.items():
                    if self.columns and k not in self.columns:
                        continue
                    if k == self.obsTimeName:
                        continue
                    if k in skyMapKeys:
                        continue
                    where.append((k, '?'))
                    values.append(v)
            lookupDataId = {k[0]: v for k, v in zip(where, values)}
            if self.range:
                # format of self.range is ('?', isBetween-lowKey, isBetween-highKey)
                # here we transform that to {(lowKey, highKey): value}
                lookupDataId[(self.range[1], self.range[2])] = dataId[self.obsTimeName]
            result = self.registry.lookup(properties, self.tables, lookupDataId, template=self.template)
        if not removed:
            return result
        # Iterate over the query results, re-inserting the skymap entries.
        result = [tuple(v if k in removed else item[v] for k, v in substitutions.items())
                  for item in result]
        return result

    def have(self, properties, dataId):
        """Returns whether the provided data identifier has all
        the properties in the provided list.

        Parameters
        ----------
        properties : `list` of `str`
            Properties required.
        dataId : `dict`
            Dataset identifier.

        Returns
        -------
        `bool`
            True if all properties are present.
        """
        for prop in properties:
            if prop not in dataId:
                return False
        return True

    def need(self, properties, dataId):
        """Ensures all properties in the provided list are present in
        the data identifier, looking them up as needed.  This is only
        possible for the case where the data identifies a single
        exposure.

        Parameters
        ----------
        properties : `list` of `str`
            Properties required.
        dataId : `dict`
            Partial dataset identifier.

        Returns
        -------
        `dict`
            Copy of dataset identifier with enhanced values.
        """
        newId = dataId.copy()
        newProps = []  # Properties we don't already have
        for prop in properties:
            if prop not in newId:
                newProps.append(prop)
        if len(newProps) == 0:
            return newId

        lookups = self.lookup(newProps, newId)
        if len(lookups) != 1:
            raise NoResults("No unique lookup for %s from %s: %d matches" %
                            (newProps, newId, len(lookups)),
                            self.datasetType, dataId)
        for i, prop in enumerate(newProps):
            newId[prop] = lookups[0][i]
        return newId
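
    # Illustrative example (made-up values): if the keys required by the
    # template are {"visit", "filter"} and the caller supplies only
    # {"visit": 903334}, need() asks lookup() for the missing "filter"; if the
    # registry returns exactly one row, the result is a new data ID such as
    # {"visit": 903334, "filter": "r"}, otherwise NoResults is raised.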


def _formatMap(ch, k, datasetType):
    """Convert a format character into a Python type."""
    if ch in "diouxX":
        return int
    elif ch in "eEfFgG":
        return float
    elif ch in "crs":
        return str
    else:
        raise RuntimeError("Unexpected format specifier %s"
                           " for field %s in template for dataset %s" %
                           (ch, k, datasetType))
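
# For reference, the conversion characters in a mapping template translate to
# key types as follows: %(visit)d (or i, o, u, x, X) gives int, %(expTime)f
# (or e, E, g, G) gives float, and %(filter)s (or c, r) gives str.  The key
# names here are only examples.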


class ImageMapping(Mapping):
    """ImageMapping is a Mapping subclass for non-camera images.

    Parameters
    ----------
    datasetType : `str`
        Butler dataset type to be mapped.
    policy : `daf_persistence.Policy`
        Mapping Policy.
    registry : `lsst.obs.base.Registry`
        Registry for metadata lookups.
    root : `str`
        Path of root directory.
    """

    def __init__(self, datasetType, policy, registry, root, **kwargs):
        Mapping.__init__(self, datasetType, policy, registry, root, **kwargs)
        self.columns = policy.asArray('columns') if 'columns' in policy else None


class ExposureMapping(Mapping):
    """ExposureMapping is a Mapping subclass for normal exposures.

    Parameters
    ----------
    datasetType : `str`
        Butler dataset type to be mapped.
    policy : `daf_persistence.Policy`
        Mapping Policy.
    registry : `lsst.obs.base.Registry`
        Registry for metadata lookups.
    root : `str`
        Path of root directory.
    """

    def __init__(self, datasetType, policy, registry, root, **kwargs):
        Mapping.__init__(self, datasetType, policy, registry, root, **kwargs)
        self.columns = policy.asArray('columns') if 'columns' in policy else None

    def standardize(self, mapper, item, dataId):
        return mapper._standardizeExposure(self, item, dataId)


class CalibrationMapping(Mapping):
    """CalibrationMapping is a Mapping subclass for calibration-type products.

    The difference is that data properties in the query or template
    can be looked up using a reference Mapping in addition to this one.

    CalibrationMapping Policies can contain the following:

    reference (string, optional)
        a list of tables for finding missing dataset identifier components
        (including the observation time, if a validity range is required) in
        the exposure registry; note that the "tables" entry refers to the
        calibration registry

    refCols (string, optional)
        a list of dataset properties required from the reference tables for
        lookups in the calibration registry

    validRange (bool)
        true if the calibration dataset has a validity range specified by a
        column in the tables of the reference dataset in the exposure
        registry and two columns in the tables of this calibration dataset
        in the calibration registry

    obsTimeName (string, optional)
        the name of the column in the reference dataset tables containing
        the observation time (default "taiObs")

    validStartName (string, optional)
        the name of the column in the calibration dataset tables containing
        the start of the validity range (default "validStart")

    validEndName (string, optional)
        the name of the column in the calibration dataset tables containing
        the end of the validity range (default "validEnd")

    Parameters
    ----------
    datasetType : `str`
        Butler dataset type to be mapped.
    policy : `daf_persistence.Policy`
        Mapping Policy.
    registry : `lsst.obs.base.Registry`
        Registry for metadata lookups.
    calibRegistry : `lsst.obs.base.Registry`
        Registry for calibration metadata lookups.
    calibRoot : `str`
        Path of calibration root directory.
    dataRoot : `str`
        Path of data root directory; used for outputs only.
    """
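
    # Hypothetical illustration (policy entries as described above, values
    # made up): a "flat" mapping with reference: "raw_visit",
    # refCols: "visit filter", and validRange: true would take visit and
    # filter from the data ID, look up taiObs for that visit in the exposure
    # registry's reference table, and then select the calibration row whose
    # validStart/validEnd bracket that taiObs in the calibration registry.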

    def __init__(self, datasetType, policy, registry, calibRegistry, calibRoot, dataRoot=None, **kwargs):
        Mapping.__init__(self, datasetType, policy, calibRegistry, calibRoot, **kwargs)
        self.reference = policy.asArray("reference") if "reference" in policy else None
        self.refCols = policy.asArray("refCols") if "refCols" in policy else None
        self.refRegistry = registry
        self.dataRoot = dataRoot
        if "validRange" in policy and policy["validRange"]:
            self.range = ("?", policy["validStartName"], policy["validEndName"])
        if "columns" in policy:
            self.columns = policy.asArray("columns")
        if "filter" in policy:
            self.setFilter = policy["filter"]
        self.metadataKeys = None
        if "metadataKey" in policy:
            self.metadataKeys = policy.asArray("metadataKey")

    def map(self, mapper, dataId, write=False):
        location = Mapping.map(self, mapper, dataId, write=write)
        # Want outputs to be in the output directory
        if write and self.dataRoot:
            location.storage = self.dataRoot
        return location

    def lookup(self, properties, dataId):
        """Look up properties in a metadata registry given a partial
        dataset identifier.

        Parameters
        ----------
        properties : `list` of `str`
            Properties to look up.
        dataId : `dict`
            Dataset identifier.

        Returns
        -------
        `list` of `tuple`
            Values of properties.
        """
        # Either look up taiObs in the reference tables and then everything
        # else in the calibration registry, or look up everything in the
        # calibration registry directly.

        newId = dataId.copy()
        if self.reference is not None:
            where = []
            values = []
            for k, v in dataId.items():
                if self.refCols and k not in self.refCols:
                    continue
                where.append(k)
                values.append(v)

            # Columns we need from the regular registry
            if self.columns is not None:
                columns = set(self.columns)
                for k in dataId.keys():
                    columns.discard(k)
            else:
                columns = set(properties)

            if not columns:
                # Nothing to lookup in reference registry; continue with calib registry
                return Mapping.lookup(self, properties, newId)

            lookupDataId = dict(zip(where, values))
            lookups = self.refRegistry.lookup(columns, self.reference, lookupDataId)
            if len(lookups) != 1:
                raise RuntimeError("No unique lookup for %s from %s: %d matches" %
                                   (columns, dataId, len(lookups)))
            if columns == set(properties):
                # Have everything we need
                return lookups
            for i, prop in enumerate(columns):
                newId[prop] = lookups[0][i]
        return Mapping.lookup(self, properties, newId)
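
    # In short: data ID keys named in refCols constrain the reference query;
    # the columns still needed (the policy's "columns" minus keys already in
    # the data ID, or else the requested properties) are fetched from the
    # reference tables first.  If that alone answers the request it is
    # returned directly; otherwise the values are merged into the data ID and
    # the combined ID is passed to Mapping.lookup against the calibration
    # registry.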

    def standardize(self, mapper, item, dataId):
        """Default standardization function for calibration datasets.

        If the item is of a type that should be standardized, the mapper's
        ``_standardizeExposure`` method is called, otherwise the item is
        returned unmodified.

        Parameters
        ----------
        mapper : `lsst.daf.persistence.Mapper`
            Mapper object to use.
        item : object
            Will be standardized if of type lsst.afw.image.Exposure,
            lsst.afw.image.DecoratedImage, lsst.afw.image.Image
            or lsst.afw.image.MaskedImage.
        dataId : `dict`
            Dataset identifier.

        Returns
        -------
        `lsst.afw.image.Exposure` or item
            The standardized object.
        """
        if issubclass(doImport(self.python), (Exposure, MaskedImage, Image, DecoratedImage)):
            return mapper._standardizeExposure(self, item, dataId, filter=self.setFilter)
        return item


class DatasetMapping(Mapping):
    """DatasetMapping is a Mapping subclass for non-Exposure datasets that can
    be retrieved by the standard daf_persistence mechanism.

    The differences are that the Storage type must be specified and no
    Exposure standardization is performed.

    The "storage" entry in the Policy is mandatory; the "tables" entry is
    optional; no "level" entry is allowed.

    Parameters
    ----------
    datasetType : `str`
        Butler dataset type to be mapped.
    policy : `daf_persistence.Policy`
        Mapping Policy.
    registry : `lsst.obs.base.Registry`
        Registry for metadata lookups.
    root : `str`
        Path of root directory.
    """

    def __init__(self, datasetType, policy, registry, root, **kwargs):
        Mapping.__init__(self, datasetType, policy, registry, root, **kwargs)
        self.storage = policy["storage"]  # Storage type