Coverage for python/lsst/obs/base/mapping.py: 9% (204 statements)
coverage.py v7.1.0, created at 2023-02-05 18:01 -0800
# This file is part of obs_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

from collections import OrderedDict
import os
import re
from lsst.daf.base import PropertySet
from lsst.daf.persistence import ButlerLocation, NoResults
from lsst.utils import doImport
from lsst.afw.image import Exposure, MaskedImage, Image, DecoratedImage

__all__ = ["Mapping", "ImageMapping", "ExposureMapping", "CalibrationMapping", "DatasetMapping"]


class Mapping(object):

    """Mapping is a base class for all mappings. Mappings are used by
    the Mapper to map (determine a path to some data given some
    identifiers) and standardize (convert data into some standard
    format or type) data, and to query the associated registry to see
    what data is available.

    Subclasses must specify self.storage or else override self.map().

    Public methods: lookup, have, need, keys, map

    Mappings are specified mainly by policy. A Mapping policy should
    consist of:

    template (string): a Python string providing the filename for that
    particular dataset type based on some data identifiers. In the
    case of redundancy in the path (e.g., file uniquely specified by
    the exposure number, but filter in the path), the
    redundant/dependent identifiers can be looked up in the registry.

    python (string): the Python type for the retrieved data (e.g.
    lsst.afw.image.ExposureF)

    persistable (string): the Persistable registration for the on-disk data
    (e.g. ImageU)

    storage (string, optional): Storage type for this dataset type (e.g.
    "FitsStorage")

    level (string, optional): the level in the camera hierarchy at which the
    data is stored (Amp, Ccd or skyTile), if relevant

    tables (string, optional): a whitespace-delimited list of tables in the
    registry that can be NATURAL JOIN-ed to look up additional
    information.

    Parameters
    ----------
    datasetType : `str`
        Butler dataset type to be mapped.
    policy : `daf_persistence.Policy`
        Mapping Policy.
    registry : `lsst.obs.base.Registry`
        Registry for metadata lookups.
    rootStorage : Storage subclass instance
        Interface to persisted repository data.
    provided : `list` of `str`
        Keys provided by the mapper.
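
    Examples
    --------
    A hypothetical policy fragment for a ``raw`` dataset; the template,
    keys, and values are illustrative only, not taken from any real obs
    package::

        template: "raw/v%(visit)d_f%(filter)s.fits.gz"
        python: "lsst.afw.image.DecoratedImageU"
        persistable: "DecoratedImageU"
        storage: "FitsStorage"
        level: "Ccd"
        tables: "raw raw_visit"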
    """

    def __init__(self, datasetType, policy, registry, rootStorage, provided=None):

        if policy is None:
            raise RuntimeError("No policy provided for mapping")

        self.datasetType = datasetType
        self.registry = registry
        self.rootStorage = rootStorage

        self._template = policy['template']  # Template path
        # Normally the template must not be empty: it is accessed via a
        # property that raises while `not self._template`. Here we *do* allow
        # it to be empty, so that the key dict can still be fetched and the
        # mapping constructed; the property will then raise only if the
        # invalid template is actually used. This is a little odd, but it
        # lets the template check be introduced without a major refactor.
        if self._template:
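            # Illustrative: a template like "raw/v%(visit)d_f%(filter)s.fits"
            # yields keyDict {"visit": int, "filter": str}.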
            self.keyDict = dict([
                (k, _formatMap(v, k, datasetType))
                for k, v in
                re.findall(r'\%\((\w+)\).*?([diouxXeEfFgGcrs])', self.template)
            ])
        else:
            self.keyDict = {}
        if provided is not None:
            for p in provided:
                if p in self.keyDict:
                    del self.keyDict[p]
        self.python = policy['python']  # Python type
        self.persistable = policy['persistable']  # Persistable type
        self.storage = policy['storage']
        if 'level' in policy:
            self.level = policy['level']  # Level in camera hierarchy
        if 'tables' in policy:
            self.tables = policy.asArray('tables')
        else:
            self.tables = None
        self.range = None
        self.columns = None
        self.obsTimeName = policy['obsTimeName'] if 'obsTimeName' in policy else None
        self.recipe = policy['recipe'] if 'recipe' in policy else 'default'

    @property
    def template(self):
        if self._template:  # template must not be an empty string or None
            return self._template
        else:
            raise RuntimeError(f"Template is not defined for the {self.datasetType} dataset type, "
                               "it must be set before it can be used.")

    def keys(self):
        """Return the dict of keys and value types required for this mapping.
        """
        return self.keyDict

    def map(self, mapper, dataId, write=False):
        """Standard implementation of map function.

        Parameters
        ----------
        mapper : `lsst.daf.persistence.Mapper`
            Mapper object to use for the mapping.
        dataId : `dict`
            Dataset identifier.

        Returns
        -------
        `lsst.daf.persistence.ButlerLocation`
            Location of object that was mapped.
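
        Examples
        --------
        Illustrative only; the mapper instance and data ID keys are
        hypothetical::

            location = mapping.map(mapper, {"visit": 903334, "ccd": 23})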
        """
        actualId = self.need(iter(self.keyDict.keys()), dataId)
        usedDataId = {key: actualId[key] for key in self.keyDict.keys()}
        path = mapper._mapActualToPath(self.template, actualId)
        if os.path.isabs(path):
            raise RuntimeError("Mapped path should not be absolute.")
        if not write:
            # This allows mapped files to be compressed, ending in .gz or .fz,
            # without any indication from the policy that the file should be
            # compressed, easily allowing repositories to contain a combination
            # of compressed and not-compressed files.
            # If needed we can add a policy flag to allow compressed files or
            # not, and perhaps a list of allowed extensions that may exist
            # at the end of the template.
            for ext in (None, '.gz', '.fz'):
                if ext and path.endswith(ext):
                    continue  # if the path already ends with the extension
                extPath = path + ext if ext else path
                newPath = self.rootStorage.instanceSearch(extPath)
                if newPath:
                    path = newPath
                    break
        assert path, "Fully-qualified filename is empty."

        addFunc = "add_" + self.datasetType  # Name of method for additionalData
        if hasattr(mapper, addFunc):
            addFunc = getattr(mapper, addFunc)
            additionalData = addFunc(self.datasetType, actualId)
            assert isinstance(additionalData, PropertySet), \
                "Bad type for returned data: %s" % (type(additionalData),)
        else:
            additionalData = None

        return ButlerLocation(pythonType=self.python, cppType=self.persistable, storageName=self.storage,
                              locationList=path, dataId=actualId.copy(), mapper=mapper,
                              storage=self.rootStorage, usedDataId=usedDataId, datasetType=self.datasetType,
                              additionalData=additionalData)

    def lookup(self, properties, dataId):
        """Look up properties in a metadata registry given a partial
        dataset identifier.

        Parameters
        ----------
        properties : `list` of `str`
            What to look up.
        dataId : `dict`
            Dataset identifier.

        Returns
        -------
        `list` of `tuple`
            Values of properties.
        """
        if self.registry is None:
            raise RuntimeError("No registry for lookup")

        skyMapKeys = ("tract", "patch")

        where = []
        values = []

        # Prepare to remove skymap entries from properties list. These must
        # be in the data ID, so we store which ones we're removing and create
        # an OrderedDict that tells us where to re-insert them. That maps the
        # name of the property to either its index in the properties list
        # *after* the skymap ones have been removed (for entries that aren't
        # skymap ones) or the value from the data ID (for those that are).
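        # (Illustrative: properties=["visit", "tract", "filter"] with
        # dataId={"tract": 8766} gives substitutions
        # {"visit": 0, "tract": 8766, "filter": 1} and removed={"tract"}.)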
        removed = set()
        substitutions = OrderedDict()
        index = 0
        properties = list(properties)  # don't modify the original list
        for p in properties:
            if p in skyMapKeys:
                try:
                    substitutions[p] = dataId[p]
                    removed.add(p)
                except KeyError:
                    raise RuntimeError(
                        "Cannot look up skymap key '%s'; it must be explicitly included in the data ID" % p
                    )
            else:
                substitutions[p] = index
                index += 1
        # Can't actually remove while iterating above, so we do it here.
        for p in removed:
            properties.remove(p)

        fastPath = True
        for p in properties:
            if p not in ('filter', 'expTime', 'taiObs'):
                fastPath = False
                break
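        # (Illustrative: properties=["filter", "taiObs"] with "visit" in the
        # dataId can be satisfied by a single query on the raw_visit table.)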
        if fastPath and 'visit' in dataId and "raw" in self.tables:
            lookupDataId = {'visit': dataId['visit']}
            result = self.registry.lookup(properties, 'raw_visit', lookupDataId, template=self.template)
        else:
            if dataId is not None:
                for k, v in dataId.items():
                    if self.columns and k not in self.columns:
                        continue
                    if k == self.obsTimeName:
                        continue
                    if k in skyMapKeys:
                        continue
                    where.append((k, '?'))
                    values.append(v)
            lookupDataId = {k[0]: v for k, v in zip(where, values)}
            if self.range:
                # format of self.range is
                # ('?', isBetween-lowKey, isBetween-highKey)
                # here we transform that to {(lowKey, highKey): value}
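                # (e.g. with the default calibration column names this adds
                # {("validStart", "validEnd"): dataId["taiObs"]})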
                lookupDataId[(self.range[1], self.range[2])] = dataId[self.obsTimeName]
            result = self.registry.lookup(properties, self.tables, lookupDataId, template=self.template)
        if not removed:
            return result
        # Iterate over the query results, re-inserting the skymap entries.
        result = [tuple(v if k in removed else item[v] for k, v in substitutions.items())
                  for item in result]
        return result

    def have(self, properties, dataId):
        """Return whether the provided data identifier has all
        the properties in the provided list.

        Parameters
        ----------
        properties : `list` of `str`
            Properties required.
        dataId : `dict`
            Dataset identifier.

        Returns
        -------
        `bool`
            True if all properties are present.
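
        Examples
        --------
        This is purely a key-membership test; illustrative only::

            mapping.have(["visit", "filter"], {"visit": 1})        # False
            mapping.have(["visit"], {"visit": 1, "filter": "r"})   # True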
        """
        for prop in properties:
            if prop not in dataId:
                return False
        return True

    def need(self, properties, dataId):
        """Ensure all properties in the provided list are present in
        the data identifier, looking them up as needed. This is only
        possible for the case where the data identifies a single
        exposure.

        Parameters
        ----------
        properties : `list` of `str`
            Properties required.
        dataId : `dict`
            Partial dataset identifier.

        Returns
        -------
        `dict`
            Copy of dataset identifier with enhanced values.
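
        Examples
        --------
        Illustrative only; requires a registry that resolves the missing
        keys to exactly one match::

            fullId = mapping.need(["filter"], {"visit": 903334})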
        """
        newId = dataId.copy()
        newProps = []  # Properties we don't already have
        for prop in properties:
            if prop not in newId:
                newProps.append(prop)
        if len(newProps) == 0:
            return newId

        lookups = self.lookup(newProps, newId)
        if len(lookups) != 1:
            raise NoResults("No unique lookup for %s from %s: %d matches" %
                            (newProps, newId, len(lookups)),
                            self.datasetType, dataId)
        for i, prop in enumerate(newProps):
            newId[prop] = lookups[0][i]
        return newId


def _formatMap(ch, k, datasetType):
    """Convert a format character into a Python type."""
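    # Illustrative: _formatMap("d", "visit", "raw") -> int,
    # _formatMap("s", "filter", "raw") -> str.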
    if ch in "diouxX":
        return int
    elif ch in "eEfFgG":
        return float
    elif ch in "crs":
        return str
    else:
        raise RuntimeError("Unexpected format specifier %s"
                           " for field %s in template for dataset %s" %
                           (ch, k, datasetType))


class ImageMapping(Mapping):
    """ImageMapping is a Mapping subclass for non-camera images.

    Parameters
    ----------
    datasetType : `str`
        Butler dataset type to be mapped.
    policy : `daf_persistence.Policy`
        Mapping Policy.
    registry : `lsst.obs.base.Registry`
        Registry for metadata lookups.
    root : `str`
        Path of root directory.
    """

    def __init__(self, datasetType, policy, registry, root, **kwargs):
        Mapping.__init__(self, datasetType, policy, registry, root, **kwargs)
        self.columns = policy.asArray('columns') if 'columns' in policy else None


class ExposureMapping(Mapping):
    """ExposureMapping is a Mapping subclass for normal exposures.

    Parameters
    ----------
    datasetType : `str`
        Butler dataset type to be mapped.
    policy : `daf_persistence.Policy`
        Mapping Policy.
    registry : `lsst.obs.base.Registry`
        Registry for metadata lookups.
    root : `str`
        Path of root directory.
    """

    def __init__(self, datasetType, policy, registry, root, **kwargs):
        Mapping.__init__(self, datasetType, policy, registry, root, **kwargs)
        self.columns = policy.asArray('columns') if 'columns' in policy else None

    def standardize(self, mapper, item, dataId):
        return mapper._standardizeExposure(self, item, dataId)


class CalibrationMapping(Mapping):
    """CalibrationMapping is a Mapping subclass for calibration-type products.

    The difference is that data properties in the query or template
    can be looked up using a reference Mapping in addition to this one.

    CalibrationMapping Policies can contain the following:

    reference (string, optional)
        a list of tables for finding missing dataset
        identifier components (including the observation time, if a validity
        range is required) in the exposure registry; note that the "tables"
        entry refers to the calibration registry

    refCols (string, optional)
        a list of dataset properties required from the
        reference tables for lookups in the calibration registry

    validRange (bool)
        true if the calibration dataset has a validity range
        specified by a column in the tables of the reference dataset in the
        exposure registry and two columns in the tables of this calibration
        dataset in the calibration registry

    obsTimeName (string, optional)
        the name of the column in the reference
        dataset tables containing the observation time (default "taiObs")

    validStartName (string, optional)
        the name of the column in the
        calibration dataset tables containing the start of the validity range
        (default "validStart")

    validEndName (string, optional)
        the name of the column in the
        calibration dataset tables containing the end of the validity range
        (default "validEnd")

    Parameters
    ----------
    datasetType : `str`
        Butler dataset type to be mapped.
    policy : `daf_persistence.Policy`
        Mapping Policy.
    registry : `lsst.obs.base.Registry`
        Registry for metadata lookups.
    calibRegistry : `lsst.obs.base.Registry`
        Registry for calibration metadata lookups.
    calibRoot : `str`
        Path of calibration root directory.
    dataRoot : `str`
        Path of data root directory; used for outputs only.
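
    Examples
    --------
    A hypothetical policy fragment for a ``flat`` dataset; all names and
    values are illustrative only::

        template: "flat/f%(filter)s/flat.fits"
        python: "lsst.afw.image.ExposureF"
        persistable: "ExposureF"
        storage: "FitsStorage"
        reference: "raw_visit"
        refCols: "visit filter"
        validRange: true
        obsTimeName: "taiObs"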
    """

    def __init__(self, datasetType, policy, registry, calibRegistry, calibRoot, dataRoot=None, **kwargs):
        Mapping.__init__(self, datasetType, policy, calibRegistry, calibRoot, **kwargs)
        self.reference = policy.asArray("reference") if "reference" in policy else None
        self.refCols = policy.asArray("refCols") if "refCols" in policy else None
        self.refRegistry = registry
        self.dataRoot = dataRoot
        if "validRange" in policy and policy["validRange"]:
            self.range = ("?", policy["validStartName"], policy["validEndName"])
        if "columns" in policy:
            self.columns = policy.asArray("columns")
        if "filter" in policy:
            self.setFilter = policy["filter"]
        self.metadataKeys = None
        if "metadataKey" in policy:
            self.metadataKeys = policy.asArray("metadataKey")

    def map(self, mapper, dataId, write=False):
        location = Mapping.map(self, mapper, dataId, write=write)
        # Want outputs to be in the output directory
        if write and self.dataRoot:
            location.storage = self.dataRoot
        return location

    def lookup(self, properties, dataId):
        """Look up properties in a metadata registry given a partial
        dataset identifier.

        Parameters
        ----------
        properties : `list` of `str`
            Properties to look up.
        dataId : `dict`
            Dataset identifier.

        Returns
        -------
        `list` of `tuple`
            Values of properties.
        """

        # Either look up taiObs in reference and then all in calibRegistry,
        # or look up all in registry

        newId = dataId.copy()
        if self.reference is not None:
            where = []
            values = []
            for k, v in dataId.items():
                if self.refCols and k not in self.refCols:
                    continue
                where.append(k)
                values.append(v)

            # Columns we need from the regular registry
            if self.columns is not None:
                columns = set(self.columns)
                for k in dataId.keys():
                    columns.discard(k)
            else:
                columns = set(properties)

            if not columns:
                # Nothing to look up in the reference registry; continue with
                # the calib registry
                return Mapping.lookup(self, properties, newId)

            lookupDataId = dict(zip(where, values))
            lookups = self.refRegistry.lookup(columns, self.reference, lookupDataId)
            if len(lookups) != 1:
                raise RuntimeError("No unique lookup for %s from %s: %d matches" %
                                   (columns, dataId, len(lookups)))
            if columns == set(properties):
                # Have everything we need
                return lookups
            for i, prop in enumerate(columns):
                newId[prop] = lookups[0][i]
        return Mapping.lookup(self, properties, newId)

    def standardize(self, mapper, item, dataId):
        """Default standardization function for calibration datasets.

        If the item is of a type that should be standardized, the base class
        ``standardizeExposure`` method is called, otherwise the item is
        returned unmodified.

        Parameters
        ----------
        mapper : `lsst.obs.base.CameraMapper`
            Mapper object to pass through.
        item : object
            Will be standardized if of type lsst.afw.image.Exposure,
            lsst.afw.image.DecoratedImage, lsst.afw.image.Image
            or lsst.afw.image.MaskedImage.
        dataId : `dict`
            Dataset identifier.

        Returns
        -------
        `lsst.afw.image.Exposure` or item
            The standardized object.
        """
        if issubclass(doImport(self.python), (Exposure, MaskedImage, Image, DecoratedImage)):
            return mapper._standardizeExposure(self, item, dataId, filter=self.setFilter)
        return item


class DatasetMapping(Mapping):
    """DatasetMapping is a Mapping subclass for non-Exposure datasets that can
    be retrieved by the standard daf_persistence mechanism.

    The differences are that the Storage type must be specified and no
    Exposure standardization is performed.

    The "storage" entry in the Policy is mandatory; the "tables" entry is
    optional; no "level" entry is allowed.

    Parameters
    ----------
    datasetType : `str`
        Butler dataset type to be mapped.
    policy : `daf_persistence.Policy`
        Mapping Policy.
    registry : `lsst.obs.base.Registry`
        Registry for metadata lookups.
    root : `str`
        Path of root directory.
    """

    def __init__(self, datasetType, policy, registry, root, **kwargs):
        Mapping.__init__(self, datasetType, policy, registry, root, **kwargs)
        self.storage = policy["storage"]  # Storage type