Coverage for python/lsst/obs/base/mapping.py: 12% of 204 statements
# This file is part of obs_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

import os
import re
from collections import OrderedDict

from lsst.afw.image import DecoratedImage, Exposure, Image, MaskedImage
from lsst.daf.base import PropertySet
from lsst.daf.persistence import ButlerLocation, NoResults
from lsst.utils import doImportType

__all__ = ["Mapping", "ImageMapping", "ExposureMapping", "CalibrationMapping", "DatasetMapping"]


class Mapping(object):
36 """Mapping is a base class for all mappings. Mappings are used by
37 the Mapper to map (determine a path to some data given some
38 identifiers) and standardize (convert data into some standard
39 format or type) data, and to query the associated registry to see
40 what data is available.
42 Subclasses must specify self.storage or else override self.map().
44 Public methods: lookup, have, need, getKeys, map
46 Mappings are specified mainly by policy. A Mapping policy should
47 consist of:
49 template (string): a Python string providing the filename for that
50 particular dataset type based on some data identifiers. In the
51 case of redundancy in the path (e.g., file uniquely specified by
52 the exposure number, but filter in the path), the
53 redundant/dependent identifiers can be looked up in the registry.
55 python (string): the Python type for the retrieved data (e.g.
56 lsst.afw.image.ExposureF)
58 persistable (string): the Persistable registration for the on-disk data
59 (e.g. ImageU)
61 storage (string, optional): Storage type for this dataset type (e.g.
62 "FitsStorage")
64 level (string, optional): the level in the camera hierarchy at which the
65 data is stored (Amp, Ccd or skyTile), if relevant
67 tables (string, optional): a whitespace-delimited list of tables in the
68 registry that can be NATURAL JOIN-ed to look up additional
69 information.
71 Parameters
72 ----------
73 datasetType : `str`
74 Butler dataset type to be mapped.
75 policy : `daf_persistence.Policy`
76 Mapping Policy.
77 registry : `lsst.obs.base.Registry`
78 Registry for metadata lookups.
79 rootStorage : Storage subclass instance
80 Interface to persisted repository data.
81 provided : `list` of `str`
82 Keys provided by the mapper.
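
    Examples
    --------
    A minimal sketch of a mapping policy in ``.paf`` form (the dataset type,
    template, and table names here are illustrative, not taken from any
    particular obs package)::

        calexp: {
            template:    "calexp/v%(visit)d-f%(filter)s.fits"
            python:      "lsst.afw.image.ExposureF"
            persistable: "ExposureF"
            storage:     "FitsStorage"
            tables:      "raw"
        }

    The ``%(visit)d`` and ``%(filter)s`` template fields make ``visit``
    (an `int`) and ``filter`` (a `str`) required keys for this mapping.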
83 """

    def __init__(self, datasetType, policy, registry, rootStorage, provided=None):

        if policy is None:
            raise RuntimeError("No policy provided for mapping")

        self.datasetType = datasetType
        self.registry = registry
        self.rootStorage = rootStorage

        self._template = policy["template"]  # Template path
        # In most cases the template cannot be used if it is empty, and it is
        # accessed via a property that will raise if it is used while
        # `not self._template`. In this case we *do* allow it to be empty, for
        # the purpose of fetching the key dict so that the mapping can be
        # constructed, so that it can raise if it's invalid. I know it's a
        # little odd, but it allows this template check to be introduced
        # without a major refactor.
        if self._template:
            self.keyDict = dict(
                [
                    (k, _formatMap(v, k, datasetType))
                    for k, v in re.findall(r"\%\((\w+)\).*?([diouxXeEfFgGcrs])", self.template)
                ]
            )
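            # For example (illustrative): a template of
            # "raw/v%(visit)d-f%(filter)s.fits" produces
            # keyDict == {"visit": int, "filter": str}.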
        else:
            self.keyDict = {}
        if provided is not None:
            for p in provided:
                if p in self.keyDict:
                    del self.keyDict[p]
        self.python = policy["python"]  # Python type
        self.persistable = policy["persistable"]  # Persistable type
        self.storage = policy["storage"]
        if "level" in policy:
            self.level = policy["level"]  # Level in camera hierarchy
        if "tables" in policy:
            self.tables = policy.asArray("tables")
        else:
            self.tables = None
        self.range = None
        self.columns = None
        self.obsTimeName = policy["obsTimeName"] if "obsTimeName" in policy else None
        self.recipe = policy["recipe"] if "recipe" in policy else "default"

    @property
    def template(self):
        if self._template:  # template must not be an empty string or None
            return self._template
        else:
            raise RuntimeError(
                f"Template is not defined for the {self.datasetType} dataset type, "
                "it must be set before it can be used."
            )

    def keys(self):
        """Return the dict of keys and value types required by this mapping."""
        return self.keyDict

    def map(self, mapper, dataId, write=False):
        """Standard implementation of map function.

        Parameters
        ----------
        mapper : `lsst.daf.persistence.Mapper`
            Object to be mapped.
        dataId : `dict`
            Dataset identifier.

        Returns
        -------
        `lsst.daf.persistence.ButlerLocation`
            Location of object that was mapped.
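
        Examples
        --------
        A sketch of a typical call (the mapper instance and the data ID
        values here are hypothetical)::

            loc = mapping.map(mapper, {"visit": 1004, "ccd": 11})
            # loc is a ButlerLocation whose locationList holds the
            # template-expanded path.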
157 """
        actualId = self.need(iter(self.keyDict.keys()), dataId)
        usedDataId = {key: actualId[key] for key in self.keyDict.keys()}
        path = mapper._mapActualToPath(self.template, actualId)
        if os.path.isabs(path):
            raise RuntimeError("Mapped path should not be absolute.")
        if not write:
            # This allows mapped files to be compressed, ending in .gz or .fz,
            # without any indication from the policy that the file should be
            # compressed, easily allowing repositories to contain a combination
            # of compressed and uncompressed files.
            # If needed we can add a policy flag to allow compressed files or
            # not, and perhaps a list of allowed extensions that may exist
            # at the end of the template.
            for ext in (None, ".gz", ".fz"):
                if ext and path.endswith(ext):
                    continue  # if the path already ends with the extension
                extPath = path + ext if ext else path
                newPath = self.rootStorage.instanceSearch(extPath)
                if newPath:
                    path = newPath
                    break
        assert path, "Fully-qualified filename is empty."

        addFunc = "add_" + self.datasetType  # Name of method for additionalData
        if hasattr(mapper, addFunc):
            addFunc = getattr(mapper, addFunc)
            additionalData = addFunc(self.datasetType, actualId)
            assert isinstance(additionalData, PropertySet), "Bad type for returned data: %s" % (
                type(additionalData),
            )
        else:
            additionalData = None

        return ButlerLocation(
            pythonType=self.python,
            cppType=self.persistable,
            storageName=self.storage,
            locationList=path,
            dataId=actualId.copy(),
            mapper=mapper,
            storage=self.rootStorage,
            usedDataId=usedDataId,
            datasetType=self.datasetType,
            additionalData=additionalData,
        )

    def lookup(self, properties, dataId):
        """Look up properties in a metadata registry given a partial
        dataset identifier.

        Parameters
        ----------
        properties : `list` of `str`
            What to look up.
        dataId : `dict`
            Dataset identifier.

        Returns
        -------
        `list` of `tuple`
            Values of properties.
        """
        if self.registry is None:
            raise RuntimeError("No registry for lookup")

        skyMapKeys = ("tract", "patch")

        where = []
        values = []

        # Prepare to remove skymap entries from properties list. These must
        # be in the data ID, so we store which ones we're removing and create
        # an OrderedDict that tells us where to re-insert them. That maps the
        # name of the property to either its index in the properties list
        # *after* the skymap ones have been removed (for entries that aren't
        # skymap ones) or the value from the data ID (for those that are).
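        # For example (illustrative): properties=["visit", "tract", "filter"]
        # with dataId={"tract": 8} yields
        # substitutions == OrderedDict(visit=0, tract=8, filter=1), so the
        # registry is queried for ["visit", "filter"] and the tract value is
        # spliced back into each result row afterwards.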
        removed = set()
        substitutions = OrderedDict()
        index = 0
        properties = list(properties)  # don't modify the original list
        for p in properties:
            if p in skyMapKeys:
                try:
                    substitutions[p] = dataId[p]
                    removed.add(p)
                except KeyError:
                    raise RuntimeError(
                        "Cannot look up skymap key '%s'; it must be explicitly included in the data ID" % p
                    )
            else:
                substitutions[p] = index
                index += 1
        # Can't actually remove while iterating above, so we do it here.
        for p in removed:
            properties.remove(p)
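
        # Fast path: lookups that only need filter/expTime/taiObs for a
        # known visit can be answered directly from the raw_visit table.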
        fastPath = True
        for p in properties:
            if p not in ("filter", "expTime", "taiObs"):
                fastPath = False
                break
        if fastPath and "visit" in dataId and "raw" in self.tables:
            lookupDataId = {"visit": dataId["visit"]}
            result = self.registry.lookup(properties, "raw_visit", lookupDataId, template=self.template)
        else:
            if dataId is not None:
                for k, v in dataId.items():
                    if self.columns and k not in self.columns:
                        continue
                    if k == self.obsTimeName:
                        continue
                    if k in skyMapKeys:
                        continue
                    where.append((k, "?"))
                    values.append(v)
            lookupDataId = {k[0]: v for k, v in zip(where, values)}
            if self.range:
                # format of self.range is
                # ('?', isBetween-lowKey, isBetween-highKey)
                # here we transform that to {(lowKey, highKey): value}
                lookupDataId[(self.range[1], self.range[2])] = dataId[self.obsTimeName]
            result = self.registry.lookup(properties, self.tables, lookupDataId, template=self.template)
        if not removed:
            return result
        # Iterate over the query results, re-inserting the skymap entries.
        result = [tuple(v if k in removed else item[v] for k, v in substitutions.items()) for item in result]
        return result

    def have(self, properties, dataId):
        """Returns whether the provided data identifier has all
        the properties in the provided list.

        Parameters
        ----------
        properties : `list` of `str`
            Properties required.
        dataId : `dict`
            Dataset identifier.

        Returns
        -------
        `bool`
            True if all properties are present.
        """
        for prop in properties:
            if prop not in dataId:
                return False
        return True

    def need(self, properties, dataId):
        """Ensures all properties in the provided list are present in
        the data identifier, looking them up as needed. This is only
        possible for the case where the data identifies a single
        exposure.

        Parameters
        ----------
        properties : `list` of `str`
            Properties required.
        dataId : `dict`
            Partial dataset identifier.

        Returns
        -------
        `dict`
            Copy of dataset identifier with enhanced values.
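
        Examples
        --------
        A sketch of the behavior (the data ID and the looked-up filter value
        are hypothetical)::

            newId = mapping.need(["filter"], {"visit": 1004})
            # newId == {"visit": 1004, "filter": "r"} if the registry holds
            # exactly one matching row; NoResults is raised otherwise.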
324 """
        newId = dataId.copy()
        newProps = []  # Properties we don't already have
        for prop in properties:
            if prop not in newId:
                newProps.append(prop)
        if len(newProps) == 0:
            return newId

        lookups = self.lookup(newProps, newId)
        if len(lookups) != 1:
            raise NoResults(
                "No unique lookup for %s from %s: %d matches" % (newProps, newId, len(lookups)),
                self.datasetType,
                dataId,
            )
        for i, prop in enumerate(newProps):
            newId[prop] = lookups[0][i]
        return newId


def _formatMap(ch, k, datasetType):
    """Convert a format character into a Python type."""
    if ch in "diouxX":
        return int
    elif ch in "eEfFgG":
        return float
    elif ch in "crs":
        return str
    else:
        raise RuntimeError(
            "Unexpected format specifier %s for field %s in template for dataset %s" % (ch, k, datasetType)
        )


class ImageMapping(Mapping):
    """ImageMapping is a Mapping subclass for non-camera images.

    Parameters
    ----------
    datasetType : `str`
        Butler dataset type to be mapped.
    policy : `daf_persistence.Policy`
        Mapping Policy.
    registry : `lsst.obs.base.Registry`
        Registry for metadata lookups.
    root : `str`
        Path of root directory.
    """

    def __init__(self, datasetType, policy, registry, root, **kwargs):
        Mapping.__init__(self, datasetType, policy, registry, root, **kwargs)
        self.columns = policy.asArray("columns") if "columns" in policy else None


class ExposureMapping(Mapping):
    """ExposureMapping is a Mapping subclass for normal exposures.

    Parameters
    ----------
    datasetType : `str`
        Butler dataset type to be mapped.
    policy : `daf_persistence.Policy`
        Mapping Policy.
    registry : `lsst.obs.base.Registry`
        Registry for metadata lookups.
    root : `str`
        Path of root directory.
    """

    def __init__(self, datasetType, policy, registry, root, **kwargs):
        Mapping.__init__(self, datasetType, policy, registry, root, **kwargs)
        self.columns = policy.asArray("columns") if "columns" in policy else None

    def standardize(self, mapper, item, dataId):
        return mapper._standardizeExposure(self, item, dataId)


class CalibrationMapping(Mapping):
    """CalibrationMapping is a Mapping subclass for calibration-type products.

    The difference is that data properties in the query or template
    can be looked up using a reference Mapping in addition to this one.

    CalibrationMapping Policies can contain the following:

    reference (string, optional)
        a list of tables for finding missing dataset
        identifier components (including the observation time, if a validity
        range is required) in the exposure registry; note that the "tables"
        entry refers to the calibration registry

    refCols (string, optional)
        a list of dataset properties required from the
        reference tables for lookups in the calibration registry

    validRange (bool)
        true if the calibration dataset has a validity range
        specified by a column in the tables of the reference dataset in the
        exposure registry and two columns in the tables of this calibration
        dataset in the calibration registry

    obsTimeName (string, optional)
        the name of the column in the reference
        dataset tables containing the observation time (default "taiObs")

    validStartName (string, optional)
        the name of the column in the
        calibration dataset tables containing the start of the validity range
        (default "validStart")

    validEndName (string, optional)
        the name of the column in the
        calibration dataset tables containing the end of the validity range
        (default "validEnd")

    Parameters
    ----------
    datasetType : `str`
        Butler dataset type to be mapped.
    policy : `daf_persistence.Policy`
        Mapping Policy.
    registry : `lsst.obs.base.Registry`
        Registry for metadata lookups.
    calibRegistry : `lsst.obs.base.Registry`
        Registry for calibration metadata lookups.
    calibRoot : `str`
        Path of calibration root directory.
    dataRoot : `str`
        Path of data root directory; used for outputs only.
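
    Examples
    --------
    A sketch of a calibration mapping policy in ``.paf`` form (the dataset
    type, template, and table names are illustrative)::

        bias: {
            template:    "bias/%(calibDate)s/bias-%(ccd)02d.fits"
            python:      "lsst.afw.image.ExposureF"
            persistable: "ExposureF"
            storage:     "FitsStorage"
            tables:      "bias"
            reference:   "raw_visit"
            validRange:  true
            obsTimeName: "taiObs"
        }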
454 """

    def __init__(self, datasetType, policy, registry, calibRegistry, calibRoot, dataRoot=None, **kwargs):
        Mapping.__init__(self, datasetType, policy, calibRegistry, calibRoot, **kwargs)
        self.reference = policy.asArray("reference") if "reference" in policy else None
        self.refCols = policy.asArray("refCols") if "refCols" in policy else None
        self.refRegistry = registry
        self.dataRoot = dataRoot
        if "validRange" in policy and policy["validRange"]:
            self.range = ("?", policy["validStartName"], policy["validEndName"])
        if "columns" in policy:
            self.columns = policy.asArray("columns")
        if "filter" in policy:
            self.setFilter = policy["filter"]
        self.metadataKeys = None
        if "metadataKey" in policy:
            self.metadataKeys = policy.asArray("metadataKey")

    def map(self, mapper, dataId, write=False):
        location = Mapping.map(self, mapper, dataId, write=write)
        # Want outputs to be in the output directory
        if write and self.dataRoot:
            location.storage = self.dataRoot
        return location

    def lookup(self, properties, dataId):
        """Look up properties in a metadata registry given a partial
        dataset identifier.

        Parameters
        ----------
        properties : `list` of `str`
            Properties to look up.
        dataId : `dict`
            Dataset identifier.

        Returns
        -------
        `list` of `tuple`
            Values of properties.
        """

        # Either look up the observation time (and any other missing keys) in
        # the reference tables of the exposure registry and then look up
        # everything in the calibration registry, or look up everything in the
        # calibration registry directly.
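        # For example (illustrative): with reference="raw_visit" and a data ID
        # of {"visit": 1004}, the visit's taiObs is fetched from the exposure
        # registry first, and that time is then matched against the
        # validStart/validEnd range of each calibration in the calibration
        # registry.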
        newId = dataId.copy()
        if self.reference is not None:
            where = []
            values = []
            for k, v in dataId.items():
                if self.refCols and k not in self.refCols:
                    continue
                where.append(k)
                values.append(v)

            # Columns we need from the regular registry
            if self.columns is not None:
                columns = set(self.columns)
                for k in dataId.keys():
                    columns.discard(k)
            else:
                columns = set(properties)

            if not columns:
                # Nothing to look up in reference registry; continue with
                # calib registry
                return Mapping.lookup(self, properties, newId)

            lookupDataId = dict(zip(where, values))
            lookups = self.refRegistry.lookup(columns, self.reference, lookupDataId)
            if len(lookups) != 1:
                raise RuntimeError(
                    "No unique lookup for %s from %s: %d matches" % (columns, dataId, len(lookups))
                )
            if columns == set(properties):
                # Have everything we need
                return lookups
            for i, prop in enumerate(columns):
                newId[prop] = lookups[0][i]
        return Mapping.lookup(self, properties, newId)

    def standardize(self, mapper, item, dataId):
        """Default standardization function for calibration datasets.

        If the item is of a type that should be standardized, the base class
        ``standardizeExposure`` method is called, otherwise the item is
        returned unmodified.

        Parameters
        ----------
        mapper : `lsst.daf.persistence.Mapper`
            Mapper to use for the standardization.
        item : object
            Will be standardized if of type `lsst.afw.image.Exposure`,
            `lsst.afw.image.DecoratedImage`, `lsst.afw.image.Image`
            or `lsst.afw.image.MaskedImage`.
        dataId : `dict`
            Dataset identifier.

        Returns
        -------
        `lsst.afw.image.Exposure` or item
            The standardized object.
        """
        if issubclass(doImportType(self.python), (Exposure, MaskedImage, Image, DecoratedImage)):
            return mapper._standardizeExposure(self, item, dataId, filter=self.setFilter)
        return item


class DatasetMapping(Mapping):
    """DatasetMapping is a Mapping subclass for non-Exposure datasets that can
    be retrieved by the standard daf_persistence mechanism.

    The differences are that the Storage type must be specified and no
    Exposure standardization is performed.

    The "storage" entry in the Policy is mandatory; the "tables" entry is
    optional; no "level" entry is allowed.

    Parameters
    ----------
    datasetType : `str`
        Butler dataset type to be mapped.
    policy : `daf_persistence.Policy`
        Mapping Policy.
    registry : `lsst.obs.base.Registry`
        Registry for metadata lookups.
    root : `str`
        Path of root directory.
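
    Examples
    --------
    A sketch of a dataset mapping policy in ``.paf`` form (the dataset type
    and values are illustrative)::

        someConfig: {
            template:    "config/someConfig.py"
            python:      "lsst.pex.config.Config"
            persistable: "Config"
            storage:     "ConfigStorage"
        }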
584 """

    def __init__(self, datasetType, policy, registry, root, **kwargs):
        Mapping.__init__(self, datasetType, policy, registry, root, **kwargs)
        self.storage = policy["storage"]  # Storage type