Coverage for python/lsst/daf/butler/core/fileTemplates.py: 12%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Support for file template string expansion."""

__all__ = ("FileTemplates", "FileTemplate", "FileTemplatesConfig", "FileTemplateValidationError")

import os.path
import string
import logging
from types import MappingProxyType

from .config import Config
from .configSupport import processLookupConfigs, LookupKey
from .exceptions import ValidationError
from .dimensions import SkyPixDimension, DataCoordinate

log = logging.getLogger(__name__)


class FileTemplateValidationError(ValidationError):
    """Exception thrown when a file template is not consistent with the
    associated `DatasetType`."""


class FileTemplatesConfig(Config):
    """Configuration information for `FileTemplates`."""


class FileTemplates:
    """Collection of `FileTemplate` templates.

    Parameters
    ----------
    config : `FileTemplatesConfig` or `str`
        Load configuration.
    default : `str`, optional
        If not `None`, a default template to use if no template has
        been specified explicitly in the configuration.
    universe : `DimensionUniverse`
        The set of all known dimensions, used to normalize any lookup keys
        involving dimensions.

    Notes
    -----
    The configuration can include one level of hierarchy where an
    instrument-specific section can be defined to override more general
    template specifications. This is represented in YAML using a
    key of the form ``instrument<name>``, which can then define templates
    that will be returned if a `DatasetRef` contains a matching instrument
    name in the data ID.

    A default fallback template can be specified using the key ``default``.
    Defaulting can be disabled in a child configuration by defining the
    value to be an empty string or a boolean `False`.

    The config is parsed using the function
    `~lsst.daf.butler.configSupport.processLookupConfigs`.
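
    Examples
    --------
    A hedged sketch of constructing templates from an in-memory configuration
    (the ``calexp`` key, the template strings, and the instrument name are
    illustrative only, and ``universe`` is assumed to be a previously
    constructed `DimensionUniverse`)::

        config = FileTemplatesConfig({
            "default": "{run}/{datasetType}/{visit:?}",
            "calexp": "{run}/{datasetType}/{visit:06d}",
            "instrument<HSC>": {
                "calexp": "{run}/hsc/{datasetType}/{visit:08d}",
            },
        })
        templates = FileTemplates(config, universe=universe)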
79 """
81 defaultKey = LookupKey("default")
82 """Configuration key associated with the default template."""

    def __init__(self, config, default=None, *, universe):
        self.config = FileTemplatesConfig(config)
        self._templates = {}
        self.default = FileTemplate(default) if default is not None else None
        contents = processLookupConfigs(self.config, universe=universe)

        # Convert all the values to FileTemplate, handling defaults
        for key, templateStr in contents.items():
            if key == self.defaultKey:
                if not templateStr:
                    self.default = None
                else:
                    self.default = FileTemplate(templateStr)
            else:
                self._templates[key] = FileTemplate(templateStr)

    @property
    def templates(self):
        """Collection of templates indexed by lookup key (`dict`)."""
        return MappingProxyType(self._templates)

    def __contains__(self, key):
        """Indicates whether the supplied key is present in the templates.

        Parameters
        ----------
        key : `LookupKey`
            Key to use to determine if a corresponding value is present
            in the templates.

        Returns
        -------
        in : `bool`
            `True` if the supplied key is present in the templates.
        """
        return key in self.templates

    def __getitem__(self, key):
        return self.templates[key]

    def validateTemplates(self, entities, logFailures=False):
        """Retrieve the template associated with each dataset type and
        validate the dimensions against the template.

        Parameters
        ----------
        entities : iterable of `DatasetType`, `DatasetRef`, or `StorageClass`
            Entities to validate against the matching templates. Can be
            of differing types.
        logFailures : `bool`, optional
            If `True`, output a log message for every validation error
            detected.

        Raises
        ------
        FileTemplateValidationError
            Raised if an entity failed validation.

        Notes
        -----
        See `FileTemplate.validateTemplate()` for details on the validation.
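
        Examples
        --------
        A hedged sketch (``templates`` is assumed to be a configured
        `FileTemplates` and ``datasetTypes`` an iterable of `DatasetType`)::

            templates.validateTemplates(datasetTypes, logFailures=True)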
145 """
        unmatchedKeys = set(self.templates)
        failed = []
        for entity in entities:
            try:
                matchKey, template = self.getTemplateWithMatch(entity)
            except KeyError as e:
                # KeyError always quotes on stringification so strip here
                errMsg = str(e).strip('"\'')
                failed.append(errMsg)
                if logFailures:
                    log.fatal("%s", errMsg)
                continue

            if matchKey in unmatchedKeys:
                unmatchedKeys.remove(matchKey)

            try:
                template.validateTemplate(entity)
            except FileTemplateValidationError as e:
                failed.append(f"{e} (via key '{matchKey}')")
                if logFailures:
                    log.fatal("Template failure with key '%s': %s", matchKey, e)

        if logFailures and unmatchedKeys:
            log.warning("Unchecked keys: %s", ", ".join([str(k) for k in unmatchedKeys]))

        if failed:
            if len(failed) == 1:
                msg = str(failed[0])
            else:
                failMsg = ";\n".join(failed)
                msg = f"{len(failed)} template validation failures: {failMsg}"
            raise FileTemplateValidationError(msg)

    def getLookupKeys(self):
        """Retrieve the lookup keys for all the template entries.

        Returns
        -------
        keys : `set` of `LookupKey`
            The keys available for matching a template.
        """
        return set(self.templates)

    def getTemplateWithMatch(self, entity):
        """Retrieve the `FileTemplate` associated with the dataset type along
        with the lookup key that was a match for this template.

        If the lookup name corresponds to a component, the base name of
        the component will be examined if the full component name does
        not match.

        Parameters
        ----------
        entity : `DatasetType`, `DatasetRef`, or `StorageClass`
            Instance to use to look for a corresponding template.
            A `DatasetType` name or a `StorageClass` name will be used
            depending on the supplied entity. Priority is given to a
            `DatasetType` name. An instrument override is supported if a
            `DatasetRef` is provided whose data ID includes an
            ``instrument`` value.

        Returns
        -------
        matchKey : `LookupKey`
            The key that resulted in the successful match.
        template : `FileTemplate`
            Template instance to use with that dataset type.

        Raises
        ------
        KeyError
            Raised if no template could be located for this dataset type.
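
        Examples
        --------
        A hedged sketch (``templates`` is assumed to be a configured
        `FileTemplates` and ``datasetType`` a `DatasetType` whose name
        matches a configured key)::

            key, template = templates.getTemplateWithMatch(datasetType)
            # ``key`` is the LookupKey that matched; ``template`` is the
            # FileTemplate to use for this dataset type.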
219 """
        # Get the names to use for lookup
        names = entity._lookupNames()

        # Get a location from the templates
        template = self.default
        source = self.defaultKey
        for name in names:
            if name in self.templates:
                template = self.templates[name]
                source = name
                break

        if template is None:
            raise KeyError(f"Unable to determine file template from supplied argument [{entity}]")

        log.debug("Got file template %s from %s via %s", template, entity, source)

        return source, template

    def getTemplate(self, entity):
        """Retrieve the `FileTemplate` associated with the dataset type.

        If the lookup name corresponds to a component, the base name of
        the component will be examined if the full component name does
        not match.

        Parameters
        ----------
        entity : `DatasetType`, `DatasetRef`, or `StorageClass`
            Instance to use to look for a corresponding template.
            A `DatasetType` name or a `StorageClass` name will be used
            depending on the supplied entity. Priority is given to a
            `DatasetType` name. An instrument override is supported if a
            `DatasetRef` is provided whose data ID includes an
            ``instrument`` value.

        Returns
        -------
        template : `FileTemplate`
            Template instance to use with that dataset type.

        Raises
        ------
        KeyError
            Raised if no template could be located for this dataset type.
        """
        _, template = self.getTemplateWithMatch(entity)
        return template


class FileTemplate:
    """Format a path template into a fully expanded path.

    Parameters
    ----------
    template : `str`
        Template string.

    Raises
    ------
    FileTemplateValidationError
        Raised if the template fails basic validation.
284 -----
285 The templates use the standard Format Specification Mini-Language
286 with the caveat that only named fields can be used. The field names
287 are taken from the Dimensions along with several additional fields:
289 - datasetType: `str`, `DatasetType.name`
290 - component: `str`, name of the StorageClass component
291 - run: `str`, name of the run this dataset was added with
292 - collection: synonoym for ``run``
294 At least one of `run` or `collection` must be provided to ensure unique
295 filenames.
297 More detailed information can be requested from dimensions by using a dot
298 notation, so ``visit.name`` would use the name of the visit and
299 ``detector.name_in_raft`` would use the name of the detector within the
300 raft.
302 The mini-language is extended to understand a "?" in the format
303 specification. This indicates that a field is optional. If that
304 Dimension is missing the field, along with the text before the field,
305 unless it is a path separator, will be removed from the output path.
307 By default any "/" in a dataId value will be replaced by "_" to prevent
308 unexpected directories being created in the path. If the "/" should be
309 retained then a special "/" format specifier can be included in the
310 template.
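
    Examples
    --------
    Some template strings using the syntax described above (the ``visit``
    field and the padding widths are illustrative only)::

        "{run}/{datasetType}/{visit:06d}"              # zero-padded visit
        "{run}/{datasetType}/{visit}.{component:?}"    # optional component
        "{run}/{datasetType}/{detector.name_in_raft}"  # dot notation lookup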
311 """
313 mandatoryFields = {"collection", "run"}
314 """A set of fields, one of which must be present in a template."""
316 datasetFields = {"datasetType", "component"}
317 """Fields related to the supplied dataset, not a dimension."""
319 specialFields = mandatoryFields | datasetFields
320 """Set of special fields that are available independently of the defined
321 Dimensions."""

    def __init__(self, template):
        if not isinstance(template, str):
            raise FileTemplateValidationError(f"Template ('{template}') is not a string")
        self.template = template

        # Do basic validation without access to dimensions
        self.validateTemplate(None)

    def __eq__(self, other):
        if not isinstance(other, FileTemplate):
            return False

        return self.template == other.template

    def __str__(self):
        return self.template

    def __repr__(self):
        return f'{self.__class__.__name__}("{self.template}")'

    def fields(self, optionals=False, specials=False, subfields=False):
        """Return the field names used in this template.

        Parameters
        ----------
        optionals : `bool`, optional
            If `True`, optional fields are included in the returned set.
        specials : `bool`, optional
            If `True`, non-dimension fields are included.
        subfields : `bool`, optional
            If `True`, fields with syntax ``a.b`` are included. If `False`,
            the default, only ``a`` would be returned.

        Returns
        -------
        names : `set`
            Names of fields used in this template.

        Notes
        -----
        The returned set will include special values such as ``datasetType``
        and ``component`` only if ``specials`` is `True`.
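
        Examples
        --------
        A minimal sketch (the ``visit`` field and the padding are
        illustrative only)::

            t = FileTemplate("{run}/{datasetType}/{visit:06d}{component:?}")
            t.fields()                               # {'visit'}
            t.fields(specials=True)                  # adds 'run', 'datasetType'
            t.fields(specials=True, optionals=True)  # adds 'component'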
366 """
367 fmt = string.Formatter()
368 parts = fmt.parse(self.template)
370 names = set()
371 for literal, field_name, format_spec, conversion in parts:
372 if field_name is not None:
373 if "?" in format_spec and not optionals:
374 continue
376 if not specials and field_name in self.specialFields:
377 continue
379 if "." in field_name and not subfields:
380 field_name, _ = field_name.split(".")
382 names.add(field_name)
384 return names

    def format(self, ref):
        """Format a template string into a full path.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset to be formatted.

        Returns
        -------
        path : `str`
            Expanded path.

        Raises
        ------
        KeyError
            Raised if a field requested by the template is not defined for
            this dataset and is not optional, if the dataset has a component
            but the template does not use a ``component`` field, or if the
            template uses neither ``run`` nor ``collection``.
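
        Examples
        --------
        A hedged sketch using duck-typed stand-ins for `DatasetRef` (as the
        comments in this method note, test code can supply a plain dict as
        the data ID; ``FakeType`` and ``FakeRef`` are illustrative only)::

            class FakeType:
                def nameAndComponent(self):
                    return "calexp", None

            class FakeRef:
                dataId = {"visit": 42}
                datasetType = FakeType()
                run = "r1"

            t = FileTemplate("{run}/{datasetType}/{visit:06d}{component:?}")
            t.format(FakeRef())  # -> 'r1/calexp/000042'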
405 """
406 # Extract defined non-None dimensions from the dataId
407 # We attempt to get the "full" dict on the assumption that ref.dataId
408 # is a ExpandedDataCoordinate, as it should be when running
409 # PipelineTasks. We should probably just require that when formatting
410 # templates (and possibly when constructing DatasetRefs), but doing so
411 # would break a ton of otherwise-useful tests that would need to be
412 # modified to provide a lot more metadata.
413 fields = {k: v for k, v in getattr(ref.dataId, "full", ref.dataId).items() if v is not None}
415 if isinstance(ref.dataId, DataCoordinate):
416 # If there is exactly one SkyPixDimension in the data ID, alias its
417 # value with the key "skypix", so we can use that to match any
418 # skypix dimension.
419 # We restrict this behavior to the (real-world) case where the
420 # data ID is a DataCoordinate, not just a dict. That should only
421 # not be true in some test code, but that test code is a pain to
422 # update to be more like the real world while still providing our
423 # only tests of important behavior.
424 skypix = [dimension for dimension in ref.dataId.graph if isinstance(dimension, SkyPixDimension)]
425 if len(skypix) == 1:
426 fields["skypix"] = fields[skypix[0]]

        # Extra information that can be included using . syntax
        extras = getattr(ref.dataId, "records", {})

        datasetType = ref.datasetType
        fields["datasetType"], component = datasetType.nameAndComponent()

        usedComponent = False
        if component is not None:
            fields["component"] = component

        usedRunOrCollection = False
        fields["collection"] = ref.run
        fields["run"] = ref.run

        fmt = string.Formatter()
        parts = fmt.parse(self.template)
        output = ""

        for literal, field_name, format_spec, conversion in parts:

            if field_name == "component":
                usedComponent = True

            if format_spec is None:
                output = output + literal
                continue

            if "?" in format_spec:
                optional = True
                # Remove the non-standard character from the spec
                format_spec = format_spec.replace("?", "")
            else:
                optional = False

            if field_name in ("run", "collection"):
                usedRunOrCollection = True

            # Check for request for additional information from the dataId
            if "." in field_name:
                primary, secondary = field_name.split(".")
                if primary in extras:
                    record = extras[primary]
                    # Only fill in the field if we have a value; the KeyError
                    # below will trigger if the attribute is missing.
                    if hasattr(record, secondary):
                        fields[field_name] = getattr(record, secondary)

            if field_name in fields:
                value = fields[field_name]
            elif optional:
                # If this is optional, ignore the format spec and do not
                # include the literal text preceding the optional field
                # unless it contains a "/" path separator
                format_spec = ""
                value = ""
                if "/" not in literal:
                    literal = ""
            else:
                raise KeyError(f"'{field_name}' requested in template via '{self.template}' "
                               "but not defined and not optional")

            # Handle "/" in values since we do not want to be surprised by
            # unexpected directories turning up
            replace_slash = True
            if "/" in format_spec:
                # Remove the non-standard character from the spec
                format_spec = format_spec.replace("/", "")
                replace_slash = False

            if isinstance(value, str):
                if replace_slash:
                    value = value.replace("/", "_")

            # Now use standard formatting
            output = output + literal + format(value, format_spec)

        # Complain if we were meant to use a component
        if component is not None and not usedComponent:
            raise KeyError(f"Component '{component}' specified but "
                           f"template {self.template} did not use it")

        # Complain if there's no run or collection
        if not usedRunOrCollection:
            raise KeyError("Template does not include 'run' or 'collection'.")

        # Since this is known to be a path, normalize it in case some double
        # slashes have crept in
        path = os.path.normpath(output)

        # It should not be an absolute path (may happen with optionals)
        if os.path.isabs(path):
            path = os.path.relpath(path, start="/")

        return path

    def validateTemplate(self, entity):
        """Compare the template against a representative entity that would
        like to use this template.

        Parameters
        ----------
        entity : `DatasetType`, `DatasetRef`, or `StorageClass`
            Entity to compare against template.

        Raises
        ------
        FileTemplateValidationError
            Raised if the template is inconsistent with the supplied entity.

        Notes
        -----
        Validation will always include a check that mandatory fields
        are present and that at least one field refers to a dimension.
        If the supplied entity includes a `DimensionGraph` then it will be
        used to compare the available dimensions with those specified in the
        template.
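
        Examples
        --------
        A hedged sketch of the basic checks, which run even when ``entity``
        is `None` (the templates are illustrative only)::

            FileTemplate("{datasetType}/{visit}")
            # raises FileTemplateValidationError: no 'run' or 'collection'

            FileTemplate("{run}/{datasetType}/{visit}")
            # passes the basic validation performed at construction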
544 """
546 # Check that the template has run or collection
547 withSpecials = self.fields(specials=True, optionals=True)
548 if not withSpecials & self.mandatoryFields:
549 raise FileTemplateValidationError(f"Template '{self}' is missing a mandatory field"
550 f" from {self.mandatoryFields}")
552 # Check that there are some dimension fields in the template
553 allfields = self.fields(optionals=True)
554 if not allfields:
555 raise FileTemplateValidationError(f"Template '{self}' does not seem to have any fields"
556 " corresponding to dimensions.")
558 # If we do not have dimensions available then all we can do is shrug
559 if not hasattr(entity, "dimensions"):
560 return
562 # if this entity represents a component then insist that component
563 # is present in the template. If the entity is not a component
564 # make sure that component is not mandatory.
565 try:
566 if entity.isComponent():
567 if "component" not in withSpecials:
568 raise FileTemplateValidationError(f"Template '{self}' has no component but "
569 f"{entity} refers to a component.")
570 else:
571 mandatorySpecials = self.fields(specials=True)
572 if "component" in mandatorySpecials:
573 raise FileTemplateValidationError(f"Template '{self}' has mandatory component but "
574 f"{entity} does not refer to a component.")
575 except AttributeError:
576 pass
578 # Get the dimension links to get the full set of available field names
579 # Fall back to dataId keys if we have them but no links.
580 # dataId keys must still be present in the template
581 try:
582 minimal = set(entity.dimensions.required.names)
583 maximal = set(entity.dimensions.names)
584 except AttributeError:
585 try:
586 minimal = set(entity.dataId.keys())
587 maximal = minimal
588 except AttributeError:
589 return
591 required = self.fields(optionals=False)
593 # Calculate any field usage that does not match a dimension
594 if not required.issubset(maximal):
595 raise FileTemplateValidationError(f"Template '{self}' is inconsistent with {entity}:"
596 f" {required} is not a subset of {maximal}.")
598 if not allfields.issuperset(minimal):
599 raise FileTemplateValidationError(f"Template '{self}' is inconsistent with {entity}:"
600 f" {allfields} is not a superset of {minimal}.")
602 return