Coverage for python/lsst/daf/butler/core/fileTemplates.py : 12%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Support for file template string expansion."""
24__all__ = ("FileTemplates", "FileTemplate", "FileTemplatesConfig", "FileTemplateValidationError")
26import os.path
27import string
28import logging
29from types import MappingProxyType
31from .config import Config
32from .configSupport import processLookupConfigs, LookupKey
33from .exceptions import ValidationError
34from .dimensions import SkyPixDimension, DataCoordinate
36log = logging.getLogger(__name__)


class FileTemplateValidationError(ValidationError):
    """Exception thrown when a file template is not consistent with the
    associated `DatasetType`."""
    pass


class FileTemplatesConfig(Config):
    """Configuration information for `FileTemplates`."""
    pass


class FileTemplates:
    """Collection of `FileTemplate` templates.

    Parameters
    ----------
    config : `FileTemplatesConfig` or `str`
        Load configuration.
    default : `str`, optional
        If not `None`, a default template to use if no template has
        been specified explicitly in the configuration.
    universe : `DimensionUniverse`
        The set of all known dimensions, used to normalize any lookup keys
        involving dimensions.

    Notes
    -----
    The configuration can include one level of hierarchy where an
    instrument-specific section can be defined to override more general
    template specifications. This is represented in YAML using a
    key of form ``instrument<name>`` which can then define templates
    that will be returned if a `DatasetRef` contains a matching instrument
    name in the data ID.

    A default fallback template can be specified using the key ``default``.
    Defaulting can be disabled in a child configuration by defining the
    value to be an empty string or a boolean `False`.

    The config is parsed using the function
    `~lsst.daf.butler.configSupport.processLookupConfigs`.
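
    Examples
    --------
    An illustrative configuration fragment; the dataset type name
    ``calexp`` and the instrument name ``MyCam`` are hypothetical, and the
    exact nesting is a sketch of the one-level hierarchy described above::

        default: "{run}/{datasetType}/{datasetType}_{visit:?}"
        calexp: "{run}/{datasetType}/{visit}/{datasetType}_{visit}"
        instrument<MyCam>:
          calexp: "{run}/mycam/{datasetType}_{visit}.{component:?}"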
79 """
81 defaultKey = LookupKey("default")
82 """Configuration key associated with the default template."""

    def __init__(self, config, default=None, *, universe):
        self.config = FileTemplatesConfig(config)
        self._templates = {}
        self.default = FileTemplate(default) if default is not None else None

        contents = processLookupConfigs(self.config, universe=universe)

        # Convert all the values to FileTemplate, handling defaults
        for key, templateStr in contents.items():
            if key == self.defaultKey:
                if not templateStr:
                    self.default = None
                else:
                    self.default = FileTemplate(templateStr)
            else:
                self._templates[key] = FileTemplate(templateStr)

    @property
    def templates(self):
        """Collection of templates indexed by lookup key (`dict`)."""
        return MappingProxyType(self._templates)

    def __contains__(self, key):
        """Indicates whether the supplied key is present in the templates.

        Parameters
        ----------
        key : `LookupKey`
            Key to use to determine if a corresponding value is present
            in the templates.

        Returns
        -------
        in : `bool`
            `True` if the supplied key is present in the templates.
        """
        return key in self.templates

    def __getitem__(self, key):
        return self.templates[key]

    def validateTemplates(self, entities, logFailures=False):
        """Retrieve the template associated with each dataset type and
        validate the dimensions against the template.

        Parameters
        ----------
        entities : `DatasetType`, `DatasetRef`, or `StorageClass`
            Entities to validate against the matching templates. Can be
            differing types.
        logFailures : `bool`, optional
            If `True`, output a log message for every validation error
            detected.

        Raises
        ------
        FileTemplateValidationError
            Raised if an entity failed validation.

        Notes
        -----
        See `FileTemplate.validateTemplate()` for details on the validation.
        """
        unmatchedKeys = set(self.templates)
        failed = []
        for entity in entities:
            try:
                matchKey, template = self.getTemplateWithMatch(entity)
            except KeyError as e:
                # KeyError always quotes on stringification so strip here
                errMsg = str(e).strip('"\'')
                failed.append(errMsg)
                if logFailures:
                    log.fatal("%s", errMsg)
                continue

            if matchKey in unmatchedKeys:
                unmatchedKeys.remove(matchKey)

            try:
                template.validateTemplate(entity)
            except FileTemplateValidationError as e:
                failed.append(f"{e} (via key '{matchKey}')")
                if logFailures:
                    log.fatal("Template failure with key '%s': %s", matchKey, e)

        if logFailures and unmatchedKeys:
            log.warning("Unchecked keys: %s", ", ".join([str(k) for k in unmatchedKeys]))

        if failed:
            if len(failed) == 1:
                msg = str(failed[0])
            else:
                failMsg = ";\n".join(failed)
                msg = f"{len(failed)} template validation failures: {failMsg}"
            raise FileTemplateValidationError(msg)

    def getLookupKeys(self):
        """Retrieve the lookup keys for all the template entries.

        Returns
        -------
        keys : `set` of `LookupKey`
            The keys available for matching a template.
        """
        return set(self.templates)

    def getTemplateWithMatch(self, entity):
        """Retrieve the `FileTemplate` associated with the dataset type along
        with the lookup key that was a match for this template.

        If the lookup name corresponds to a component the base name for
        the component will be examined if the full component name does
        not match.

        Parameters
        ----------
        entity : `DatasetType`, `DatasetRef`, or `StorageClass`
            Instance to use to look for a corresponding template.
            A `DatasetType` name or a `StorageClass` name will be used
            depending on the supplied entity. Priority is given to a
            `DatasetType` name. Supports instrument override if a
            `DatasetRef` is provided configured with an ``instrument``
            value for the data ID.

        Returns
        -------
        matchKey : `LookupKey`
            The key that resulted in the successful match.
        template : `FileTemplate`
            Template instance to use with that dataset type.

        Raises
        ------
        KeyError
            Raised if no template could be located for this dataset type.
        """
        # Get the names to use for lookup
        names = entity._lookupNames()

        # Get a location from the templates
        template = self.default
        source = self.defaultKey
        for name in names:
            if name in self.templates:
                template = self.templates[name]
                source = name
                break

        if template is None:
            raise KeyError(f"Unable to determine file template from supplied argument [{entity}]")

        log.debug("Got file %s from %s via %s", template, entity, source)

        return source, template

    def getTemplate(self, entity):
        """Retrieve the `FileTemplate` associated with the dataset type.

        If the lookup name corresponds to a component the base name for
        the component will be examined if the full component name does
        not match.

        Parameters
        ----------
        entity : `DatasetType`, `DatasetRef`, or `StorageClass`
            Instance to use to look for a corresponding template.
            A `DatasetType` name or a `StorageClass` name will be used
            depending on the supplied entity. Priority is given to a
            `DatasetType` name. Supports instrument override if a
            `DatasetRef` is provided configured with an ``instrument``
            value for the data ID.

        Returns
        -------
        template : `FileTemplate`
            Template instance to use with that dataset type.

        Raises
        ------
        KeyError
            Raised if no template could be located for this dataset type.
        """
        _, template = self.getTemplateWithMatch(entity)
        return template


class FileTemplate:
    """Format a path template into a fully expanded path.

    Parameters
    ----------
    template : `str`
        Template string.

    Raises
    ------
    FileTemplateValidationError
        Raised if the template fails basic validation.

    Notes
    -----
    The templates use the standard Format Specification Mini-Language
    with the caveat that only named fields can be used. The field names
    are taken from the Dimensions along with several additional fields:

    - datasetType: `str`, `DatasetType.name`
    - component: `str`, name of the StorageClass component
    - run: `str`, name of the run this dataset was added with

    `run` must always be provided to ensure unique paths.

    More detailed information can be requested from dimensions by using a dot
    notation, so ``visit.name`` would use the name of the visit and
    ``detector.name_in_raft`` would use the name of the detector within the
    raft.

    The mini-language is extended to understand a "?" in the format
    specification. This indicates that a field is optional. If that
    field is missing from the data ID, the field, along with any text
    preceding it (unless that text contains a path separator), will be
    removed from the output path.

    By default any "/" in a dataId value will be replaced by "_" to prevent
    unexpected directories being created in the path. If the "/" should be
    retained then a special "/" format specifier can be included in the
    template.
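
    Examples
    --------
    Illustrative expansions; run ``myrun``, dataset type ``calexp``, and
    ``visit=123`` are hypothetical values::

        "{run}/{datasetType}/{visit:06d}"
            -> "myrun/calexp/000123"
        "{run}/{datasetType}.{component:?}/v{visit}"
            -> "myrun/calexp/v123"       (dataset has no component)
            -> "myrun/calexp.psf/v123"   (component "psf")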
309 """
311 mandatoryFields = {"run"}
312 """A set of fields, one of which must be present in a template."""
314 datasetFields = {"datasetType", "component"}
315 """Fields related to the supplied dataset, not a dimension."""
317 specialFields = mandatoryFields | datasetFields
318 """Set of special fields that are available independently of the defined
319 Dimensions."""

    def __init__(self, template):
        if not isinstance(template, str):
            raise FileTemplateValidationError(f"Template ('{template}') does "
                                              "not appear to be a string")
        self.template = template

        # Do basic validation without access to dimensions
        self.validateTemplate(None)

    def __eq__(self, other):
        if not isinstance(other, FileTemplate):
            return False

        return self.template == other.template

    def __str__(self):
        return self.template

    def __repr__(self):
        return f'{self.__class__.__name__}("{self.template}")'

    def fields(self, optionals=False, specials=False, subfields=False):
        """Return the field names used in this template.

        Parameters
        ----------
        optionals : `bool`, optional
            If `True`, optional fields are included in the returned set.
        specials : `bool`, optional
            If `True`, non-dimension fields are included.
        subfields : `bool`, optional
            If `True`, fields with syntax ``a.b`` are included. If `False`,
            the default, only ``a`` would be returned.

        Returns
        -------
        names : `set`
            Names of fields used in this template.

        Notes
        -----
        The returned set can include the special values such as `datasetType`
        and `component` if ``specials`` is `True`.
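
        Examples
        --------
        A minimal sketch; ``visit`` and ``detector.full_name`` are
        hypothetical field names used purely for illustration:

        >>> t = FileTemplate("{run}/{visit}/{detector.full_name:?}")
        >>> sorted(t.fields())
        ['visit']
        >>> sorted(t.fields(optionals=True))
        ['detector', 'visit']
        >>> sorted(t.fields(optionals=True, subfields=True))
        ['detector.full_name', 'visit']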
364 """
365 fmt = string.Formatter()
366 parts = fmt.parse(self.template)
368 names = set()
369 for literal, field_name, format_spec, conversion in parts:
370 if field_name is not None:
371 if "?" in format_spec and not optionals:
372 continue
374 if not specials and field_name in self.specialFields:
375 continue
377 if "." in field_name and not subfields:
378 field_name, _ = field_name.split(".")
380 names.add(field_name)
382 return names

    def format(self, ref):
        """Format a template string into a full path.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset to be formatted.

        Returns
        -------
        path : `str`
            Expanded path.

        Raises
        ------
        KeyError
            Raised if the requested field is not defined and the field is
            not optional. Or, `component` is specified but "component" was
            not part of the template.
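
        Examples
        --------
        An illustrative expansion, assuming a `DatasetRef` with run
        ``myrun``, dataset type ``calexp``, and a data ID containing
        ``visit=123`` (all values hypothetical)::

            "{run}/{datasetType}/{visit:06d}" -> "myrun/calexp/000123"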
403 """
404 # Extract defined non-None dimensions from the dataId
405 # We attempt to get the "full" dict on the assumption that ref.dataId
406 # is a ExpandedDataCoordinate, as it should be when running
407 # PipelineTasks. We should probably just require that when formatting
408 # templates (and possibly when constructing DatasetRefs), but doing so
409 # would break a ton of otherwise-useful tests that would need to be
410 # modified to provide a lot more metadata.
411 fields = {k: v for k, v in getattr(ref.dataId, "full", ref.dataId).items() if v is not None}

        if isinstance(ref.dataId, DataCoordinate):
            # If there is exactly one SkyPixDimension in the data ID, alias
            # its value with the key "skypix", so we can use that to match
            # any skypix dimension.
            # We restrict this behavior to the (real-world) case where the
            # data ID is a DataCoordinate, not just a dict. That should be
            # true everywhere except in some test code, but that test code is
            # a pain to update to be more like the real world while still
            # providing our only tests of important behavior.
            skypix = [dimension for dimension in ref.dataId.graph if isinstance(dimension, SkyPixDimension)]
            if len(skypix) == 1:
                fields["skypix"] = fields[skypix[0]]
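                # (Hypothetical illustration: given a data ID containing
                # {"htm7": 12345}, both "htm7" and "skypix" now map to 12345,
                # so a template field "{skypix}" will match.)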

        # Extra information that can be included using . syntax
        extras = getattr(ref.dataId, "records", {})

        datasetType = ref.datasetType
        fields["datasetType"], component = datasetType.nameAndComponent()

        usedComponent = False
        if component is not None:
            fields["component"] = component

        usedRun = False
        fields["run"] = ref.run

        fmt = string.Formatter()
        parts = fmt.parse(self.template)

        output = ""

        for literal, field_name, format_spec, conversion in parts:

            if field_name == "component":
                usedComponent = True

            if format_spec is None:
                output = output + literal
                continue

            if "?" in format_spec:
                optional = True
                # Remove the non-standard character from the spec
                format_spec = format_spec.replace("?", "")
            else:
                optional = False

            if field_name == "run":
                usedRun = True

            if field_name == "collection":
                raise KeyError("'collection' is no longer supported as a "
                               "file template placeholder; use 'run' instead.")

            # Check for request for additional information from the dataId
            if "." in field_name:
                primary, secondary = field_name.split(".")
                if primary in extras:
                    record = extras[primary]
                    # Only fill in the fields if we have a value; the
                    # KeyError will trigger below if the attribute is missing.
                    if hasattr(record, secondary):
                        fields[field_name] = getattr(record, secondary)

            if field_name in fields:
                value = fields[field_name]
            elif optional:
                # If this is optional ignore the format spec
                # and do not include the literal text prior to the optional
                # field unless it contains a "/" path separator
                format_spec = ""
                value = ""
                if "/" not in literal:
                    literal = ""
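                # (Hypothetical illustration: "{visit}_{component:?}" with no
                # component collapses to just the visit value, dropping the
                # "_"; with "{visit}/{component:?}" the "/" is retained, and
                # any trailing slash is removed by normalization below.)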
            else:
                raise KeyError(f"'{field_name}' requested in template via '{self.template}' "
                               "but not defined and not optional")

            # Handle "/" in values since we do not want to be surprised by
            # unexpected directories turning up
            replace_slash = True
            if "/" in format_spec:
                # Remove the non-standard character from the spec
                format_spec = format_spec.replace("/", "")
                replace_slash = False

            if isinstance(value, str):
                if replace_slash:
                    value = value.replace("/", "_")

            # Now use standard formatting
            output = output + literal + format(value, format_spec)

        # Replace periods with underscores in the non-directory part to
        # prevent file extension confusion.
        head, tail = os.path.split(output)
        output = os.path.join(head, tail.replace(".", "_"))

        # Complain if we were meant to use a component
        if component is not None and not usedComponent:
            raise KeyError(f"Component '{component}' specified but "
                           f"template {self.template} did not use it")

        # Complain if there's no run
        if not usedRun:
            raise KeyError("Template does not include 'run'.")

        # Since this is known to be a path, normalize it in case some double
        # slashes have crept in
        path = os.path.normpath(output)

        # It should not be an absolute path (may happen with optionals)
        if os.path.isabs(path):
            path = os.path.relpath(path, start="/")

        return path

    def validateTemplate(self, entity):
        """Compare the template against a representative entity that would
        like to use the template.

        Parameters
        ----------
        entity : `DatasetType`, `DatasetRef`, or `StorageClass`
            Entity to compare against the template.

        Raises
        ------
        FileTemplateValidationError
            Raised if the template is inconsistent with the supplied entity.

        Notes
        -----
        Validation will always include a check that mandatory fields
        are present and that at least one field refers to a dimension.
        If the supplied entity includes a `DimensionGraph` then it will be
        used to compare the available dimensions with those specified in the
        template.
        """

        # Check that the template has run
        withSpecials = self.fields(specials=True, optionals=True)
        if not withSpecials & self.mandatoryFields:
            raise FileTemplateValidationError(f"Template '{self}' is missing a mandatory field"
                                              f" from {self.mandatoryFields}")

        # Check that there are some dimension fields in the template
        allfields = self.fields(optionals=True)
        if not allfields:
            raise FileTemplateValidationError(f"Template '{self}' does not seem to have any fields"
                                              " corresponding to dimensions.")

        # If we do not have dimensions available then all we can do is shrug
        if not hasattr(entity, "dimensions"):
            return

        # If this entity represents a component then insist that component
        # is present in the template. If the entity is not a component
        # make sure that component is not mandatory.
        try:
            if entity.isComponent():
                if "component" not in withSpecials:
                    raise FileTemplateValidationError(f"Template '{self}' has no component but "
                                                      f"{entity} refers to a component.")
            else:
                mandatorySpecials = self.fields(specials=True)
                if "component" in mandatorySpecials:
                    raise FileTemplateValidationError(f"Template '{self}' has mandatory component but "
                                                      f"{entity} does not refer to a component.")
        except AttributeError:
            pass

        # Get the dimension links to get the full set of available field names
        # Fall back to dataId keys if we have them but no links.
        # dataId keys must still be present in the template
        try:
            minimal = set(entity.dimensions.required.names)
            maximal = set(entity.dimensions.names)
        except AttributeError:
            try:
                minimal = set(entity.dataId.keys())
                maximal = minimal
            except AttributeError:
                return

        required = self.fields(optionals=False)

        # Calculate any field usage that does not match a dimension
        if not required.issubset(maximal):
            raise FileTemplateValidationError(f"Template '{self}' is inconsistent with {entity}:"
                                              f" {required} is not a subset of {maximal}.")

        if not allfields.issuperset(minimal):
            raise FileTemplateValidationError(f"Template '{self}' is inconsistent with {entity}:"
                                              f" {allfields} is not a superset of {minimal}.")

        return