# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

"""Support for file template string expansion."""

__all__ = ("FileTemplates", "FileTemplate", "FileTemplatesConfig", "FileTemplateValidationError")

import os.path
import string
import logging
from types import MappingProxyType

from typing import (
    TYPE_CHECKING,
    Any,
    Iterable,
    Mapping,
    Optional,
    Set,
    Tuple,
    Union,
)

from .config import Config
from .configSupport import processLookupConfigs, LookupKey
from .exceptions import ValidationError
from .dimensions import SkyPixDimension, DataCoordinate
from .datasets import DatasetRef
from .storageClass import StorageClass

if TYPE_CHECKING:
    from .dimensions import DimensionUniverse
    from .datasets import DatasetType

log = logging.getLogger(__name__)


class FileTemplateValidationError(ValidationError):
    """Exception thrown when a file template is not consistent with the
    associated `DatasetType`."""
    pass


class FileTemplatesConfig(Config):
    """Configuration information for `FileTemplates`."""
    pass


class FileTemplates:
    """Collection of `FileTemplate` templates.

    Parameters
    ----------
    config : `FileTemplatesConfig` or `str`
        Load configuration.
    default : `str`, optional
        If not `None`, a default template to use if no template has
        been specified explicitly in the configuration.
    universe : `DimensionUniverse`
        The set of all known dimensions, used to normalize any lookup keys
        involving dimensions.

    Notes
    -----
    The configuration can include one level of hierarchy where an
    instrument-specific section can be defined to override more general
    template specifications. This is represented in YAML using a
    key of form ``instrument<name>`` which can then define templates
    that will be returned if a `DatasetRef` contains a matching instrument
    name in the data ID.

    A default fallback template can be specified using the key ``default``.
    Defaulting can be disabled in a child configuration by defining the
    value to be an empty string or a boolean `False`.

    The config is parsed using the function
    `~lsst.daf.butler.configSupport.processLookupConfigs`.
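
    Examples
    --------
    A hypothetical configuration fragment; the dataset type names and
    template strings here are purely illustrative::

        default: "{run}/{datasetType}/{visit:06d}"
        calexp: "{run}/{datasetType}/{visit:06d}_{detector}"
        instrument<HSC>:
          calexp: "{run}/hsc/{datasetType}/{visit:06d}_{detector}"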
98 """
100 defaultKey = LookupKey("default")
101 """Configuration key associated with the default template."""
103 def __init__(self, config: Union[FileTemplatesConfig, str],
104 default: Optional[str] = None, *,
105 universe: DimensionUniverse):
106 self.config = FileTemplatesConfig(config)
107 self._templates = {}
109 contents = processLookupConfigs(self.config, universe=universe)
111 # Determine default to use -- defaults can be disabled if
112 # we get a False or None
113 defaultValue = contents.get(self.defaultKey, default)
114 if defaultValue and not isinstance(defaultValue, str):
115 raise RuntimeError("Default template value should be str or False, or None. "
116 f"Got '{defaultValue}'")
117 self.default = FileTemplate(defaultValue) if isinstance(defaultValue, str) and defaultValue else None
119 # Convert all the values to FileTemplate, handling defaults
120 for key, templateStr in contents.items():
121 if key == self.defaultKey:
122 continue
123 if not isinstance(templateStr, str):
124 raise RuntimeError(f"Unexpected value in file template key {key}: {templateStr}")
125 self._templates[key] = FileTemplate(templateStr)

    @property
    def templates(self) -> Mapping[LookupKey, FileTemplate]:
        """Collection of templates indexed by lookup key (`dict`)."""
        return MappingProxyType(self._templates)

    def __contains__(self, key: LookupKey) -> bool:
        """Indicate whether the supplied key is present in the templates.

        Parameters
        ----------
        key : `LookupKey`
            Key to use to determine if a corresponding value is present
            in the templates.

        Returns
        -------
        in : `bool`
            `True` if the supplied key is present in the templates.
        """
        return key in self.templates

    def __getitem__(self, key: LookupKey) -> FileTemplate:
        return self.templates[key]

    def validateTemplates(self, entities: Iterable[Union[DatasetType, DatasetRef, StorageClass]],
                          logFailures: bool = False) -> None:
        """Retrieve the template associated with each dataset type and
        validate the dimensions against the template.

        Parameters
        ----------
        entities : iterable of `DatasetType`, `DatasetRef`, or `StorageClass`
            Entities to validate against the matching templates. Can be
            differing types.
        logFailures : `bool`, optional
            If `True`, output a log message for every validation error
            detected.

        Raises
        ------
        FileTemplateValidationError
            Raised if an entity failed validation.

        Notes
        -----
        See `FileTemplate.validateTemplate()` for details on the validation.
        """
        unmatchedKeys = set(self.templates)
        failed = []
        for entity in entities:
            try:
                matchKey, template = self.getTemplateWithMatch(entity)
            except KeyError as e:
                # KeyError always quotes on stringification so strip here
                errMsg = str(e).strip('"\'')
                failed.append(errMsg)
                if logFailures:
                    log.fatal("%s", errMsg)
                continue

            if matchKey in unmatchedKeys:
                unmatchedKeys.remove(matchKey)

            try:
                template.validateTemplate(entity)
            except FileTemplateValidationError as e:
                failed.append(f"{e} (via key '{matchKey}')")
                if logFailures:
                    log.fatal("Template failure with key '%s': %s", matchKey, e)

        if logFailures and unmatchedKeys:
            log.warning("Unchecked keys: %s", ", ".join([str(k) for k in unmatchedKeys]))

        if failed:
            if len(failed) == 1:
                msg = str(failed[0])
            else:
                failMsg = ";\n".join(failed)
                msg = f"{len(failed)} template validation failures: {failMsg}"
            raise FileTemplateValidationError(msg)

    def getLookupKeys(self) -> Set[LookupKey]:
        """Retrieve the lookup keys for all the template entries.

        Returns
        -------
        keys : `set` of `LookupKey`
            The keys available for matching a template.
        """
        return set(self.templates)

    def getTemplateWithMatch(self, entity: Union[DatasetRef, DatasetType,
                                                 StorageClass]) -> Tuple[LookupKey, FileTemplate]:
        """Retrieve the `FileTemplate` associated with the dataset type along
        with the lookup key that was a match for this template.

        If the lookup name corresponds to a component, the base name for
        the component will be examined if the full component name does
        not match.

        Parameters
        ----------
        entity : `DatasetType`, `DatasetRef`, or `StorageClass`
            Instance to use to look for a corresponding template.
            A `DatasetType` name or a `StorageClass` name will be used
            depending on the supplied entity. Priority is given to a
            `DatasetType` name. An instrument-specific override is
            supported if a `DatasetRef` is provided whose data ID
            includes an ``instrument`` value.

        Returns
        -------
        matchKey : `LookupKey`
            The key that resulted in the successful match.
        template : `FileTemplate`
            Template instance to use with that dataset type.

        Raises
        ------
        KeyError
            Raised if no template could be located for this dataset type.
        """
        # Get the names to use for lookup
        names = entity._lookupNames()

        # Get a location from the templates
        template = self.default
        source = self.defaultKey
        for name in names:
            if name in self.templates:
                template = self.templates[name]
                source = name
                break

        if template is None:
            raise KeyError(f"Unable to determine file template from supplied argument [{entity}]")

        log.debug("Got file %s from %s via %s", template, entity, source)

        return source, template

    def getTemplate(self, entity: Union[DatasetType, DatasetRef, StorageClass]) -> FileTemplate:
        """Retrieve the `FileTemplate` associated with the dataset type.

        If the lookup name corresponds to a component, the base name for
        the component will be examined if the full component name does
        not match.

        Parameters
        ----------
        entity : `DatasetType`, `DatasetRef`, or `StorageClass`
            Instance to use to look for a corresponding template.
            A `DatasetType` name or a `StorageClass` name will be used
            depending on the supplied entity. Priority is given to a
            `DatasetType` name. An instrument-specific override is
            supported if a `DatasetRef` is provided whose data ID
            includes an ``instrument`` value.

        Returns
        -------
        template : `FileTemplate`
            Template instance to use with that dataset type.

        Raises
        ------
        KeyError
            Raised if no template could be located for this dataset type.
        """
        _, template = self.getTemplateWithMatch(entity)
        return template


class FileTemplate:
    """Format a path template into a fully expanded path.

    Parameters
    ----------
    template : `str`
        Template string.

    Raises
    ------
    FileTemplateValidationError
        Raised if the template fails basic validation.

    Notes
    -----
    The templates use the standard Format Specification Mini-Language
    with the caveat that only named fields can be used. The field names
    are taken from the Dimensions along with several additional fields:

    - datasetType: `str`, `DatasetType.name`
    - component: `str`, name of the StorageClass component
    - run: `str`, name of the run this dataset was added with

    `run` must always be provided to ensure unique paths.

    More detailed information can be requested from dimensions by using a dot
    notation, so ``visit.name`` would use the name of the visit and
    ``detector.name_in_raft`` would use the name of the detector within the
    raft.

    The mini-language is extended to understand a "?" in the format
    specification. This indicates that a field is optional. If that
    dimension is missing, the field, along with the text preceding the
    field (unless it is a path separator), will be removed from the
    output path.

    By default any "/" in a dataId value will be replaced by "_" to prevent
    unexpected directories being created in the path. If the "/" should be
    retained then a special "/" format specifier can be included in the
    template.
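
    Examples
    --------
    Some hypothetical templates; the dimension names are purely
    illustrative::

        "{run}/{datasetType}/{visit:06d}"
        "{run}/{datasetType}.{component:?}/{physical_filter}"

    In the second template the component is optional: if the dataset has
    no component, the ``{component:?}`` field and the "." preceding it
    are dropped from the expanded path.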
339 """
341 mandatoryFields = {"run"}
342 """A set of fields, one of which must be present in a template."""
344 datasetFields = {"datasetType", "component"}
345 """Fields related to the supplied dataset, not a dimension."""
347 specialFields = mandatoryFields | datasetFields
348 """Set of special fields that are available independently of the defined
349 Dimensions."""
351 def __init__(self, template: str):
352 if not isinstance(template, str):
353 raise FileTemplateValidationError(f"Template ('{template}') does "
354 "not contain any format specifiers")
355 self.template = template
357 # Do basic validation without access to dimensions
358 self.validateTemplate(None)

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, FileTemplate):
            return False

        return self.template == other.template

    def __str__(self) -> str:
        return self.template

    def __repr__(self) -> str:
        return f'{self.__class__.__name__}("{self.template}")'

    def fields(self, optionals: bool = False, specials: bool = False, subfields: bool = False) -> Set[str]:
        """Return the field names used in this template.

        Parameters
        ----------
        optionals : `bool`
            If `True`, optional fields are included in the returned set.
        specials : `bool`
            If `True`, non-dimension fields are included.
        subfields : `bool`, optional
            If `True`, fields with syntax ``a.b`` are included. If `False`,
            the default, only ``a`` would be returned.

        Returns
        -------
        names : `set`
            Names of fields used in this template.

        Notes
        -----
        The returned set will include the special values such as
        `datasetType` and `component` only if ``specials`` is `True`.
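
        Examples
        --------
        A small illustration with a hand-written template; the field
        names are purely illustrative.

        >>> t = FileTemplate("{run}/{datasetType}/{visit}/{detector:?}")
        >>> sorted(t.fields())
        ['visit']
        >>> sorted(t.fields(optionals=True))
        ['detector', 'visit']
        >>> sorted(t.fields(specials=True, optionals=True))
        ['datasetType', 'detector', 'run', 'visit']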
394 """
395 fmt = string.Formatter()
396 parts = fmt.parse(self.template)
398 names = set()
399 for literal, field_name, format_spec, conversion in parts:
400 if field_name is not None and format_spec is not None:
401 if "?" in format_spec and not optionals:
402 continue
404 if not specials and field_name in self.specialFields:
405 continue
407 if "." in field_name and not subfields:
408 field_name, _ = field_name.split(".")
410 names.add(field_name)
412 return names

    def format(self, ref: DatasetRef) -> str:
        """Format a template string into a full path.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset to be formatted.

        Returns
        -------
        path : `str`
            Expanded path.

        Raises
        ------
        KeyError
            Raised if a requested field is not defined and the field is
            not optional, or if a component is specified in the dataset
            but the template does not include a ``component`` field.
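
        Notes
        -----
        As a purely illustrative example, the template
        ``{run}/{datasetType}/{visit:06d}`` expanded for a dataset with
        ``run="ingest"``, dataset type ``raw``, and ``visit=903`` would
        produce the path ``ingest/raw/000903``.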
433 """
434 # Extract defined non-None dimensions from the dataId.
435 # This guards against Nones being explicitly present in the data ID
436 # (which can happen if, say, an exposure has no filter), as well as
437 # the case where only required dimensions are present (which in this
438 # context should only happen in unit tests; in general we need all
439 # dimensions to fill out templates).
440 fields = {k: ref.dataId.get(k) for k in ref.datasetType.dimensions.names
441 if ref.dataId.get(k) is not None}
442 # Extra information that can be included using . syntax
443 extras = {}
444 if isinstance(ref.dataId, DataCoordinate):
445 if ref.dataId.hasRecords():
446 extras = ref.dataId.records.byName()
447 skypix_alias = self._determine_skypix_alias(ref)
448 if skypix_alias is not None:
449 fields["skypix"] = fields[skypix_alias]
450 if extras:
451 extras["skypix"] = extras[skypix_alias]
453 datasetType = ref.datasetType
454 fields["datasetType"], component = datasetType.nameAndComponent()
456 usedComponent = False
457 if component is not None:
458 fields["component"] = component
460 usedRun = False
461 fields["run"] = ref.run
463 fmt = string.Formatter()
464 parts = fmt.parse(self.template)
465 output = ""
467 for literal, field_name, format_spec, conversion in parts:
469 if field_name == "component":
470 usedComponent = True
472 if format_spec is None:
473 output = output + literal
474 continue
476 # Should only happen if format_spec is None
477 if field_name is None:
478 raise RuntimeError(f"Unexpected blank field_name encountered in {self.template} [{literal}]")
480 if "?" in format_spec:
481 optional = True
482 # Remove the non-standard character from the spec
483 format_spec = format_spec.replace("?", "")
484 else:
485 optional = False
487 if field_name == "run":
488 usedRun = True
490 if field_name == "collection":
491 raise KeyError("'collection' is no longer supported as a "
492 "file template placeholder; use 'run' instead.")
494 # Check for request for additional information from the dataId
495 if "." in field_name:
496 primary, secondary = field_name.split(".")
497 if primary in extras:
498 record = extras[primary]
499 # Only fill in the fields if we have a value, the
500 # KeyError will trigger below if the attribute is missing.
501 if hasattr(record, secondary):
502 fields[field_name] = getattr(record, secondary)
504 if field_name in fields:
505 value = fields[field_name]
506 elif optional:
507 # If this is optional ignore the format spec
508 # and do not include the literal text prior to the optional
509 # field unless it contains a "/" path separator
510 format_spec = ""
511 value = ""
512 if "/" not in literal:
513 literal = ""
514 else:
515 raise KeyError(f"'{field_name}' requested in template via '{self.template}' "
516 "but not defined and not optional")
518 # Handle "/" in values since we do not want to be surprised by
519 # unexpected directories turning up
520 replace_slash = True
521 if "/" in format_spec:
522 # Remove the non-standard character from the spec
523 format_spec = format_spec.replace("/", "")
524 replace_slash = False
526 if isinstance(value, str):
527 if replace_slash:
528 value = value.replace("/", "_")
530 # Now use standard formatting
531 output = output + literal + format(value, format_spec)
533 # Replace periods with underscores in the non-directory part to
534 # prevent file extension confusion.
535 head, tail = os.path.split(output)
536 output = os.path.join(head, tail.replace(".", "_"))
538 # Complain if we were meant to use a component
539 if component is not None and not usedComponent:
540 raise KeyError("Component '{}' specified but template {} did not use it".format(component,
541 self.template))
543 # Complain if there's no run
544 if not usedRun:
545 raise KeyError("Template does not include 'run'.")
547 # Since this is known to be a path, normalize it in case some double
548 # slashes have crept in
549 path = os.path.normpath(output)
551 # It should not be an absolute path (may happen with optionals)
552 if os.path.isabs(path):
553 path = os.path.relpath(path, start="/")
555 return path

    def validateTemplate(self, entity: Union[DatasetRef, DatasetType, StorageClass, None]) -> None:
        """Compare the template against a representative entity that would
        like to use the template.

        Parameters
        ----------
        entity : `DatasetType`, `DatasetRef`, or `StorageClass`
            Entity to compare against template. If `None` is given only
            very basic validation of templates will be performed.

        Raises
        ------
        FileTemplateValidationError
            Raised if the template is inconsistent with the supplied entity.

        Notes
        -----
        Validation will always include a check that mandatory fields
        are present and that at least one field refers to a dimension.
        If the supplied entity includes a `DimensionGraph` then it will be
        used to compare the available dimensions with those specified in the
        template.
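
        For example, a template consisting solely of ``"{visit}"`` would
        be rejected because it lacks the mandatory ``run`` field.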
579 """
581 # Check that the template has run
582 withSpecials = self.fields(specials=True, optionals=True)
583 if not withSpecials & self.mandatoryFields:
584 raise FileTemplateValidationError(f"Template '{self}' is missing a mandatory field"
585 f" from {self.mandatoryFields}")
587 # Check that there are some dimension fields in the template
588 allfields = self.fields(optionals=True)
589 if not allfields:
590 raise FileTemplateValidationError(f"Template '{self}' does not seem to have any fields"
591 " corresponding to dimensions.")
593 # If we do not have dimensions available then all we can do is shrug
594 if not hasattr(entity, "dimensions"):
595 return
597 # Mypy does not know about hasattr so help it out
598 if entity is None:
599 return
601 # if this entity represents a component then insist that component
602 # is present in the template. If the entity is not a component
603 # make sure that component is not mandatory.
604 try:
605 # mypy does not see the except block so complains about
606 # StorageClass not supporting isComponent
607 if entity.isComponent(): # type: ignore
608 if "component" not in withSpecials:
609 raise FileTemplateValidationError(f"Template '{self}' has no component but "
610 f"{entity} refers to a component.")
611 else:
612 mandatorySpecials = self.fields(specials=True)
613 if "component" in mandatorySpecials:
614 raise FileTemplateValidationError(f"Template '{self}' has mandatory component but "
615 f"{entity} does not refer to a component.")
616 except AttributeError:
617 pass
619 # From here on we need at least a DatasetType
620 # Mypy doesn't understand the AttributeError clause below
621 if isinstance(entity, StorageClass):
622 return
624 # Get the dimension links to get the full set of available field names
625 # Fall back to dataId keys if we have them but no links.
626 # dataId keys must still be present in the template
627 try:
628 minimal = set(entity.dimensions.required.names)
629 maximal = set(entity.dimensions.names)
630 except AttributeError:
631 try:
632 minimal = set(entity.dataId.keys()) # type: ignore
633 maximal = minimal
634 except AttributeError:
635 return
637 # Replace specific skypix dimensions with generic one
638 skypix_alias = self._determine_skypix_alias(entity)
639 if skypix_alias is not None:
640 minimal.add("skypix")
641 maximal.add("skypix")
642 minimal.remove(skypix_alias)
643 maximal.remove(skypix_alias)
645 required = self.fields(optionals=False)
647 # Calculate any field usage that does not match a dimension
648 if not required.issubset(maximal):
649 raise FileTemplateValidationError(f"Template '{self}' is inconsistent with {entity}:"
650 f" {required} is not a subset of {maximal}.")
652 if not allfields.issuperset(minimal):
653 raise FileTemplateValidationError(f"Template '{self}' is inconsistent with {entity}:"
654 f" {allfields} is not a superset of {minimal}.")
656 return

    def _determine_skypix_alias(self, entity: Union[DatasetRef, DatasetType]) -> Optional[str]:
        """Return the name of the dimension, if any, that refers to a sky
        pixel in the given entity.

        Parameters
        ----------
        entity : `DatasetRef` or `DatasetType`
            The entity to examine.

        Returns
        -------
        alias : `str` or `None`
            If there is a sky pixelization in the supplied dataId, return
            its name, else return `None`. Also returns `None` if there
            is more than one sky pix dimension in the data ID or if the
            data ID is not a `DataCoordinate`.
        """
        alias = None

        if isinstance(entity, DatasetRef):
            entity = entity.datasetType

        # If there is exactly one SkyPixDimension in the data ID, alias its
        # value with the key "skypix", so we can use that to match any
        # skypix dimension.
        # We restrict this behavior to the (real-world) case where the
        # data ID is a DataCoordinate, not just a dict. That should only
        # not be true in some test code, but that test code is a pain to
        # update to be more like the real world while still providing our
        # only tests of important behavior.
        skypix = [dimension for dimension in entity.dimensions
                  if isinstance(dimension, SkyPixDimension)]
        if len(skypix) == 1:
            alias = skypix[0].name
        return alias