python/lsst/daf/butler/core/fileTemplates.py
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

"""Support for file template string expansion."""

__all__ = ("FileTemplates", "FileTemplate", "FileTemplatesConfig", "FileTemplateValidationError")

import os.path
import string
import logging
from types import MappingProxyType

from typing import (
    TYPE_CHECKING,
    Any,
    Iterable,
    Mapping,
    Optional,
    Set,
    Tuple,
    Union,
)

from .config import Config
from .configSupport import processLookupConfigs, LookupKey
from .exceptions import ValidationError
from .dimensions import SkyPixDimension, DataCoordinate

if TYPE_CHECKING:
    from .dimensions import DimensionUniverse
    from .datasets import DatasetType, DatasetRef
    from .storageClass import StorageClass

log = logging.getLogger(__name__)


class FileTemplateValidationError(ValidationError):
    """Exception raised when a file template is not consistent with the
    associated `DatasetType`."""
    pass


class FileTemplatesConfig(Config):
    """Configuration information for `FileTemplates`."""
    pass


class FileTemplates:
    """Collection of `FileTemplate` templates.

    Parameters
    ----------
    config : `FileTemplatesConfig` or `str`
        Load configuration.
    default : `str`, optional
        If not `None`, a default template to use if no template has
        been specified explicitly in the configuration.
    universe : `DimensionUniverse`
        The set of all known dimensions, used to normalize any lookup keys
        involving dimensions.

    Notes
    -----
    The configuration can include one level of hierarchy where an
    instrument-specific section can be defined to override more general
    template specifications.  This is represented in YAML using a
    key of the form ``instrument<name>`` which can then define templates
    that will be returned if a `DatasetRef` contains a matching instrument
    name in the data ID.

    A default fallback template can be specified using the key ``default``.
    Defaulting can be disabled in a child configuration by defining the
    value to be an empty string or a boolean `False`.

    The config is parsed using the function
    `~lsst.daf.butler.configSupport.processLookupConfigs`.
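
    Examples
    --------
    A minimal sketch of the YAML layout described above.  The dataset type
    names and template strings are hypothetical; only ``default`` and the
    ``instrument<name>`` key form come from the notes::

        default: "{run}/{datasetType}/{datasetType}_{visit:?}"
        calexp: "{run}/{datasetType}/v{visit}/{datasetType}_{detector}"
        instrument<HSC>:
          calexp: "{run}/{datasetType}/v{visit}/{datasetType}_HSC_{detector}"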
97 """
99 defaultKey = LookupKey("default")
100 """Configuration key associated with the default template."""
102 def __init__(self, config: Union[FileTemplatesConfig, str],
103 default: Optional[str] = None, *,
104 universe: DimensionUniverse):
105 self.config = FileTemplatesConfig(config)
106 self._templates = {}
107 self.default = FileTemplate(default) if default is not None else None
108 contents = processLookupConfigs(self.config, universe=universe)
110 # Convert all the values to FileTemplate, handling defaults
111 for key, templateStr in contents.items():
112 if key == self.defaultKey:
113 if not templateStr:
114 self.default = None
115 else:
116 self.default = FileTemplate(templateStr)
117 else:
118 self._templates[key] = FileTemplate(templateStr)
120 @property
121 def templates(self) -> Mapping[LookupKey, FileTemplate]:
122 """Collection of templates indexed by lookup key (`dict`)."""
123 return MappingProxyType(self._templates)

    def __contains__(self, key: LookupKey) -> bool:
        """Indicates whether the supplied key is present in the templates.

        Parameters
        ----------
        key : `LookupKey`
            Key to use to determine if a corresponding value is present
            in the templates.

        Returns
        -------
        in : `bool`
            `True` if the supplied key is present in the templates.
        """
        return key in self.templates

    def __getitem__(self, key: LookupKey) -> FileTemplate:
        return self.templates[key]

    def validateTemplates(self, entities: Iterable[Union[DatasetType, DatasetRef, StorageClass]],
                          logFailures: bool = False) -> None:
        """Retrieve the template associated with each dataset type and
        validate the dimensions against the template.

        Parameters
        ----------
        entities : `DatasetType`, `DatasetRef`, or `StorageClass`
            Entities to validate against the matching templates.  Can be
            differing types.
        logFailures : `bool`, optional
            If `True`, output a log message for every validation error
            detected.

        Raises
        ------
        FileTemplateValidationError
            Raised if an entity failed validation.

        Notes
        -----
        See `FileTemplate.validateTemplate()` for details on the validation.
        """
        unmatchedKeys = set(self.templates)
        failed = []
        for entity in entities:
            try:
                matchKey, template = self.getTemplateWithMatch(entity)
            except KeyError as e:
                # KeyError always quotes on stringification so strip here
                errMsg = str(e).strip('"\'')
                failed.append(errMsg)
                if logFailures:
                    log.fatal("%s", errMsg)
                continue

            if matchKey in unmatchedKeys:
                unmatchedKeys.remove(matchKey)

            try:
                template.validateTemplate(entity)
            except FileTemplateValidationError as e:
                failed.append(f"{e} (via key '{matchKey}')")
                if logFailures:
                    log.fatal("Template failure with key '%s': %s", matchKey, e)

        if logFailures and unmatchedKeys:
            log.warning("Unchecked keys: %s", ", ".join([str(k) for k in unmatchedKeys]))

        if failed:
            if len(failed) == 1:
                msg = str(failed[0])
            else:
                failMsg = ";\n".join(failed)
                msg = f"{len(failed)} template validation failures: {failMsg}"
            raise FileTemplateValidationError(msg)

    def getLookupKeys(self) -> Set[LookupKey]:
        """Retrieve the lookup keys for all the template entries.

        Returns
        -------
        keys : `set` of `LookupKey`
            The keys available for matching a template.
        """
        return set(self.templates)

    def getTemplateWithMatch(self, entity: Union[DatasetRef, DatasetType, StorageClass]
                             ) -> Tuple[LookupKey, FileTemplate]:
        """Retrieve the `FileTemplate` associated with the dataset type along
        with the lookup key that was a match for this template.

        If the lookup name corresponds to a component the base name for
        the component will be examined if the full component name does
        not match.

        Parameters
        ----------
        entity : `DatasetType`, `DatasetRef`, or `StorageClass`
            Instance to use to look for a corresponding template.
            A `DatasetType` name or a `StorageClass` name will be used
            depending on the supplied entity.  Priority is given to a
            `DatasetType` name.  Supports instrument override if a
            `DatasetRef` is provided configured with an ``instrument``
            value for the data ID.

        Returns
        -------
        matchKey : `LookupKey`
            The key that resulted in the successful match.
        template : `FileTemplate`
            Template instance to use with that dataset type.

        Raises
        ------
        KeyError
            Raised if no template could be located for this dataset type.
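
        Examples
        --------
        A usage sketch, assuming ``templates`` is a `FileTemplates`
        instance with an entry matching ``ref.datasetType``::

            key, template = templates.getTemplateWithMatch(ref)
            path = template.format(ref)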
242 """
243 # Get the names to use for lookup
244 names = entity._lookupNames()
246 # Get a location from the templates
247 template = self.default
248 source = self.defaultKey
249 for name in names:
250 if name in self.templates:
251 template = self.templates[name]
252 source = name
253 break
255 if template is None:
256 raise KeyError(f"Unable to determine file template from supplied argument [{entity}]")
258 log.debug("Got file %s from %s via %s", template, entity, source)
260 return source, template

    def getTemplate(self, entity: Union[DatasetType, DatasetRef, StorageClass]) -> FileTemplate:
        """Retrieve the `FileTemplate` associated with the dataset type.

        If the lookup name corresponds to a component the base name for
        the component will be examined if the full component name does
        not match.

        Parameters
        ----------
        entity : `DatasetType`, `DatasetRef`, or `StorageClass`
            Instance to use to look for a corresponding template.
            A `DatasetType` name or a `StorageClass` name will be used
            depending on the supplied entity.  Priority is given to a
            `DatasetType` name.  Supports instrument override if a
            `DatasetRef` is provided configured with an ``instrument``
            value for the data ID.

        Returns
        -------
        template : `FileTemplate`
            Template instance to use with that dataset type.

        Raises
        ------
        KeyError
            Raised if no template could be located for this dataset type.
        """
        _, template = self.getTemplateWithMatch(entity)
        return template


class FileTemplate:
    """Format a path template into a fully expanded path.

    Parameters
    ----------
    template : `str`
        Template string.

    Raises
    ------
    FileTemplateValidationError
        Raised if the template fails basic validation.

    Notes
    -----
    The templates use the standard Format Specification Mini-Language
    with the caveat that only named fields can be used.  The field names
    are taken from the Dimensions along with several additional fields:

    - datasetType: `str`, `DatasetType.name`
    - component: `str`, name of the StorageClass component
    - run: `str`, name of the run this dataset was added with

    `run` must always be provided to ensure unique paths.

    More detailed information can be requested from dimensions by using a dot
    notation, so ``visit.name`` would use the name of the visit and
    ``detector.name_in_raft`` would use the name of the detector within the
    raft.

    The mini-language is extended to understand a "?" in the format
    specification.  This indicates that a field is optional.  If that
    dimension is missing, the field, along with the text before the field
    (unless it contains a path separator), will be removed from the output
    path.

    By default any "/" in a dataId value will be replaced by "_" to prevent
    unexpected directories being created in the path.  If the "/" should be
    retained then a special "/" format specifier can be included in the
    template.
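
    Examples
    --------
    Illustrative templates (a sketch; the dimension and record names
    assume a universe defining ``visit``, ``detector``, and
    ``physical_filter``)::

        "{run}/{datasetType}/{visit}/{datasetType}_{detector}"
        "{run}/{datasetType}_{component:?}/{visit.name}_{physical_filter}"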
332 """
334 mandatoryFields = {"run"}
335 """A set of fields, one of which must be present in a template."""
337 datasetFields = {"datasetType", "component"}
338 """Fields related to the supplied dataset, not a dimension."""
340 specialFields = mandatoryFields | datasetFields
341 """Set of special fields that are available independently of the defined
342 Dimensions."""

    def __init__(self, template: str):
        if not isinstance(template, str):
            raise FileTemplateValidationError(f"Template ('{template}') is not a string")
        self.template = template

        # Do basic validation without access to dimensions
        self.validateTemplate(None)

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, FileTemplate):
            return False

        return self.template == other.template

    def __str__(self) -> str:
        return self.template

    def __repr__(self) -> str:
        return f'{self.__class__.__name__}("{self.template}")'

    def fields(self, optionals: bool = False, specials: bool = False, subfields: bool = False) -> Set[str]:
        """Return the field names used in this template.

        Parameters
        ----------
        optionals : `bool`, optional
            If `True`, optional fields are included in the returned set.
        specials : `bool`, optional
            If `True`, non-dimension fields are included.
        subfields : `bool`, optional
            If `True`, fields with syntax ``a.b`` are included.  If `False`,
            the default, only ``a`` would be returned.

        Returns
        -------
        names : `set`
            Names of fields used in this template.

        Notes
        -----
        The returned set will only include the special values such as
        ``datasetType`` and ``component`` if ``specials`` is `True`.
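
        Examples
        --------
        A short illustration of the default filtering, derived from the
        parsing logic below:

        >>> t = FileTemplate("{run}/{datasetType}_{visit}")
        >>> sorted(t.fields())
        ['visit']
        >>> sorted(t.fields(specials=True))
        ['datasetType', 'run', 'visit']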
387 """
388 fmt = string.Formatter()
389 parts = fmt.parse(self.template)
391 names = set()
392 for literal, field_name, format_spec, conversion in parts:
393 if field_name is not None and format_spec is not None:
394 if "?" in format_spec and not optionals:
395 continue
397 if not specials and field_name in self.specialFields:
398 continue
400 if "." in field_name and not subfields:
401 field_name, _ = field_name.split(".")
403 names.add(field_name)
405 return names

    def format(self, ref: DatasetRef) -> str:
        """Format a template string into a full path.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset to be formatted.

        Returns
        -------
        path : `str`
            Expanded path.

        Raises
        ------
        KeyError
            Raised if the requested field is not defined and the field is
            not optional, or if a component is specified but ``component``
            was not part of the template.
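
        Examples
        --------
        A hypothetical expansion; the dataset type, data ID, run name, and
        template are made up for illustration.  For a `DatasetRef` of
        dataset type ``calexp`` with data ID ``{"visit": 903334,
        "detector": 20}`` in run ``run1``, the template
        ``"{run}/{datasetType}/{visit}/{datasetType}_{detector:03d}"``
        would expand to ``run1/calexp/903334/calexp_020``.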
426 """
427 # Extract defined non-None dimensions from the dataId
428 # We attempt to get the "full" dict on the assumption that ref.dataId
429 # is a ExpandedDataCoordinate, as it should be when running
430 # PipelineTasks. We should probably just require that when formatting
431 # templates (and possibly when constructing DatasetRefs), but doing so
432 # would break a ton of otherwise-useful tests that would need to be
433 # modified to provide a lot more metadata.
434 fields = {k: v for k, v in getattr(ref.dataId, "full", ref.dataId).items() if v is not None}
436 if isinstance(ref.dataId, DataCoordinate):
437 # If there is exactly one SkyPixDimension in the data ID, alias its
438 # value with the key "skypix", so we can use that to match any
439 # skypix dimension.
440 # We restrict this behavior to the (real-world) case where the
441 # data ID is a DataCoordinate, not just a dict. That should only
442 # not be true in some test code, but that test code is a pain to
443 # update to be more like the real world while still providing our
444 # only tests of important behavior.
445 skypix = [dimension for dimension in ref.dataId.graph if isinstance(dimension, SkyPixDimension)]
446 if len(skypix) == 1:
447 fields["skypix"] = fields[skypix[0]]
449 # Extra information that can be included using . syntax
450 extras = getattr(ref.dataId, "records", {})
452 datasetType = ref.datasetType
453 fields["datasetType"], component = datasetType.nameAndComponent()
455 usedComponent = False
456 if component is not None:
457 fields["component"] = component
459 usedRun = False
460 fields["run"] = ref.run
462 fmt = string.Formatter()
463 parts = fmt.parse(self.template)
464 output = ""
466 for literal, field_name, format_spec, conversion in parts:
468 if field_name == "component":
469 usedComponent = True
471 if format_spec is None:
472 output = output + literal
473 continue
475 # Should only happen if format_spec is None
476 if field_name is None:
477 raise RuntimeError(f"Unexpected blank field_name encountered in {self.template} [{literal}]")
479 if "?" in format_spec:
480 optional = True
481 # Remove the non-standard character from the spec
482 format_spec = format_spec.replace("?", "")
483 else:
484 optional = False
486 if field_name == "run":
487 usedRun = True
489 if field_name == "collection":
490 raise KeyError("'collection' is no longer supported as a "
491 "file template placeholder; use 'run' instead.")
493 # Check for request for additional information from the dataId
494 if "." in field_name:
495 primary, secondary = field_name.split(".")
496 if primary in extras:
497 record = extras[primary]
498 # Only fill in the fields if we have a value, the
499 # KeyError will trigger below if the attribute is missing.
500 if hasattr(record, secondary):
501 fields[field_name] = getattr(record, secondary)
503 if field_name in fields:
504 value = fields[field_name]
505 elif optional:
506 # If this is optional ignore the format spec
507 # and do not include the literal text prior to the optional
508 # field unless it contains a "/" path separator
509 format_spec = ""
510 value = ""
511 if "/" not in literal:
512 literal = ""
513 else:
514 raise KeyError(f"'{field_name}' requested in template via '{self.template}' "
515 "but not defined and not optional")
517 # Handle "/" in values since we do not want to be surprised by
518 # unexpected directories turning up
519 replace_slash = True
520 if "/" in format_spec:
521 # Remove the non-standard character from the spec
522 format_spec = format_spec.replace("/", "")
523 replace_slash = False
525 if isinstance(value, str):
526 if replace_slash:
527 value = value.replace("/", "_")
529 # Now use standard formatting
530 output = output + literal + format(value, format_spec)
532 # Replace periods with underscores in the non-directory part to
533 # prevent file extension confusion.
534 head, tail = os.path.split(output)
535 output = os.path.join(head, tail.replace(".", "_"))
537 # Complain if we were meant to use a component
538 if component is not None and not usedComponent:
539 raise KeyError("Component '{}' specified but template {} did not use it".format(component,
540 self.template))
542 # Complain if there's no run
543 if not usedRun:
544 raise KeyError("Template does not include 'run'.")
546 # Since this is known to be a path, normalize it in case some double
547 # slashes have crept in
548 path = os.path.normpath(output)
550 # It should not be an absolute path (may happen with optionals)
551 if os.path.isabs(path):
552 path = os.path.relpath(path, start="/")
554 return path

    def validateTemplate(self, entity: Union[DatasetRef, DatasetType, StorageClass]) -> None:
        """Compare the template against a representative entity that would
        like to use the template.

        Parameters
        ----------
        entity : `DatasetType`, `DatasetRef`, or `StorageClass`
            Entity to compare against the template.

        Raises
        ------
        FileTemplateValidationError
            Raised if the template is inconsistent with the supplied entity.

        Notes
        -----
        Validation will always include a check that mandatory fields
        are present and that at least one field refers to a dimension.
        If the supplied entity includes a `DimensionGraph` then it will be
        used to compare the available dimensions with those specified in the
        template.
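
        Examples
        --------
        Basic validation runs at construction time (with ``entity=None``),
        so a template lacking the mandatory ``run`` field is rejected
        immediately::

            FileTemplate("{datasetType}/{visit}")  # raises FileTemplateValidationError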
577 """
579 # Check that the template has run
580 withSpecials = self.fields(specials=True, optionals=True)
581 if not withSpecials & self.mandatoryFields:
582 raise FileTemplateValidationError(f"Template '{self}' is missing a mandatory field"
583 f" from {self.mandatoryFields}")
585 # Check that there are some dimension fields in the template
586 allfields = self.fields(optionals=True)
587 if not allfields:
588 raise FileTemplateValidationError(f"Template '{self}' does not seem to have any fields"
589 " corresponding to dimensions.")
591 # If we do not have dimensions available then all we can do is shrug
592 if not hasattr(entity, "dimensions"):
593 return
595 # if this entity represents a component then insist that component
596 # is present in the template. If the entity is not a component
597 # make sure that component is not mandatory.
598 try:
599 if entity.isComponent():
600 if "component" not in withSpecials:
601 raise FileTemplateValidationError(f"Template '{self}' has no component but "
602 f"{entity} refers to a component.")
603 else:
604 mandatorySpecials = self.fields(specials=True)
605 if "component" in mandatorySpecials:
606 raise FileTemplateValidationError(f"Template '{self}' has mandatory component but "
607 f"{entity} does not refer to a component.")
608 except AttributeError:
609 pass
611 # Get the dimension links to get the full set of available field names
612 # Fall back to dataId keys if we have them but no links.
613 # dataId keys must still be present in the template
614 try:
615 minimal = set(entity.dimensions.required.names)
616 maximal = set(entity.dimensions.names)
617 except AttributeError:
618 try:
619 minimal = set(entity.dataId.keys())
620 maximal = minimal
621 except AttributeError:
622 return
624 required = self.fields(optionals=False)
626 # Calculate any field usage that does not match a dimension
627 if not required.issubset(maximal):
628 raise FileTemplateValidationError(f"Template '{self}' is inconsistent with {entity}:"
629 f" {required} is not a subset of {maximal}.")
631 if not allfields.issuperset(minimal):
632 raise FileTemplateValidationError(f"Template '{self}' is inconsistent with {entity}:"
633 f" {allfields} is not a superset of {minimal}.")
635 return