# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Support for file template string expansion."""

from __future__ import annotations

__all__ = ("FileTemplates", "FileTemplate", "FileTemplatesConfig", "FileTemplateValidationError")

import os.path
import string
import logging
from types import MappingProxyType

from typing import (
    TYPE_CHECKING,
    Any,
    Iterable,
    Mapping,
    Optional,
    Set,
    Tuple,
    Union,
)

from .config import Config
from .configSupport import processLookupConfigs, LookupKey
from .exceptions import ValidationError
from .dimensions import SkyPixDimension, DataCoordinate

if TYPE_CHECKING:
    from .dimensions import DimensionUniverse
    from .datasets import DatasetType, DatasetRef
    from .storageClass import StorageClass

log = logging.getLogger(__name__)


class FileTemplateValidationError(ValidationError):
    """Exception thrown when a file template is not consistent with the
    associated `DatasetType`."""
    pass


class FileTemplatesConfig(Config):
    """Configuration information for `FileTemplates`."""
    pass


class FileTemplates:
    """Collection of `FileTemplate` templates.

    Parameters
    ----------
    config : `FileTemplatesConfig` or `str`
        Load configuration.
    default : `str`, optional
        If not `None`, a default template to use if no template has
        been specified explicitly in the configuration.
    universe : `DimensionUniverse`
        The set of all known dimensions, used to normalize any lookup keys
        involving dimensions.

    Notes
    -----
    The configuration can include one level of hierarchy where an
    instrument-specific section can be defined to override more general
    template specifications.  This is represented in YAML using a
    key of the form ``instrument<name>`` which can then define templates
    that will be returned if a `DatasetRef` contains a matching instrument
    name in the data ID.

    A default fallback template can be specified using the key ``default``.
    Defaulting can be disabled in a child configuration by defining the
    value to be an empty string or a boolean `False`.

    The config is parsed using the function
    `~lsst.daf.butler.configSupport.processLookupConfigs`.
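
    Examples
    --------
    A minimal sketch of how a templates configuration can be laid out.
    The dataset type name ``calexp``, the instrument name ``MyCam``, and
    the ``universe`` variable are all hypothetical here; templates can be
    supplied as a `dict` equivalent to the YAML form::

        config = FileTemplatesConfig({
            "default": "{run}/{datasetType}/{visit:?}",
            "calexp": "{run}/{datasetType}/{visit}/{datasetType}_{physical_filter}",
            "instrument<MyCam>": {
                "calexp": "{run}/mycam/{visit}/{datasetType}",
            },
        })
        templates = FileTemplates(config, universe=universe)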
97 """
99 defaultKey = LookupKey("default")
100 """Configuration key associated with the default template."""
102 def __init__(self, config: Union[FileTemplatesConfig, str],
103 default: Optional[str] = None, *,
104 universe: DimensionUniverse):
105 self.config = FileTemplatesConfig(config)
106 self._templates = {}
108 contents = processLookupConfigs(self.config, universe=universe)
110 # Determine default to use -- defaults can be disabled if
111 # we get a False or None
112 defaultValue = contents.get(self.defaultKey, default)
113 if defaultValue and not isinstance(defaultValue, str):
114 raise RuntimeError("Default template value should be str or False, or None. "
115 f"Got '{defaultValue}'")
116 self.default = FileTemplate(defaultValue) if isinstance(defaultValue, str) and defaultValue else None
118 # Convert all the values to FileTemplate, handling defaults
119 for key, templateStr in contents.items():
120 if key == self.defaultKey:
121 continue
122 if not isinstance(templateStr, str):
123 raise RuntimeError(f"Unexpected value in file template key {key}: {templateStr}")
124 self._templates[key] = FileTemplate(templateStr)
126 @property
127 def templates(self) -> Mapping[LookupKey, FileTemplate]:
128 """Collection of templates indexed by lookup key (`dict`)."""
129 return MappingProxyType(self._templates)
131 def __contains__(self, key: LookupKey) -> bool:
132 """Indicates whether the supplied key is present in the templates.
134 Parameters
135 ----------
136 key : `LookupKey`
137 Key to use to determine if a corresponding value is present
138 in the templates.
140 Returns
141 -------
142 in : `bool`
143 `True` if the supplied key is present in the templates.
144 """
145 return key in self.templates
147 def __getitem__(self, key: LookupKey) -> FileTemplate:
148 return self.templates[key]
150 def validateTemplates(self, entities: Iterable[Union[DatasetType, DatasetRef, StorageClass]],
151 logFailures: bool = False) -> None:
152 """Retrieve the template associated with each dataset type and
153 validate the dimensions against the template.
155 Parameters
156 ----------
157 entities : `DatasetType`, `DatasetRef`, or `StorageClass`
158 Entities to validate against the matching templates. Can be
159 differing types.
160 logFailures : `bool`, optional
161 If `True`, output a log message for every validation error
162 detected.
164 Raises
165 ------
166 FileTemplateValidationError
167 Raised if an entity failed validation.
169 Notes
170 -----
171 See `FileTemplate.validateTemplate()` for details on the validation.
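
        Examples
        --------
        A sketch of validating a collection of dataset types, assuming a
        hypothetical ``registry`` object that can supply them::

            templates.validateTemplates(registry.queryDatasetTypes(),
                                        logFailures=True)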
172 """
173 unmatchedKeys = set(self.templates)
174 failed = []
175 for entity in entities:
176 try:
177 matchKey, template = self.getTemplateWithMatch(entity)
178 except KeyError as e:
179 # KeyError always quotes on stringification so strip here
180 errMsg = str(e).strip('"\'')
181 failed.append(errMsg)
182 if logFailures:
183 log.fatal("%s", errMsg)
184 continue
186 if matchKey in unmatchedKeys:
187 unmatchedKeys.remove(matchKey)
189 try:
190 template.validateTemplate(entity)
191 except FileTemplateValidationError as e:
192 failed.append(f"{e} (via key '{matchKey}')")
193 if logFailures:
194 log.fatal("Template failure with key '%s': %s", matchKey, e)
196 if logFailures and unmatchedKeys:
197 log.warning("Unchecked keys: %s", ", ".join([str(k) for k in unmatchedKeys]))
199 if failed:
200 if len(failed) == 1:
201 msg = str(failed[0])
202 else:
203 failMsg = ";\n".join(failed)
204 msg = f"{len(failed)} template validation failures: {failMsg}"
205 raise FileTemplateValidationError(msg)
207 def getLookupKeys(self) -> Set[LookupKey]:
208 """Retrieve the look up keys for all the template entries.
210 Returns
211 -------
212 keys : `set` of `LookupKey`
213 The keys available for matching a template.
214 """
215 return set(self.templates)
217 def getTemplateWithMatch(self, entity: Union[DatasetRef,
218 DatasetType, StorageClass]) -> Tuple[LookupKey,
219 FileTemplate]:
220 """Retrieve the `FileTemplate` associated with the dataset type along
221 with the lookup key that was a match for this template.
223 If the lookup name corresponds to a component the base name for
224 the component will be examined if the full component name does
225 not match.
227 Parameters
228 ----------
229 entity : `DatasetType`, `DatasetRef`, or `StorageClass`
230 Instance to use to look for a corresponding template.
231 A `DatasetType` name or a `StorageClass` name will be used
232 depending on the supplied entity. Priority is given to a
233 `DatasetType` name. Supports instrument override if a
234 `DatasetRef` is provided configured with an ``instrument``
235 value for the data ID.
237 Returns
238 -------
239 matchKey : `LookupKey`
240 The key that resulted in the successful match.
241 template : `FileTemplate`
242 Template instance to use with that dataset type.
244 Raises
245 ------
246 KeyError
247 Raised if no template could be located for this Dataset type.
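
        Examples
        --------
        A sketch, assuming ``templates`` was configured with both a
        ``calexp`` entry and an ``instrument<MyCam>`` override, and that
        ``ref`` is a `DatasetRef` for that dataset type (all names
        hypothetical)::

            matchKey, template = templates.getTemplateWithMatch(ref)
            # matchKey reports which entry matched; the instrument-specific
            # entry takes priority if the data ID includes that instrument.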
248 """
249 # Get the names to use for lookup
250 names = entity._lookupNames()
252 # Get a location from the templates
253 template = self.default
254 source = self.defaultKey
255 for name in names:
256 if name in self.templates:
257 template = self.templates[name]
258 source = name
259 break
261 if template is None:
262 raise KeyError(f"Unable to determine file template from supplied argument [{entity}]")
264 log.debug("Got file %s from %s via %s", template, entity, source)
266 return source, template
268 def getTemplate(self, entity: Union[DatasetType, DatasetRef, StorageClass]) -> FileTemplate:
269 """Retrieve the `FileTemplate` associated with the dataset type.
271 If the lookup name corresponds to a component the base name for
272 the component will be examined if the full component name does
273 not match.
275 Parameters
276 ----------
277 entity : `DatasetType`, `DatasetRef`, or `StorageClass`
278 Instance to use to look for a corresponding template.
279 A `DatasetType` name or a `StorageClass` name will be used
280 depending on the supplied entity. Priority is given to a
281 `DatasetType` name. Supports instrument override if a
282 `DatasetRef` is provided configured with an ``instrument``
283 value for the data ID.
285 Returns
286 -------
287 template : `FileTemplate`
288 Template instance to use with that dataset type.
290 Raises
291 ------
292 KeyError
293 Raised if no template could be located for this Dataset type.
294 """
295 _, template = self.getTemplateWithMatch(entity)
296 return template
299class FileTemplate:
300 """Format a path template into a fully expanded path.
302 Parameters
303 ----------
304 template : `str`
305 Template string.
307 Raises
308 ------
309 FileTemplateValidationError
310 Raised if the template fails basic validation.
312 Notes
313 -----
314 The templates use the standard Format Specification Mini-Language
315 with the caveat that only named fields can be used. The field names
316 are taken from the Dimensions along with several additional fields:
318 - datasetType: `str`, `DatasetType.name`
319 - component: `str`, name of the StorageClass component
320 - run: `str`, name of the run this dataset was added with
322 `run` must always be provided to ensure unique paths.
324 More detailed information can be requested from dimensions by using a dot
325 notation, so ``visit.name`` would use the name of the visit and
326 ``detector.name_in_raft`` would use the name of the detector within the
327 raft.
329 The mini-language is extended to understand a "?" in the format
330 specification. This indicates that a field is optional. If that
331 Dimension is missing the field, along with the text before the field,
332 unless it is a path separator, will be removed from the output path.
334 By default any "/" in a dataId value will be replaced by "_" to prevent
335 unexpected directories being created in the path. If the "/" should be
336 retained then a special "/" format specifier can be included in the
337 template.
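
    Examples
    --------
    A minimal sketch; the dimension names used are hypothetical::

        template = FileTemplate("{run}/{datasetType}/{visit:06d?}/"
                                "{datasetType}_{physical_filter}")

    Here ``visit`` is optional: if a data ID has no ``visit`` value, the
    ``{visit:06d?}`` field is dropped from the expanded path and the
    resulting double "/" is normalized away.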
338 """
340 mandatoryFields = {"run"}
341 """A set of fields, one of which must be present in a template."""
343 datasetFields = {"datasetType", "component"}
344 """Fields related to the supplied dataset, not a dimension."""
346 specialFields = mandatoryFields | datasetFields
347 """Set of special fields that are available independently of the defined
348 Dimensions."""
350 def __init__(self, template: str):
351 if not isinstance(template, str):
352 raise FileTemplateValidationError(f"Template ('{template}') does "
353 "not contain any format specifiers")
354 self.template = template
356 # Do basic validation without access to dimensions
357 self.validateTemplate(None)
359 def __eq__(self, other: Any) -> bool:
360 if not isinstance(other, FileTemplate):
361 return False
363 return self.template == other.template
365 def __str__(self) -> str:
366 return self.template
368 def __repr__(self) -> str:
369 return f'{self.__class__.__name__}("{self.template}")'
371 def fields(self, optionals: bool = False, specials: bool = False, subfields: bool = False) -> Set[str]:
372 """Return the field names used in this template.
374 Parameters
375 ----------
376 optionals : `bool`
377 If `True`, optional fields are included in the returned set.
378 specials : `bool`
379 If `True`, non-dimension fields are included.
380 subfields : `bool`, optional
381 If `True`, fields with syntax ``a.b`` are included. If `False`,
382 the default, only ``a`` would be returned.
384 Returns
385 -------
386 names : `set`
387 Names of fields used in this template
389 Notes
390 -----
391 The returned set will include the special values such as `datasetType`
392 and `component`.
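
        Examples
        --------
        A sketch using a template with an optional dotted field::

            t = FileTemplate("{run}/{datasetType}/{visit.name:?}")
            t.fields()                                # set()
            t.fields(optionals=True)                  # {'visit'}
            t.fields(optionals=True, subfields=True)  # {'visit.name'}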
393 """
394 fmt = string.Formatter()
395 parts = fmt.parse(self.template)
397 names = set()
398 for literal, field_name, format_spec, conversion in parts:
399 if field_name is not None and format_spec is not None:
400 if "?" in format_spec and not optionals:
401 continue
403 if not specials and field_name in self.specialFields:
404 continue
406 if "." in field_name and not subfields:
407 field_name, _ = field_name.split(".")
409 names.add(field_name)
411 return names
413 def format(self, ref: DatasetRef) -> str:
414 """Format a template string into a full path.
416 Parameters
417 ----------
418 ref : `DatasetRef`
419 The dataset to be formatted.
421 Returns
422 -------
423 path : `str`
424 Expanded path.
426 Raises
427 ------
428 KeyError
429 Raised if the requested field is not defined and the field is
430 not optional. Or, `component` is specified but "component" was
431 not part of the template.
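
        Examples
        --------
        A sketch of an expansion; the dataset type and data ID values are
        hypothetical.  Given a `DatasetRef` ``ref`` for dataset type
        ``calexp`` with ``run="ingest/run"``, ``visit=903334``, and
        ``physical_filter="HSC-R"``::

            t = FileTemplate("{run}/{datasetType}/{visit:06d}/"
                             "{datasetType}_{physical_filter}")
            t.format(ref)  # -> "ingest_run/calexp/903334/calexp_HSC-R"

        Note that the "/" in the run value has been replaced by "_".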
432 """
433 # Extract defined non-None dimensions from the dataId.
434 # This guards against Nones being explicitly present in the data ID
435 # (which can happen if, say, an exposure has no filter), as well as
436 # the case where only required dimensions are present (which in this
437 # context should only happen in unit tests; in general we need all
438 # dimensions to fill out templates).
439 fields = {k: ref.dataId.get(k) for k in ref.datasetType.dimensions.names
440 if ref.dataId.get(k) is not None}
441 # Extra information that can be included using . syntax
442 extras = {}
443 if isinstance(ref.dataId, DataCoordinate):
444 if ref.dataId.hasRecords():
445 extras = ref.dataId.records.byName()
446 # If there is exactly one SkyPixDimension in the data ID, alias its
447 # value with the key "skypix", so we can use that to match any
448 # skypix dimension.
449 # We restrict this behavior to the (real-world) case where the
450 # data ID is a DataCoordinate, not just a dict. That should only
451 # not be true in some test code, but that test code is a pain to
452 # update to be more like the real world while still providing our
453 # only tests of important behavior.
454 skypix = [dimension for dimension in ref.datasetType.dimensions
455 if isinstance(dimension, SkyPixDimension)]
456 if len(skypix) == 1:
457 fields["skypix"] = fields[skypix[0].name]
458 if extras:
459 extras["skypix"] = extras[skypix[0].name]
461 datasetType = ref.datasetType
462 fields["datasetType"], component = datasetType.nameAndComponent()
464 usedComponent = False
465 if component is not None:
466 fields["component"] = component
468 usedRun = False
469 fields["run"] = ref.run
471 fmt = string.Formatter()
472 parts = fmt.parse(self.template)
473 output = ""
475 for literal, field_name, format_spec, conversion in parts:
477 if field_name == "component":
478 usedComponent = True
480 if format_spec is None:
481 output = output + literal
482 continue
484 # Should only happen if format_spec is None
485 if field_name is None:
486 raise RuntimeError(f"Unexpected blank field_name encountered in {self.template} [{literal}]")
488 if "?" in format_spec:
489 optional = True
490 # Remove the non-standard character from the spec
491 format_spec = format_spec.replace("?", "")
492 else:
493 optional = False
495 if field_name == "run":
496 usedRun = True
498 if field_name == "collection":
499 raise KeyError("'collection' is no longer supported as a "
500 "file template placeholder; use 'run' instead.")
502 # Check for request for additional information from the dataId
503 if "." in field_name:
504 primary, secondary = field_name.split(".")
505 if primary in extras:
506 record = extras[primary]
507 # Only fill in the fields if we have a value, the
508 # KeyError will trigger below if the attribute is missing.
509 if hasattr(record, secondary):
510 fields[field_name] = getattr(record, secondary)
512 if field_name in fields:
513 value = fields[field_name]
514 elif optional:
515 # If this is optional ignore the format spec
516 # and do not include the literal text prior to the optional
517 # field unless it contains a "/" path separator
518 format_spec = ""
519 value = ""
520 if "/" not in literal:
521 literal = ""
522 else:
523 raise KeyError(f"'{field_name}' requested in template via '{self.template}' "
524 "but not defined and not optional")
526 # Handle "/" in values since we do not want to be surprised by
527 # unexpected directories turning up
528 replace_slash = True
529 if "/" in format_spec:
530 # Remove the non-standard character from the spec
531 format_spec = format_spec.replace("/", "")
532 replace_slash = False
534 if isinstance(value, str):
535 if replace_slash:
536 value = value.replace("/", "_")
538 # Now use standard formatting
539 output = output + literal + format(value, format_spec)
541 # Replace periods with underscores in the non-directory part to
542 # prevent file extension confusion.
543 head, tail = os.path.split(output)
544 output = os.path.join(head, tail.replace(".", "_"))
546 # Complain if we were meant to use a component
547 if component is not None and not usedComponent:
548 raise KeyError("Component '{}' specified but template {} did not use it".format(component,
549 self.template))
551 # Complain if there's no run
552 if not usedRun:
553 raise KeyError("Template does not include 'run'.")
555 # Since this is known to be a path, normalize it in case some double
556 # slashes have crept in
557 path = os.path.normpath(output)
559 # It should not be an absolute path (may happen with optionals)
560 if os.path.isabs(path):
561 path = os.path.relpath(path, start="/")
563 return path

    def validateTemplate(self, entity: Union[DatasetRef, DatasetType, StorageClass, None]) -> None:
        """Compare the template against a representative entity that would
        like to use this template.

        Parameters
        ----------
        entity : `DatasetType`, `DatasetRef`, or `StorageClass`
            Entity to compare against the template.  If `None` is given only
            very basic validation of the template will be performed.

        Raises
        ------
        FileTemplateValidationError
            Raised if the template is inconsistent with the supplied entity.

        Notes
        -----
        Validation will always include a check that mandatory fields
        are present and that at least one field refers to a dimension.
        If the supplied entity includes a `DimensionGraph` then it will be
        used to compare the available dimensions with those specified in the
        template.
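
        Examples
        --------
        Basic validation happens automatically at construction time; for
        example, a template lacking the mandatory ``run`` field fails
        immediately::

            FileTemplate("{datasetType}/{visit}")
            # raises FileTemplateValidationError: missing mandatory field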
587 """
589 # Check that the template has run
590 withSpecials = self.fields(specials=True, optionals=True)
591 if not withSpecials & self.mandatoryFields:
592 raise FileTemplateValidationError(f"Template '{self}' is missing a mandatory field"
593 f" from {self.mandatoryFields}")
595 # Check that there are some dimension fields in the template
596 allfields = self.fields(optionals=True)
597 if not allfields:
598 raise FileTemplateValidationError(f"Template '{self}' does not seem to have any fields"
599 " corresponding to dimensions.")
601 # If we do not have dimensions available then all we can do is shrug
602 if not hasattr(entity, "dimensions"):
603 return
605 # if this entity represents a component then insist that component
606 # is present in the template. If the entity is not a component
607 # make sure that component is not mandatory.
608 try:
609 # mypy does not see the except block so complains about
610 # StorageClass not supporting isComponent
611 if entity.isComponent(): # type: ignore
612 if "component" not in withSpecials:
613 raise FileTemplateValidationError(f"Template '{self}' has no component but "
614 f"{entity} refers to a component.")
615 else:
616 mandatorySpecials = self.fields(specials=True)
617 if "component" in mandatorySpecials:
618 raise FileTemplateValidationError(f"Template '{self}' has mandatory component but "
619 f"{entity} does not refer to a component.")
620 except AttributeError:
621 pass
623 # Get the dimension links to get the full set of available field names
624 # Fall back to dataId keys if we have them but no links.
625 # dataId keys must still be present in the template
626 # Ignore warnings from mypy concerning StorageClass and DatasetType
627 # not supporting the full API.
628 try:
629 minimal = set(entity.dimensions.required.names) # type: ignore
630 maximal = set(entity.dimensions.names) # type: ignore
631 except AttributeError:
632 try:
633 minimal = set(entity.dataId.keys()) # type: ignore
634 maximal = minimal
635 except AttributeError:
636 return
638 required = self.fields(optionals=False)
640 # Calculate any field usage that does not match a dimension
641 if not required.issubset(maximal):
642 raise FileTemplateValidationError(f"Template '{self}' is inconsistent with {entity}:"
643 f" {required} is not a subset of {maximal}.")
645 if not allfields.issuperset(minimal):
646 raise FileTemplateValidationError(f"Template '{self}' is inconsistent with {entity}:"
647 f" {allfields} is not a superset of {minimal}.")
649 return