Coverage for python/lsst/daf/butler/datastore/file_templates.py: 11%
316 statements
coverage.py v7.5.1, created at 2024-05-08 02:51 -0700

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Support for file template string expansion."""

from __future__ import annotations

__all__ = ("FileTemplates", "FileTemplate", "FileTemplatesConfig", "FileTemplateValidationError")

import logging
import os.path
import string
from collections.abc import Iterable, Mapping
from types import MappingProxyType
from typing import TYPE_CHECKING, Any, TypedDict, cast

from .._config import Config
from .._config_support import LookupKey, processLookupConfigs
from .._dataset_ref import DatasetId, DatasetRef
from .._exceptions import ValidationError
from .._storage_class import StorageClass
from ..dimensions import DataCoordinate, DimensionGraph, DimensionGroup

if TYPE_CHECKING:
    from .._dataset_type import DatasetType
    from ..dimensions import DimensionRecord, DimensionUniverse

log = logging.getLogger(__name__)


class FileTemplateValidationError(ValidationError):
    """Exception for file template inconsistent with associated DatasetType."""

    pass


class FileTemplatesConfig(Config):
    """Configuration information for `FileTemplates`."""

    pass


class FieldDict(TypedDict):
    """Dictionary containing the grouped fields from a template."""

    standard: set[str]
    special: set[str]
    subfield: set[str]
    parent: set[str]


class FileTemplates:
    """Collection of `FileTemplate` templates.

    Parameters
    ----------
    config : `FileTemplatesConfig` or `str`
        Load configuration.
    default : `str`, optional
        If not `None`, a default template to use if no template has
        been specified explicitly in the configuration.
    universe : `DimensionUniverse`
        The set of all known dimensions, used to normalize any lookup keys
        involving dimensions.

    Notes
    -----
    The configuration can include one level of hierarchy where an
    instrument-specific section can be defined to override more general
    template specifications. This is represented in YAML using a
    key of form ``instrument<name>`` which can then define templates
    that will be returned if a `DatasetRef` contains a matching instrument
    name in the data ID.

    A default fallback template can be specified using the key ``default``.
    Defaulting can be disabled in a child configuration by defining the
    value to be an empty string or a boolean `False`.

    The config is parsed using the function
    `~lsst.daf.butler.configSubset.processLookupConfigs`.
    """
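
    # Illustrative sketch only (not part of this module or its shipped
    # defaults): a templates configuration with the shape described in the
    # Notes above might look roughly like the following YAML, with a
    # ``default`` fallback, a per-dataset-type template, and an
    # ``instrument<name>`` section that overrides it. The dataset type and
    # instrument names here are hypothetical.
    #
    #     default: "{run}/{datasetType}/{id}"
    #     calexp: "{run}/{datasetType}/{visit}/{detector}/{datasetType}_{id}"
    #     instrument<MyCam>:
    #       calexp: "{run}/MyCam/{datasetType}/{visit}/{detector}/{id}"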

    defaultKey = LookupKey("default")
    """Configuration key associated with the default template."""

    def __init__(
        self,
        config: FileTemplatesConfig | str,
        default: str | None = None,
        *,
        universe: DimensionUniverse,
    ):
        self.config = FileTemplatesConfig(config)
        self._templates = {}

        contents = processLookupConfigs(self.config, universe=universe)

        # Determine default to use -- defaults can be disabled if
        # we get a False or None
        defaultValue = contents.get(self.defaultKey, default)
        if defaultValue and not isinstance(defaultValue, str):
            raise RuntimeError(
                f"Default template value should be str or False, or None. Got '{defaultValue}'"
            )
        self.default = FileTemplate(defaultValue) if isinstance(defaultValue, str) and defaultValue else None

        # Convert all the values to FileTemplate, handling defaults
        for key, templateStr in contents.items():
            if key == self.defaultKey:
                continue
            if not isinstance(templateStr, str):
                raise RuntimeError(f"Unexpected value in file template key {key}: {templateStr}")
            self._templates[key] = FileTemplate(templateStr)

    @property
    def templates(self) -> Mapping[LookupKey, FileTemplate]:
        """Return collection of templates indexed by lookup key (`dict`)."""
        return MappingProxyType(self._templates)

    def __contains__(self, key: LookupKey) -> bool:
        """Indicate whether the supplied key is present in the templates.

        Parameters
        ----------
        key : `LookupKey`
            Key to use to determine if a corresponding value is present
            in the templates.

        Returns
        -------
        in : `bool`
            `True` if the supplied key is present in the templates.
        """
        return key in self.templates

    def __getitem__(self, key: LookupKey) -> FileTemplate:
        return self.templates[key]

    def validateTemplates(
        self, entities: Iterable[DatasetType | DatasetRef | StorageClass], logFailures: bool = False
    ) -> None:
        """Validate the templates.

        Retrieves the template associated with each dataset type and
        validates the dimensions against the template.

        Parameters
        ----------
        entities : `DatasetType`, `DatasetRef`, or `StorageClass`
            Entities to validate against the matching templates. Can be
            differing types.
        logFailures : `bool`, optional
            If `True`, output a log message for every validation error
            detected.

        Raises
        ------
        FileTemplateValidationError
            Raised if an entity failed validation.

        Notes
        -----
        See `FileTemplate.validateTemplate()` for details on the validation.
        """
        unmatchedKeys = set(self.templates)
        failed = []
        for entity in entities:
            try:
                matchKey, template = self.getTemplateWithMatch(entity)
            except KeyError as e:
                # KeyError always quotes on stringification so strip here
                errMsg = str(e).strip("\"'")
                failed.append(errMsg)
                if logFailures:
                    log.critical("%s", errMsg)
                continue

            if matchKey in unmatchedKeys:
                unmatchedKeys.remove(matchKey)

            try:
                template.validateTemplate(entity)
            except FileTemplateValidationError as e:
                failed.append(f"{e} (via key '{matchKey}')")
                if logFailures:
                    log.critical("Template failure with key '%s': %s", matchKey, e)

        if logFailures and unmatchedKeys:
            log.warning("Unchecked keys: '%s'", ", ".join([str(k) for k in unmatchedKeys]))

        if failed:
            if len(failed) == 1:
                msg = str(failed[0])
            else:
                failMsg = ";\n".join(failed)
                msg = f"{len(failed)} template validation failures: {failMsg}"
            raise FileTemplateValidationError(msg)

    def getLookupKeys(self) -> set[LookupKey]:
        """Retrieve the look up keys for all the template entries.

        Returns
        -------
        keys : `set` of `LookupKey`
            The keys available for matching a template.
        """
        return set(self.templates)

    def getTemplateWithMatch(
        self, entity: DatasetRef | DatasetType | StorageClass
    ) -> tuple[LookupKey, FileTemplate]:
        """Retrieve the `FileTemplate` associated with the dataset type.

        Also retrieves the lookup key that was a match for this template.

        If the lookup name corresponds to a component the base name for
        the component will be examined if the full component name does
        not match.

        Parameters
        ----------
        entity : `DatasetType`, `DatasetRef`, or `StorageClass`
            Instance to use to look for a corresponding template.
            A `DatasetType` name or a `StorageClass` name will be used
            depending on the supplied entity. Priority is given to a
            `DatasetType` name. Supports instrument override if a
            `DatasetRef` is provided configured with an ``instrument``
            value for the data ID.

        Returns
        -------
        matchKey : `LookupKey`
            The key that resulted in the successful match.
        template : `FileTemplate`
            Template instance to use with that dataset type.

        Raises
        ------
        KeyError
            Raised if no template could be located for this Dataset type.
        """
        # Get the names to use for lookup
        names = entity._lookupNames()

        # Get a location from the templates
        template = self.default
        source = self.defaultKey
        for name in names:
            if name in self.templates:
                template = self.templates[name]
                source = name
                break

        if template is None:
            raise KeyError(f"Unable to determine file template from supplied argument [{entity}]")

        log.debug("Got file %s from %s via %s", template, entity, source)

        return source, template

    def getTemplate(self, entity: DatasetType | DatasetRef | StorageClass) -> FileTemplate:
        """Retrieve the `FileTemplate` associated with the dataset type.

        If the lookup name corresponds to a component the base name for
        the component will be examined if the full component name does
        not match.

        Parameters
        ----------
        entity : `DatasetType`, `DatasetRef`, or `StorageClass`
            Instance to use to look for a corresponding template.
            A `DatasetType` name or a `StorageClass` name will be used
            depending on the supplied entity. Priority is given to a
            `DatasetType` name. Supports instrument override if a
            `DatasetRef` is provided configured with an ``instrument``
            value for the data ID.

        Returns
        -------
        template : `FileTemplate`
            Template instance to use with that dataset type.

        Raises
        ------
        KeyError
            Raised if no template could be located for this Dataset type.
        """
        _, template = self.getTemplateWithMatch(entity)
        return template


class FileTemplate:
    """Format a path template into a fully expanded path.

    Parameters
    ----------
    template : `str`
        Template string.

    Raises
    ------
    FileTemplateValidationError
        Raised if the template fails basic validation.

    Notes
    -----
    The templates use the standard Format Specification Mini-Language
    with the caveat that only named fields can be used. The field names
    are taken from the Dimensions along with several additional fields:

    - datasetType: `str`, `DatasetType.name`
    - component: `str`, name of the StorageClass component
    - run: `str`, name of the run this dataset was added with

    `run` must always be provided to ensure unique paths.

    More detailed information can be requested from dimensions by using a dot
    notation, so ``visit.name`` would use the name of the visit and
    ``detector.name_in_raft`` would use the name of the detector within the
    raft.

    In some cases the template may want to support multiple options for a
    single part of the template. For example, you may not want to include
    ``group`` if ``exposure`` is in the data ID. To handle this situation a
    ``|`` character can be used to specify multiple data ID keys in the
    same format specifier. For example ``{exposure.obs_id|group}`` would
    choose ``exposure.obs_id`` if ``exposure`` is in the data ID but otherwise
    would use ``group``.

    The mini-language is extended to understand a "?" in the format
    specification. This indicates that a field is optional. If that
    Dimension is missing, the field, along with the text before the field
    (unless it is a path separator), will be removed from the output path.

    By default any "/" in a dataId value will be replaced by "_" to prevent
    unexpected directories being created in the path. If the "/" should be
    retained then a special "/" format specifier can be included in the
    template.
    """
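
    # Illustrative sketch only: applying the rules above to a hypothetical
    # template such as
    #
    #     "{run}/{datasetType}/{visit.name|exposure}/{detector:03d?}/{datasetType}_{id}"
    #
    # would use the visit name when ``visit`` is in the data ID and otherwise
    # fall back to ``exposure``; would render the detector as a zero-padded
    # directory (e.g. ``012``) when ``detector`` is present; and would drop
    # that directory level entirely when ``detector`` is absent (the leading
    # "/" is kept because it is a path separator, and the resulting duplicate
    # slash is normalized away when the path is built).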

    mandatoryFields = {"run", "id"}
    """A set of fields, one of which must be present in a template."""

    datasetFields = {"datasetType", "component"}
    """Fields related to the supplied dataset, not a dimension."""

    specialFields = mandatoryFields | datasetFields
    """Set of special fields that are available independently of the defined
    Dimensions."""

    def __init__(self, template: str):
        if not isinstance(template, str):
            raise FileTemplateValidationError(
                f"Template ('{template}') does not contain any format specifiers"
            )
        self.template = template

        # Do basic validation without access to dimensions
        self.validateTemplate(None)

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, FileTemplate):
            return False

        return self.template == other.template

    def __str__(self) -> str:
        return self.template

    def __repr__(self) -> str:
        return f'{self.__class__.__name__}("{self.template}")'

    def grouped_fields(
        self, dimensions: DimensionGroup | DimensionGraph | None = None
    ) -> tuple[FieldDict, FieldDict]:
        """Return all the fields, grouped by their type.

        Parameters
        ----------
        dimensions : `lsst.daf.butler.DimensionGroup` or `None`
            If present, can be used to filter unknown or unused dimensions out
            of the template when alternates are used. This allows a template to
            have newer dimensions within it that are not known to an older
            universe so long as an alternative is given that works with an
            older universe. If none of the alternates are present in the
            dimensions the first will be returned. The caller can determine how
            to handle the situation.

        Returns
        -------
        grouped : `FieldDict`
            The fields grouped by their type. The keys for this dict are
            ``standard``, ``special``, ``subfield``, and
            ``parent``. If field ``a.b`` is present, ``a`` will not be
            included in ``standard`` but will be included in ``parent``.
        grouped_optional : `FieldDict`
            As for ``grouped`` but the optional fields.
        """
        fmt = string.Formatter()
        parts = fmt.parse(self.template)

        grouped: FieldDict = {
            "standard": set(),
            "special": set(),
            "subfield": set(),
            "parent": set(),
        }
        grouped_optional: FieldDict = {
            "standard": set(),
            "special": set(),
            "subfield": set(),
            "parent": set(),
        }

        for _, field_names, format_spec, _ in parts:
            if field_names is not None and format_spec is not None:
                # Determine which fields are in the dimension universe.
                given_fields = field_names.split("|")
                validated_fields: list[str] = []
                if dimensions is not None:
                    for field in given_fields:
                        if "." in field:
                            field_name, _ = field.split(".")
                        else:
                            field_name = field
                        if field_name in dimensions or field_name in self.specialFields:
                            # Found one that is in the relevant dimensions
                            # so stop searching.
                            validated_fields.append(field)
                            break
                if not validated_fields:
                    # None of them were in the dimensions or we had no
                    # dimensions. Use all of them below and let the caller work
                    # it (some of these may be skypix).
                    validated_fields = given_fields

                if "?" in format_spec:
                    target = grouped_optional
                else:
                    target = grouped

                for field_name in validated_fields:  # Treat alternates as equals.
                    subfield = None
                    if field_name in self.specialFields:
                        field_set = target["special"]
                    elif "." in field_name:
                        # This needs to be added twice.
                        subfield = field_name
                        field_set = target["parent"]
                        field_name, _ = field_name.split(".")
                        target["subfield"].add(subfield)
                    else:
                        field_set = target["standard"]

                    field_set.add(field_name)

        return grouped, grouped_optional

    def fields(self, optionals: bool = False, specials: bool = False, subfields: bool = False) -> set[str]:
        """Return the field names used in this template.

        Parameters
        ----------
        optionals : `bool`
            If `True`, optional fields are included in the returned set.
        specials : `bool`
            If `True`, non-dimension fields are included.
        subfields : `bool`, optional
            If `True`, fields with syntax ``a.b`` are included. If `False`,
            the default, only ``a`` would be returned.

        Returns
        -------
        names : `set`
            Names of fields used in this template.

        Notes
        -----
        The returned set will include the special values such as `datasetType`
        and `component`.
        """
        fmt = string.Formatter()
        parts = fmt.parse(self.template)

        names = set()
        for _, field_names, format_spec, _ in parts:
            if field_names is not None and format_spec is not None:
                if not optionals and "?" in format_spec:
                    continue
                for field_name in field_names.split("|"):
                    if not specials and field_name in self.specialFields:
                        continue

                    if not subfields and "." in field_name:
                        field_name, _ = field_name.split(".")

                    names.add(field_name)

        return names
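
    # Illustrative sketch only (hypothetical template, not part of this
    # module's test suite), showing what the method above would be expected
    # to return for the various flag combinations:
    #
    #     >>> t = FileTemplate("{run}/{visit.name}/{detector:?}_{datasetType}_{id}")
    #     >>> t.fields()
    #     {'visit'}
    #     >>> t.fields(optionals=True) == {"visit", "detector"}
    #     True
    #     >>> t.fields(specials=True) == {"run", "visit", "datasetType", "id"}
    #     True
    #     >>> t.fields(subfields=True)
    #     {'visit.name'}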

    def format(self, ref: DatasetRef) -> str:
        """Format a template string into a full path.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset to be formatted.

        Returns
        -------
        path : `str`
            Expanded path.

        Raises
        ------
        KeyError
            Raised if the requested field is not defined and the field is
            not optional. Or, `component` is specified but "component" was
            not part of the template.
        RuntimeError
            Raised if a template uses dimension record metadata but no
            records are attached to the `DatasetRef`.
        """
        # Get the dimension values. Should all be non None.
        # Will want to store a DatasetId in it later.
        fields = cast(dict[str, int | str | DatasetId], dict(ref.dataId.mapping))
        # Extra information that can be included using . syntax
        extras: dict[str, DimensionRecord | None] = {}
        skypix_alias: str | None = None
        can_use_extra_records = False
        if isinstance(ref.dataId, DataCoordinate):
            if ref.dataId.hasRecords():
                can_use_extra_records = True
            skypix_alias = self._determine_skypix_alias(ref)
            if skypix_alias is not None:
                fields["skypix"] = fields[skypix_alias]

        datasetType = ref.datasetType
        fields["datasetType"], component = datasetType.nameAndComponent()

        usedComponent = False
        if component is not None:
            fields["component"] = component

        fields["run"] = ref.run
        fields["id"] = ref.id

        fmt = string.Formatter()
        parts = fmt.parse(self.template)
        output = ""

        for literal, field_name, format_spec, _ in parts:
            if field_name and "|" in field_name:
                alternates = field_name.split("|")
                for alt in alternates:
                    if "." in alt:
                        primary, _ = alt.split(".")
                    else:
                        primary = alt
                    # If the alternate is known to this data ID then we use
                    # it and drop the lower priority fields.
                    if primary in fields:
                        field_name = alt
                        break
                else:
                    # None of these were found in the field list. Select the
                    # first and let downstream code handle whether this
                    # is optional or not.
                    field_name = alternates[0]

            if field_name == "component":
                usedComponent = True

            if format_spec is None:
                output = output + literal
                continue

            # Should only happen if format_spec is None
            if field_name is None:
                raise RuntimeError(f"Unexpected blank field_name encountered in {self.template} [{literal}]")

            if "?" in format_spec:
                optional = True
                # Remove the non-standard character from the spec
                format_spec = format_spec.replace("?", "")
            else:
                optional = False

            # Check for request for additional information from the dataId
            if "." in field_name:
                primary, secondary = field_name.split(".")
                if can_use_extra_records and primary not in extras and primary in fields:
                    record_key = primary
                    if primary == "skypix" and skypix_alias is not None:
                        record_key = skypix_alias
                    extras[record_key] = ref.dataId.records[record_key]
                    if record_key != primary:
                        # Make sure that htm7 and skypix both work.
                        extras[primary] = extras[record_key]

                if primary in extras:
                    record = extras[primary]
                    # Only fill in the fields if we have a value, the
                    # KeyError will trigger below if the attribute is missing,
                    # but only if it is not optional. This is most likely
                    # a typo in the metadata field and so should be reported
                    # even if optional.
                    if hasattr(record, secondary):
                        fields[field_name] = getattr(record, secondary)
                    else:
                        # Is a log message sufficient?
                        log.info(
                            "Template field %s could not be resolved because metadata field %s"
                            " is not understood for dimension %s. Template entry will be ignored",
                            field_name,
                            secondary,
                            primary,
                        )
                elif primary in fields:
                    # We do have an entry for the primary but do not have any
                    # secondary entries. This is likely a problem with the
                    # code failing to attach a record to the DatasetRef.
                    raise RuntimeError(
                        f"No metadata records attached to dataset {ref}"
                        f" when attempting to expand field {field_name}."
                        " Either expand the DatasetRef or change the template."
                    )

            if field_name in fields:
                value = fields[field_name]
            elif optional:
                # If this is optional ignore the format spec
                # and do not include the literal text prior to the optional
                # field unless it contains a "/" path separator
                format_spec = ""
                value = ""
                if "/" not in literal:
                    literal = ""
            else:
                raise KeyError(
                    f"'{field_name}' requested in template via '{self.template}' "
                    "but not defined and not optional"
                )

            # Handle "/" in values since we do not want to be surprised by
            # unexpected directories turning up
            replace_slash = True
            if "/" in format_spec:
                # Remove the non-standard character from the spec
                format_spec = format_spec.replace("/", "")
                replace_slash = False

            if isinstance(value, str):
                # Replace spaces with underscores for more friendly file paths
                value = value.replace(" ", "_")
                if replace_slash:
                    value = value.replace("/", "_")

            # Now use standard formatting
            output = output + literal + format(value, format_spec)

        # Replace periods with underscores in the non-directory part to
        # prevent file extension confusion. Also replace # in the non-dir
        # part to avoid confusion with URI fragments
        head, tail = os.path.split(output)
        tail = tail.replace(".", "_")
        tail = tail.replace("#", "HASH")
        output = os.path.join(head, tail)

        # Complain if we were meant to use a component
        if component is not None and not usedComponent:
            raise KeyError(f"Component '{component}' specified but template {self.template} did not use it")

        # Since this is known to be a path, normalize it in case some double
        # slashes have crept in
        path = os.path.normpath(output)

        # It should not be an absolute path (may happen with optionals)
        if os.path.isabs(path):
            path = os.path.relpath(path, start="/")

        return path

    def validateTemplate(self, entity: DatasetRef | DatasetType | StorageClass | None) -> None:
        """Compare the template against supplied entity that wants to use it.

        Parameters
        ----------
        entity : `DatasetType`, `DatasetRef`, or `StorageClass`
            Entity to compare against template. If `None` is given only
            very basic validation of templates will be performed.

        Raises
        ------
        FileTemplateValidationError
            Raised if the template is inconsistent with the supplied entity.

        Notes
        -----
        Validation will always include a check that mandatory fields
        are present and that at least one field refers to a dimension.
        If the supplied entity includes a `DimensionGraph` then it will be
        used to compare the available dimensions with those specified in the
        template.
        """
        # A universe can be used to filter out alternates that are
        # not known.
        dimensions = getattr(entity, "dimensions", None)
        grouped_fields, grouped_optionals = self.grouped_fields(dimensions)

        # Check that the template has run
        withSpecials = (
            grouped_fields["standard"]
            | grouped_fields["parent"]
            | grouped_fields["special"]
            | grouped_optionals["standard"]
            | grouped_optionals["parent"]
            | grouped_optionals["special"]
        )

        if "collection" in withSpecials:
            raise FileTemplateValidationError(
                "'collection' is no longer supported as a file template placeholder; use 'run' instead."
            )

        if not withSpecials & self.mandatoryFields:
            raise FileTemplateValidationError(
                f"Template '{self}' is missing a mandatory field from {self.mandatoryFields}"
            )

        # Check that there are some dimension fields in the template
        # The id is allowed instead if present since that also uniquely
        # identifies the file in the datastore.
        allfields = (
            grouped_fields["standard"]
            | grouped_fields["parent"]
            | grouped_optionals["standard"]
            | grouped_optionals["parent"]
        )
        if not allfields and "id" not in withSpecials:
            raise FileTemplateValidationError(
                f"Template '{self}' does not seem to have any fields corresponding to dimensions."
            )

        # Do not allow ../ in the template to confuse where the file might
        # end up.
        if "../" in self.template:
            raise FileTemplateValidationError("A file template should not include jump to parent directory.")

        # Require that if "id" is in the template then it must exist in the
        # file part -- this avoids templates like "{id}/fixed" where the file
        # name is fixed but the directory has the ID.
        if "id" in withSpecials:
            file_part = os.path.split(self.template)[-1]
            if "{id}" not in file_part:
                raise FileTemplateValidationError(
                    f"Template '{self}' includes the 'id' but that ID is not part of the file name."
                )

        # If we do not have dimensions available then all we can do is shrug
        if not hasattr(entity, "dimensions"):
            return

        # Mypy does not know about hasattr so help it out
        if entity is None:
            return

        # if this entity represents a component then insist that component
        # is present in the template. If the entity is not a component
        # make sure that component is not mandatory.
        try:
            # mypy does not see the except block so complains about
            # StorageClass not supporting isComponent
            if entity.isComponent():  # type: ignore
                if "component" not in withSpecials:
                    raise FileTemplateValidationError(
                        f"Template '{self}' has no component but {entity} refers to a component."
                    )
            else:
                mandatorySpecials = (
                    grouped_fields["standard"] | grouped_fields["parent"] | grouped_fields["special"]
                )
                if "component" in mandatorySpecials:
                    raise FileTemplateValidationError(
                        f"Template '{self}' has mandatory component but "
                        f"{entity} does not refer to a component."
                    )
        except AttributeError:
            pass

        # From here on we need at least a DatasetType
        # Mypy doesn't understand the AttributeError clause below
        if isinstance(entity, StorageClass):
            return

        # Get the dimension links to get the full set of available field names
        # Fall back to dataId keys if we have them but no links.
        # dataId keys must still be present in the template
        try:
            minimal = set(entity.dimensions.required.names)
            maximal = set(entity.dimensions.names)
        except AttributeError:
            try:
                minimal = set(entity.dataId.keys().names)  # type: ignore
                maximal = minimal
            except AttributeError:
                return

        required = grouped_fields["standard"] | grouped_fields["parent"]

        # Replace specific skypix dimensions with generic one
        skypix_alias = self._determine_skypix_alias(entity)
        if skypix_alias is not None:
            minimal.add("skypix")
            maximal.add("skypix")
            minimal.remove(skypix_alias)
            maximal.remove(skypix_alias)
            if skypix_alias in required:
                required.remove(skypix_alias)
                required.add("skypix")
            if skypix_alias in allfields:
                allfields.remove(skypix_alias)
                allfields.add("skypix")

        # Calculate any field usage that does not match a dimension
        if not required.issubset(maximal):
            raise FileTemplateValidationError(
                f"Template '{self}' is inconsistent with {entity}: {required} is not a subset of {maximal}."
            )

        if not allfields.issuperset(minimal):
            raise FileTemplateValidationError(
                f"Template '{self}' is inconsistent with {entity}:"
                f" {allfields} is not a superset of {minimal}."
            )

        return

    def _determine_skypix_alias(self, entity: DatasetRef | DatasetType) -> str | None:
        """Return the dimension name that refers to a sky pixel.

        Parameters
        ----------
        entity : `DatasetRef` or `DatasetType`
            The entity to examine.

        Returns
        -------
        alias : `str`
            If there is a sky pixelization in the supplied dataId, return
            its name, else returns `None`. Will return `None` also if there
            is more than one sky pix dimension in the data ID or if the
            dataID is not a `DataCoordinate`
        """
        alias = None

        if isinstance(entity, DatasetRef):
            entity = entity.datasetType

        # If there is exactly one SkyPixDimension in the data ID, alias its
        # value with the key "skypix", so we can use that to match any
        # skypix dimension.
        # We restrict this behavior to the (real-world) case where the
        # data ID is a DataCoordinate, not just a dict. That should only
        # not be true in some test code, but that test code is a pain to
        # update to be more like the real world while still providing our
        # only tests of important behavior.
        if len(entity.dimensions.skypix) == 1:
            (alias,) = entity.dimensions.skypix.names
        return alias