Coverage for python/lsst/daf/butler/datastore/file_templates.py: 12%

297 statements  

coverage.py v7.5.0, created at 2024-05-02 03:16 -0700

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Support for file template string expansion."""

from __future__ import annotations

__all__ = ("FileTemplates", "FileTemplate", "FileTemplatesConfig", "FileTemplateValidationError")

import logging
import os.path
import string
from collections.abc import Iterable, Mapping
from types import MappingProxyType
from typing import TYPE_CHECKING, Any, TypedDict, cast

from .._config import Config
from .._config_support import LookupKey, processLookupConfigs
from .._dataset_ref import DatasetId, DatasetRef
from .._exceptions import ValidationError
from .._storage_class import StorageClass
from ..dimensions import DataCoordinate

if TYPE_CHECKING:
    from .._dataset_type import DatasetType
    from ..dimensions import DimensionRecord, DimensionUniverse

log = logging.getLogger(__name__)


class FileTemplateValidationError(ValidationError):
    """Exception for file template inconsistent with associated DatasetType."""

    pass


class FileTemplatesConfig(Config):
    """Configuration information for `FileTemplates`."""

    pass


class FieldDict(TypedDict):
    """Dictionary containing the grouped fields from a template."""

    standard: set[str]
    special: set[str]
    subfield: set[str]
    parent: set[str]


class FileTemplates:
    """Collection of `FileTemplate` templates.

    Parameters
    ----------
    config : `FileTemplatesConfig` or `str`
        Load configuration.
    default : `str`, optional
        If not `None`, a default template to use if no template has
        been specified explicitly in the configuration.
    universe : `DimensionUniverse`
        The set of all known dimensions, used to normalize any lookup keys
        involving dimensions.

    Notes
    -----
    The configuration can include one level of hierarchy where an
    instrument-specific section can be defined to override more general
    template specifications. This is represented in YAML using a
    key of form ``instrument<name>`` which can then define templates
    that will be returned if a `DatasetRef` contains a matching instrument
    name in the data ID.

    A default fallback template can be specified using the key ``default``.
    Defaulting can be disabled in a child configuration by defining the
    value to be an empty string or a boolean `False`.

    The config is parsed using the function
    `~lsst.daf.butler.configSubset.processLookupConfigs`.
    """


    defaultKey = LookupKey("default")
    """Configuration key associated with the default template."""

    def __init__(
        self,
        config: FileTemplatesConfig | str,
        default: str | None = None,
        *,
        universe: DimensionUniverse,
    ):
        self.config = FileTemplatesConfig(config)
        self._templates = {}

        contents = processLookupConfigs(self.config, universe=universe)

        # Determine the default to use -- defaults can be disabled if
        # we get a False or None.
        defaultValue = contents.get(self.defaultKey, default)
        if defaultValue and not isinstance(defaultValue, str):
            raise RuntimeError(
                f"Default template value should be str, False, or None. Got '{defaultValue}'"
            )
        self.default = FileTemplate(defaultValue) if isinstance(defaultValue, str) and defaultValue else None

        # Convert all the values to FileTemplate, handling defaults.
        for key, templateStr in contents.items():
            if key == self.defaultKey:
                continue
            if not isinstance(templateStr, str):
                raise RuntimeError(f"Unexpected value in file template key {key}: {templateStr}")
            self._templates[key] = FileTemplate(templateStr)

    @property
    def templates(self) -> Mapping[LookupKey, FileTemplate]:
        """Return collection of templates indexed by lookup key (`dict`)."""
        return MappingProxyType(self._templates)

    def __contains__(self, key: LookupKey) -> bool:
        """Indicate whether the supplied key is present in the templates.

        Parameters
        ----------
        key : `LookupKey`
            Key to use to determine if a corresponding value is present
            in the templates.

        Returns
        -------
        in : `bool`
            `True` if the supplied key is present in the templates.
        """
        return key in self.templates

    def __getitem__(self, key: LookupKey) -> FileTemplate:
        return self.templates[key]

    def validateTemplates(
        self, entities: Iterable[DatasetType | DatasetRef | StorageClass], logFailures: bool = False
    ) -> None:
        """Validate the templates.

        Retrieves the template associated with each dataset type and
        validates the dimensions against the template.

        Parameters
        ----------
        entities : `DatasetType`, `DatasetRef`, or `StorageClass`
            Entities to validate against the matching templates. Can be
            of differing types.
        logFailures : `bool`, optional
            If `True`, output a log message for every validation error
            detected.

        Raises
        ------
        FileTemplateValidationError
            Raised if an entity failed validation.

        Notes
        -----
        See `FileTemplate.validateTemplate()` for details on the validation.
        """
        unmatchedKeys = set(self.templates)
        failed = []
        for entity in entities:
            try:
                matchKey, template = self.getTemplateWithMatch(entity)
            except KeyError as e:
                # KeyError always quotes on stringification so strip here.
                errMsg = str(e).strip("\"'")
                failed.append(errMsg)
                if logFailures:
                    log.critical("%s", errMsg)
                continue

            if matchKey in unmatchedKeys:
                unmatchedKeys.remove(matchKey)

            try:
                template.validateTemplate(entity)
            except FileTemplateValidationError as e:
                failed.append(f"{e} (via key '{matchKey}')")
                if logFailures:
                    log.critical("Template failure with key '%s': %s", matchKey, e)

        if logFailures and unmatchedKeys:
            log.warning("Unchecked keys: '%s'", ", ".join([str(k) for k in unmatchedKeys]))

        if failed:
            if len(failed) == 1:
                msg = str(failed[0])
            else:
                failMsg = ";\n".join(failed)
                msg = f"{len(failed)} template validation failures: {failMsg}"
            raise FileTemplateValidationError(msg)

    def getLookupKeys(self) -> set[LookupKey]:
        """Retrieve the look up keys for all the template entries.

        Returns
        -------
        keys : `set` of `LookupKey`
            The keys available for matching a template.
        """
        return set(self.templates)

    def getTemplateWithMatch(
        self, entity: DatasetRef | DatasetType | StorageClass
    ) -> tuple[LookupKey, FileTemplate]:
        """Retrieve the `FileTemplate` associated with the dataset type.

        Also retrieves the lookup key that was a match for this template.

        If the lookup name corresponds to a component, the base name for
        the component will be examined if the full component name does
        not match.

        Parameters
        ----------
        entity : `DatasetType`, `DatasetRef`, or `StorageClass`
            Instance to use to look for a corresponding template.
            A `DatasetType` name or a `StorageClass` name will be used
            depending on the supplied entity. Priority is given to a
            `DatasetType` name. Supports instrument override if a
            `DatasetRef` is provided configured with an ``instrument``
            value for the data ID.

        Returns
        -------
        matchKey : `LookupKey`
            The key that resulted in the successful match.
        template : `FileTemplate`
            Template instance to use with that dataset type.

        Raises
        ------
        KeyError
            Raised if no template could be located for this dataset type.
        """
        # Get the names to use for lookup.
        names = entity._lookupNames()

        # Get a location from the templates.
        template = self.default
        source = self.defaultKey
        for name in names:
            if name in self.templates:
                template = self.templates[name]
                source = name
                break

        if template is None:
            raise KeyError(f"Unable to determine file template from supplied argument [{entity}]")

        log.debug("Got file %s from %s via %s", template, entity, source)

        return source, template

    def getTemplate(self, entity: DatasetType | DatasetRef | StorageClass) -> FileTemplate:
        """Retrieve the `FileTemplate` associated with the dataset type.

        If the lookup name corresponds to a component, the base name for
        the component will be examined if the full component name does
        not match.

        Parameters
        ----------
        entity : `DatasetType`, `DatasetRef`, or `StorageClass`
            Instance to use to look for a corresponding template.
            A `DatasetType` name or a `StorageClass` name will be used
            depending on the supplied entity. Priority is given to a
            `DatasetType` name. Supports instrument override if a
            `DatasetRef` is provided configured with an ``instrument``
            value for the data ID.

        Returns
        -------
        template : `FileTemplate`
            Template instance to use with that dataset type.

        Raises
        ------
        KeyError
            Raised if no template could be located for this dataset type.
        """
        _, template = self.getTemplateWithMatch(entity)
        return template


class FileTemplate:
    """Format a path template into a fully expanded path.

    Parameters
    ----------
    template : `str`
        Template string.

    Raises
    ------
    FileTemplateValidationError
        Raised if the template fails basic validation.

    Notes
    -----
    The templates use the standard Format Specification Mini-Language
    with the caveat that only named fields can be used. The field names
    are taken from the Dimensions along with several additional fields:

    - datasetType: `str`, `DatasetType.name`
    - component: `str`, name of the StorageClass component
    - run: `str`, name of the run this dataset was added with

    ``run`` must always be provided to ensure unique paths.

    More detailed information can be requested from dimensions by using a dot
    notation, so ``visit.name`` would use the name of the visit and
    ``detector.name_in_raft`` would use the name of the detector within the
    raft.

    In some cases the template may want to support multiple options for a
    single part of the template. For example, you may not want to include
    ``group`` if ``exposure`` is in the data ID. To handle this situation a
    ``|`` character can be used to specify multiple data ID keys in the
    same format specifier. For example ``{exposure.obs_id|group}`` would
    choose ``exposure.obs_id`` if ``exposure`` is in the data ID but otherwise
    would use ``group``.

    The mini-language is extended to understand a "?" in the format
    specification. This indicates that a field is optional. If that
    dimension is missing, the field, along with the text before the field
    (unless it is a path separator), will be removed from the output path.

    By default any "/" in a data ID value will be replaced by "_" to prevent
    unexpected directories being created in the path. If the "/" should be
    retained then a special "/" format specifier can be included in the
    template.
    """


    mandatoryFields = {"run", "id"}
    """A set of fields, one of which must be present in a template."""

    datasetFields = {"datasetType", "component"}
    """Fields related to the supplied dataset, not a dimension."""

    specialFields = mandatoryFields | datasetFields
    """Set of special fields that are available independently of the defined
    Dimensions."""

    def __init__(self, template: str):
        if not isinstance(template, str):
            raise FileTemplateValidationError(
                f"Template ('{template}') does not contain any format specifiers"
            )
        self.template = template

        # Do basic validation without access to dimensions.
        self.validateTemplate(None)

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, FileTemplate):
            return False

        return self.template == other.template

    def __str__(self) -> str:
        return self.template

    def __repr__(self) -> str:
        return f'{self.__class__.__name__}("{self.template}")'

    def grouped_fields(self) -> tuple[FieldDict, FieldDict]:
        """Return all the fields, grouped by their type.

        Returns
        -------
        grouped : `FieldDict`
            The fields grouped by their type. The keys for this dict are
            ``standard``, ``special``, ``subfield``, and
            ``parent``. If field ``a.b`` is present, ``a`` will not be
            included in ``standard`` but will be included in ``parent``.
        grouped_optional : `FieldDict`
            As for ``grouped`` but for the optional fields.
        """

        fmt = string.Formatter()
        parts = fmt.parse(self.template)

        grouped: FieldDict = {
            "standard": set(),
            "special": set(),
            "subfield": set(),
            "parent": set(),
        }
        grouped_optional: FieldDict = {
            "standard": set(),
            "special": set(),
            "subfield": set(),
            "parent": set(),
        }

        for _, field_names, format_spec, _ in parts:
            if field_names is not None and format_spec is not None:
                for field_name in field_names.split("|"):  # Treat alternates as equals.
                    subfield = None
                    if "?" in format_spec:
                        target = grouped_optional
                    else:
                        target = grouped

                    if field_name in self.specialFields:
                        field_set = target["special"]
                    elif "." in field_name:
                        # This needs to be added twice.
                        subfield = field_name
                        field_set = target["parent"]
                        field_name, _ = field_name.split(".")
                        target["subfield"].add(subfield)
                    else:
                        field_set = target["standard"]

                    field_set.add(field_name)

        return grouped, grouped_optional

    def fields(self, optionals: bool = False, specials: bool = False, subfields: bool = False) -> set[str]:
        """Return the field names used in this template.

        Parameters
        ----------
        optionals : `bool`
            If `True`, optional fields are included in the returned set.
        specials : `bool`
            If `True`, non-dimension fields are included.
        subfields : `bool`, optional
            If `True`, fields with syntax ``a.b`` are included. If `False`,
            the default, only ``a`` would be returned.

        Returns
        -------
        names : `set`
            Names of fields used in this template.

        Notes
        -----
        The returned set will include the special values such as `datasetType`
        and `component`.
        """

        fmt = string.Formatter()
        parts = fmt.parse(self.template)

        names = set()
        for _, field_names, format_spec, _ in parts:
            if field_names is not None and format_spec is not None:
                if not optionals and "?" in format_spec:
                    continue
                for field_name in field_names.split("|"):
                    if not specials and field_name in self.specialFields:
                        continue

                    if not subfields and "." in field_name:
                        field_name, _ = field_name.split(".")

                    names.add(field_name)

        return names

    def format(self, ref: DatasetRef) -> str:
        """Format a template string into a full path.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset to be formatted.

        Returns
        -------
        path : `str`
            Expanded path.

        Raises
        ------
        KeyError
            Raised if the requested field is not defined and the field is
            not optional, or if ``component`` is specified but "component"
            was not part of the template.
        RuntimeError
            Raised if a template uses dimension record metadata but no
            records are attached to the `DatasetRef`.
        """

        # Get the dimension values. Should all be non-None.
        # Will want to store a DatasetId in it later.
        fields = cast(dict[str, int | str | DatasetId], dict(ref.dataId.mapping))
        # Extra information that can be included using . syntax.
        extras: dict[str, DimensionRecord | None] = {}
        skypix_alias: str | None = None
        can_use_extra_records = False
        if isinstance(ref.dataId, DataCoordinate):
            if ref.dataId.hasRecords():
                can_use_extra_records = True
            skypix_alias = self._determine_skypix_alias(ref)
            if skypix_alias is not None:
                fields["skypix"] = fields[skypix_alias]

        datasetType = ref.datasetType
        fields["datasetType"], component = datasetType.nameAndComponent()

        usedComponent = False
        if component is not None:
            fields["component"] = component

        fields["run"] = ref.run
        fields["id"] = ref.id

        fmt = string.Formatter()
        parts = fmt.parse(self.template)
        output = ""

        for literal, field_name, format_spec, _ in parts:
            if field_name and "|" in field_name:
                alternates = field_name.split("|")
                for alt in alternates:
                    if "." in alt:
                        primary, _ = alt.split(".")
                    else:
                        primary = alt
                    # If the alternate is known to this data ID then we use
                    # it and drop the lower priority fields.
                    if primary in fields:
                        field_name = alt
                        break
                else:
                    # None of these were found in the field list. Select the
                    # first and let downstream code handle whether this
                    # is optional or not.
                    field_name = alternates[0]

            if field_name == "component":
                usedComponent = True

            if format_spec is None:
                output = output + literal
                continue

            # Should only happen if format_spec is None.
            if field_name is None:
                raise RuntimeError(f"Unexpected blank field_name encountered in {self.template} [{literal}]")

            if "?" in format_spec:
                optional = True
                # Remove the non-standard character from the spec.
                format_spec = format_spec.replace("?", "")
            else:
                optional = False

            # Check for a request for additional information from the data ID.
            if "." in field_name:
                primary, secondary = field_name.split(".")
                if can_use_extra_records and primary not in extras and primary in fields:
                    record_key = primary
                    if primary == "skypix" and skypix_alias is not None:
                        record_key = skypix_alias
                    extras[record_key] = ref.dataId.records[record_key]
                    if record_key != primary:
                        # Make sure that htm7 and skypix both work.
                        extras[primary] = extras[record_key]

                if primary in extras:
                    record = extras[primary]
                    # Only fill in the fields if we have a value; the
                    # KeyError will trigger below if the attribute is missing,
                    # but only if it is not optional. This is most likely
                    # a typo in the metadata field and so should be reported
                    # even if optional.
                    if hasattr(record, secondary):
                        fields[field_name] = getattr(record, secondary)
                    else:
                        # Is a log message sufficient?
                        log.info(
                            "Template field %s could not be resolved because metadata field %s"
                            " is not understood for dimension %s. Template entry will be ignored",
                            field_name,
                            secondary,
                            primary,
                        )
                elif primary in fields:
                    # We do have an entry for the primary but do not have any
                    # secondary entries. This is likely a problem with the
                    # code failing to attach a record to the DatasetRef.
                    raise RuntimeError(
                        f"No metadata records attached to dataset {ref}"
                        f" when attempting to expand field {field_name}."
                        " Either expand the DatasetRef or change the template."
                    )

            if field_name in fields:
                value = fields[field_name]
            elif optional:
                # If this is optional, ignore the format spec
                # and do not include the literal text prior to the optional
                # field unless it contains a "/" path separator.
                format_spec = ""
                value = ""
                if "/" not in literal:
                    literal = ""
            else:
                raise KeyError(
                    f"'{field_name}' requested in template via '{self.template}' "
                    "but not defined and not optional"
                )

            # Handle "/" in values since we do not want to be surprised by
            # unexpected directories turning up.
            replace_slash = True
            if "/" in format_spec:
                # Remove the non-standard character from the spec.
                format_spec = format_spec.replace("/", "")
                replace_slash = False

            if isinstance(value, str):
                # Replace spaces with underscores for more friendly file paths.
                value = value.replace(" ", "_")
                if replace_slash:
                    value = value.replace("/", "_")

            # Now use standard formatting.
            output = output + literal + format(value, format_spec)

        # Replace periods with underscores in the non-directory part to
        # prevent file extension confusion. Also replace # in the non-dir
        # part to avoid confusion with URI fragments.
        head, tail = os.path.split(output)
        tail = tail.replace(".", "_")
        tail = tail.replace("#", "HASH")
        output = os.path.join(head, tail)

        # Complain if we were meant to use a component.
        if component is not None and not usedComponent:
            raise KeyError(f"Component '{component}' specified but template {self.template} did not use it")

        # Since this is known to be a path, normalize it in case some double
        # slashes have crept in.
        path = os.path.normpath(output)

        # It should not be an absolute path (may happen with optionals).
        if os.path.isabs(path):
            path = os.path.relpath(path, start="/")

        return path

    def validateTemplate(self, entity: DatasetRef | DatasetType | StorageClass | None) -> None:
        """Compare the template against a supplied entity that wants to use it.

        Parameters
        ----------
        entity : `DatasetType`, `DatasetRef`, or `StorageClass`
            Entity to compare against the template. If `None` is given only
            very basic validation of templates will be performed.

        Raises
        ------
        FileTemplateValidationError
            Raised if the template is inconsistent with the supplied entity.

        Notes
        -----
        Validation will always include a check that mandatory fields
        are present and that at least one field refers to a dimension.
        If the supplied entity includes a `DimensionGraph` then it will be
        used to compare the available dimensions with those specified in the
        template.
        """

        grouped_fields, grouped_optionals = self.grouped_fields()

        # Check that the template has "run".
        withSpecials = (
            grouped_fields["standard"]
            | grouped_fields["parent"]
            | grouped_fields["special"]
            | grouped_optionals["standard"]
            | grouped_optionals["parent"]
            | grouped_optionals["special"]
        )

        if "collection" in withSpecials:
            raise FileTemplateValidationError(
                "'collection' is no longer supported as a file template placeholder; use 'run' instead."
            )

        if not withSpecials & self.mandatoryFields:
            raise FileTemplateValidationError(
                f"Template '{self}' is missing a mandatory field from {self.mandatoryFields}"
            )

        # Check that there are some dimension fields in the template.
        # The id is allowed instead if present since that also uniquely
        # identifies the file in the datastore.
        allfields = (
            grouped_fields["standard"]
            | grouped_fields["parent"]
            | grouped_optionals["standard"]
            | grouped_optionals["parent"]
        )
        if not allfields and "id" not in withSpecials:
            raise FileTemplateValidationError(
                f"Template '{self}' does not seem to have any fields corresponding to dimensions."
            )

        # Do not allow ../ in the template to confuse where the file might
        # end up.
        if "../" in self.template:
            raise FileTemplateValidationError("A file template should not include jump to parent directory.")

        # Require that if "id" is in the template then it must exist in the
        # file part -- this avoids templates like "{id}/fixed" where the file
        # name is fixed but the directory has the ID.
        if "id" in withSpecials:
            file_part = os.path.split(self.template)[-1]
            if "{id}" not in file_part:
                raise FileTemplateValidationError(
                    f"Template '{self}' includes the 'id' but that ID is not part of the file name."
                )

        # If we do not have dimensions available then all we can do is shrug.
        if not hasattr(entity, "dimensions"):
            return

        # Mypy does not know about hasattr so help it out.
        if entity is None:
            return

        # If this entity represents a component then insist that component
        # is present in the template. If the entity is not a component
        # make sure that component is not mandatory.
        try:
            # mypy does not see the except block so complains about
            # StorageClass not supporting isComponent.
            if entity.isComponent():  # type: ignore
                if "component" not in withSpecials:
                    raise FileTemplateValidationError(
                        f"Template '{self}' has no component but {entity} refers to a component."
                    )
            else:
                mandatorySpecials = (
                    grouped_fields["standard"] | grouped_fields["parent"] | grouped_fields["special"]
                )
                if "component" in mandatorySpecials:
                    raise FileTemplateValidationError(
                        f"Template '{self}' has mandatory component but "
                        f"{entity} does not refer to a component."
                    )
        except AttributeError:
            pass

        # From here on we need at least a DatasetType.
        # Mypy doesn't understand the AttributeError clause below.
        if isinstance(entity, StorageClass):
            return

        # Get the dimension links to get the full set of available field
        # names. Fall back to dataId keys if we have them but no links.
        # dataId keys must still be present in the template.
        try:
            minimal = set(entity.dimensions.required.names)
            maximal = set(entity.dimensions.names)
        except AttributeError:
            try:
                minimal = set(entity.dataId.keys().names)  # type: ignore
                maximal = minimal
            except AttributeError:
                return

        # Replace specific skypix dimensions with the generic one.
        skypix_alias = self._determine_skypix_alias(entity)
        if skypix_alias is not None:
            minimal.add("skypix")
            maximal.add("skypix")
            minimal.remove(skypix_alias)
            maximal.remove(skypix_alias)

        required = grouped_fields["standard"] | grouped_fields["parent"]

        # Calculate any field usage that does not match a dimension.
        if not required.issubset(maximal):
            raise FileTemplateValidationError(
                f"Template '{self}' is inconsistent with {entity}: {required} is not a subset of {maximal}."
            )

        if not allfields.issuperset(minimal):
            raise FileTemplateValidationError(
                f"Template '{self}' is inconsistent with {entity}:"
                f" {allfields} is not a superset of {minimal}."
            )

        return

    def _determine_skypix_alias(self, entity: DatasetRef | DatasetType) -> str | None:
        """Return the dimension name that refers to a sky pixel.

        Parameters
        ----------
        entity : `DatasetRef` or `DatasetType`
            The entity to examine.

        Returns
        -------
        alias : `str`
            If there is a sky pixelization in the supplied data ID, return
            its name, else return `None`. Will also return `None` if there
            is more than one skypix dimension in the data ID or if the
            data ID is not a `DataCoordinate`.
        """
        alias = None

        if isinstance(entity, DatasetRef):
            entity = entity.datasetType

        # If there is exactly one SkyPixDimension in the data ID, alias its
        # value with the key "skypix", so we can use that to match any
        # skypix dimension.
        # We restrict this behavior to the (real-world) case where the
        # data ID is a DataCoordinate, not just a dict. That should only
        # not be true in some test code, but that test code is a pain to
        # update to be more like the real world while still providing our
        # only tests of important behavior.
        if len(entity.dimensions.skypix) == 1:
            (alias,) = entity.dimensions.skypix.names
        return alias