Coverage for python/lsst/daf/butler/datastore/file_templates.py: 11%

316 statements  

coverage.py v7.5.1, created at 2024-05-08 02:51 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28"""Support for file template string expansion.""" 

29 

30from __future__ import annotations 

31 

32__all__ = ("FileTemplates", "FileTemplate", "FileTemplatesConfig", "FileTemplateValidationError") 

33 

34import logging 

35import os.path 

36import string 

37from collections.abc import Iterable, Mapping 

38from types import MappingProxyType 

39from typing import TYPE_CHECKING, Any, TypedDict, cast 

40 

41from .._config import Config 

42from .._config_support import LookupKey, processLookupConfigs 

43from .._dataset_ref import DatasetId, DatasetRef 

44from .._exceptions import ValidationError 

45from .._storage_class import StorageClass 

46from ..dimensions import DataCoordinate, DimensionGraph, DimensionGroup 

47 

48if TYPE_CHECKING: 

49 from .._dataset_type import DatasetType 

50 from ..dimensions import DimensionRecord, DimensionUniverse 

51 

52log = logging.getLogger(__name__) 

53 

54 

55class FileTemplateValidationError(ValidationError): 

56 """Exception for file template inconsistent with associated DatasetType.""" 

57 

58 pass 

59 

60 

61class FileTemplatesConfig(Config): 

62 """Configuration information for `FileTemplates`.""" 

63 

64 pass 

65 

66 

67class FieldDict(TypedDict): 

68 """Dictionary containing the grouped fields from a template.""" 

69 

70 standard: set[str] 

71 special: set[str] 

72 subfield: set[str] 

73 parent: set[str] 

74 

75 

76class FileTemplates: 

77 """Collection of `FileTemplate` templates. 

78 

79 Parameters 

80 ---------- 

81 config : `FileTemplatesConfig` or `str` 

82 Load configuration. 

83 default : `str`, optional 

84 If not `None`, a default template to use if no template has 

85 been specified explicitly in the configuration. 

86 universe : `DimensionUniverse` 

87 The set of all known dimensions, used to normalize any lookup keys 

88 involving dimensions. 

89 

90 Notes 

91 ----- 

92 The configuration can include one level of hierarchy where an 

93 instrument-specific section can be defined to override more general 

94 template specifications. This is represented in YAML using a 

95 key of form ``instrument<name>`` which can then define templates 

96 that will be returned if a `DatasetRef` contains a matching instrument 

97 name in the data ID. 

98 

99 A default fallback template can be specified using the key ``default``. 

100 Defaulting can be disabled in a child configuration by defining the 

101 value to be an empty string or a boolean `False`. 

102 

103 The config is parsed using the function 

104 `~lsst.daf.butler.configSubset.processLookupConfigs`. 

105 """ 

106 

107 defaultKey = LookupKey("default") 

108 """Configuration key associated with the default template.""" 

109 

110 def __init__( 

111 self, 

112 config: FileTemplatesConfig | str, 

113 default: str | None = None, 

114 *, 

115 universe: DimensionUniverse, 

116 ): 

117 self.config = FileTemplatesConfig(config) 

118 self._templates = {} 

119 

120 contents = processLookupConfigs(self.config, universe=universe) 

121 

122 # Determine default to use -- defaults can be disabled if 

123 # we get a False or None 

124 defaultValue = contents.get(self.defaultKey, default) 

125 if defaultValue and not isinstance(defaultValue, str): 

126 raise RuntimeError( 

127 f"Default template value should be str or False, or None. Got '{defaultValue}'" 

128 ) 

129 self.default = FileTemplate(defaultValue) if isinstance(defaultValue, str) and defaultValue else None 

130 

131 # Convert all the values to FileTemplate, handling defaults 

132 for key, templateStr in contents.items(): 

133 if key == self.defaultKey: 

134 continue 

135 if not isinstance(templateStr, str): 

136 raise RuntimeError(f"Unexpected value in file template key {key}: {templateStr}") 

137 self._templates[key] = FileTemplate(templateStr) 

138 

139 @property 

140 def templates(self) -> Mapping[LookupKey, FileTemplate]: 

141 """Return collection of templates indexed by lookup key (`dict`).""" 

142 return MappingProxyType(self._templates) 

143 

144 def __contains__(self, key: LookupKey) -> bool: 

145 """Indicate whether the supplied key is present in the templates. 

146 

147 Parameters 

148 ---------- 

149 key : `LookupKey` 

150 Key to use to determine if a corresponding value is present 

151 in the templates. 

152 

153 Returns 

154 ------- 

155 in : `bool` 

156 `True` if the supplied key is present in the templates. 

157 """ 

158 return key in self.templates 

159 

160 def __getitem__(self, key: LookupKey) -> FileTemplate: 

161 return self.templates[key] 

162 

163 def validateTemplates( 

164 self, entities: Iterable[DatasetType | DatasetRef | StorageClass], logFailures: bool = False 

165 ) -> None: 

166 """Validate the templates. 

167 

168 Retrieves the template associated with each dataset type and 

169 validates the dimensions against the template. 

170 

171 Parameters 

172 ---------- 

173 entities : `DatasetType`, `DatasetRef`, or `StorageClass` 

174 Entities to validate against the matching templates. Can be 

175 differing types. 

176 logFailures : `bool`, optional 

177 If `True`, output a log message for every validation error 

178 detected. 

179 

180 Raises 

181 ------ 

182 FileTemplateValidationError 

183 Raised if an entity failed validation. 

184 

185 Notes 

186 ----- 

187 See `FileTemplate.validateTemplate()` for details on the validation. 

188 """ 

189 unmatchedKeys = set(self.templates) 

190 failed = [] 

191 for entity in entities: 

192 try: 

193 matchKey, template = self.getTemplateWithMatch(entity) 

194 except KeyError as e: 

195 # KeyError always quotes on stringification so strip here 

196 errMsg = str(e).strip("\"'") 

197 failed.append(errMsg) 

198 if logFailures: 

199 log.critical("%s", errMsg) 

200 continue 

201 

202 if matchKey in unmatchedKeys: 

203 unmatchedKeys.remove(matchKey) 

204 

205 try: 

206 template.validateTemplate(entity) 

207 except FileTemplateValidationError as e: 

208 failed.append(f"{e} (via key '{matchKey}')") 

209 if logFailures: 

210 log.critical("Template failure with key '%s': %s", matchKey, e) 

211 

212 if logFailures and unmatchedKeys: 

213 log.warning("Unchecked keys: '%s'", ", ".join([str(k) for k in unmatchedKeys])) 

214 

215 if failed: 

216 if len(failed) == 1: 

217 msg = str(failed[0]) 

218 else: 

219 failMsg = ";\n".join(failed) 

220 msg = f"{len(failed)} template validation failures: {failMsg}" 

221 raise FileTemplateValidationError(msg) 

222 
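# Illustrative sketch (not part of the module): checking configured templates against
# the entities that will use them. Real code usually passes the dataset types known to
# a butler registry; a bare StorageClass is used here as a minimal hypothetical entity.
from lsst.daf.butler import DimensionUniverse, StorageClass
from lsst.daf.butler.datastore.file_templates import (
    FileTemplates,
    FileTemplatesConfig,
    FileTemplateValidationError,
)

templates = FileTemplates(
    FileTemplatesConfig({"default": "{run:/}/{datasetType}_{id}"}),
    universe=DimensionUniverse(),
)

try:
    # Each entity is matched to a template (here the default) and that template is
    # checked for consistency; with logFailures=True every problem is also logged
    # before a single summary exception is raised.
    templates.validateTemplates([StorageClass("StructuredDataDict")], logFailures=True)
except FileTemplateValidationError as err:
    print(f"Template validation failed: {err}")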

223 def getLookupKeys(self) -> set[LookupKey]: 

224 """Retrieve the look up keys for all the template entries. 

225 

226 Returns 

227 ------- 

228 keys : `set` of `LookupKey` 

229 The keys available for matching a template. 

230 """ 

231 return set(self.templates) 

232 

233 def getTemplateWithMatch( 

234 self, entity: DatasetRef | DatasetType | StorageClass 

235 ) -> tuple[LookupKey, FileTemplate]: 

236 """Retrieve the `FileTemplate` associated with the dataset type. 

237 

238 Also retrieves the lookup key that was a match for this template. 

239 

240 If the lookup name corresponds to a component the base name for 

241 the component will be examined if the full component name does 

242 not match. 

243 

244 Parameters 

245 ---------- 

246 entity : `DatasetType`, `DatasetRef`, or `StorageClass` 

247 Instance to use to look for a corresponding template. 

248 A `DatasetType` name or a `StorageClass` name will be used 

249 depending on the supplied entity. Priority is given to a 

250 `DatasetType` name. Supports instrument override if a 

251 `DatasetRef` is provided that is configured with an ``instrument`` 

252 value for the data ID. 

253 

254 Returns 

255 ------- 

256 matchKey : `LookupKey` 

257 The key that resulted in the successful match. 

258 template : `FileTemplate` 

259 Template instance to use with that dataset type. 

260 

261 Raises 

262 ------ 

263 KeyError 

264 Raised if no template could be located for this Dataset type. 

265 """ 

266 # Get the names to use for lookup 

267 names = entity._lookupNames() 

268 

269 # Get a location from the templates 

270 template = self.default 

271 source = self.defaultKey 

272 for name in names: 

273 if name in self.templates: 

274 template = self.templates[name] 

275 source = name 

276 break 

277 

278 if template is None: 

279 raise KeyError(f"Unable to determine file template from supplied argument [{entity}]") 

280 

281 log.debug("Got file %s from %s via %s", template, entity, source) 

282 

283 return source, template 

284 
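# Illustrative sketch (not part of the module): looking up a template together with
# the key that matched. The configuration and entity below are hypothetical; a
# DatasetType or DatasetRef would normally be used, but a StorageClass is the
# simplest entity that supports the lookup protocol.
from lsst.daf.butler import DimensionUniverse, StorageClass
from lsst.daf.butler.datastore.file_templates import FileTemplates, FileTemplatesConfig

templates = FileTemplates(
    FileTemplatesConfig({"default": "{run:/}/{datasetType}_{id}"}),
    universe=DimensionUniverse(),
)

# Nothing matches the storage class name, so the lookup falls back to the default
# template and reports the "default" key as the match.
key, template = templates.getTemplateWithMatch(StorageClass("StructuredDataDict"))
print(key, template)

# getTemplate() performs the same lookup without reporting the matching key.
template = templates.getTemplate(StorageClass("StructuredDataDict"))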

285 def getTemplate(self, entity: DatasetType | DatasetRef | StorageClass) -> FileTemplate: 

286 """Retrieve the `FileTemplate` associated with the dataset type. 

287 

288 If the lookup name corresponds to a component the base name for 

289 the component will be examined if the full component name does 

290 not match. 

291 

292 Parameters 

293 ---------- 

294 entity : `DatasetType`, `DatasetRef`, or `StorageClass` 

295 Instance to use to look for a corresponding template. 

296 A `DatasetType` name or a `StorageClass` name will be used 

297 depending on the supplied entity. Priority is given to a 

298 `DatasetType` name. Supports instrument override if a 

299 `DatasetRef` is provided that is configured with an ``instrument`` 

300 value for the data ID. 

301 

302 Returns 

303 ------- 

304 template : `FileTemplate` 

305 Template instance to use with that dataset type. 

306 

307 Raises 

308 ------ 

309 KeyError 

310 Raised if no template could be located for this Dataset type. 

311 """ 

312 _, template = self.getTemplateWithMatch(entity) 

313 return template 

314 

315 

316class FileTemplate: 

317 """Format a path template into a fully expanded path. 

318 

319 Parameters 

320 ---------- 

321 template : `str` 

322 Template string. 

323 

324 Raises 

325 ------ 

326 FileTemplateValidationError 

327 Raised if the template fails basic validation. 

328 

329 Notes 

330 ----- 

331 The templates use the standard Format Specification Mini-Language 

332 with the caveat that only named fields can be used. The field names 

333 are taken from the Dimensions along with several additional fields: 

334 

335 - datasetType: `str`, `DatasetType.name` 

336 - component: `str`, name of the StorageClass component 

337 - run: `str`, name of the run this dataset was added with 

338 

339 `run` must always be provided to ensure unique paths. 

340 

341 More detailed information can be requested from dimensions by using a dot 

342 notation, so ``visit.name`` would use the name of the visit and 

343 ``detector.name_in_raft`` would use the name of the detector within the 

344 raft. 

345 

346 In some cases the template may want to support multiple options for a 

347 single part of the template. For example, you may not want to include 

348 ``group`` if ``exposure`` is in the data ID. To handle this situation a 

349 ``|`` character can be used to specify multiple data Id keys in the 

350 same format specifier. For example ``{exposure.obs_id|group}`` would 

351 choose ``exposure.obs_id`` if ``exposure`` is in the data ID but otherwise 

352 would use ``group``. 

353 

354 The mini-language is extended to understand a "?" in the format 

355 specification. This indicates that a field is optional. If that 

356 Dimension is missing, the field, along with the text before the field 

357 (unless it is a path separator), will be removed from the output path. 

358 

359 By default any "/" in a dataId value will be replaced by "_" to prevent 

360 unexpected directories being created in the path. If the "/" should be 

361 retained then a special "/" format specifier can be included in the 

362 template. 

363 """ 

364 

365 mandatoryFields = {"run", "id"} 

366 """A set of fields, one of which must be present in a template.""" 

367 

368 datasetFields = {"datasetType", "component"} 

369 """Fields related to the supplied dataset, not a dimension.""" 

370 

371 specialFields = mandatoryFields | datasetFields 

372 """Set of special fields that are available independently of the defined 

373 Dimensions.""" 

374 

375 def __init__(self, template: str): 

376 if not isinstance(template, str): 

377 raise FileTemplateValidationError( 

378 f"Template ('{template}') does not contain any format specifiers" 

379 ) 

380 self.template = template 

381 

382 # Do basic validation without access to dimensions 

383 self.validateTemplate(None) 

384 

385 def __eq__(self, other: Any) -> bool: 

386 if not isinstance(other, FileTemplate): 

387 return False 

388 

389 return self.template == other.template 

390 

391 def __str__(self) -> str: 

392 return self.template 

393 

394 def __repr__(self) -> str: 

395 return f'{self.__class__.__name__}("{self.template}")' 

396 

397 def grouped_fields( 

398 self, dimensions: DimensionGroup | DimensionGraph | None = None 

399 ) -> tuple[FieldDict, FieldDict]: 

400 """Return all the fields, grouped by their type. 

401 

402 Parameters 

403 ---------- 

404 dimensions : `lsst.daf.butler.DimensionGroup` or `None` 

405 If present, can be used to filter unknown or unused dimensions out 

406 of the template when alternates are used. This allows a template to 

407 have newer dimensions within it that are not known to an older 

408 universe so long as an alternative is given that works with an 

409 older universe. If none of the alternates are present in the 

410 dimensions, the first will be returned. The caller can determine how 

411 to handle the situation. 

412 

413 Returns 

414 ------- 

415 grouped : `FieldDict` 

416 The fields grouped by their type. The keys for this dict are 

417 ``standard``, ``special``, ``subfield``, and 

418 ``parent``. If field ``a.b`` is present, ``a`` will not be 

419 included in ``standard`` but will be included in ``parent``. 

420 grouped_optional : `FieldDict` 

421 As for ``grouped`` but the optional fields. 

422 """ 

423 fmt = string.Formatter() 

424 parts = fmt.parse(self.template) 

425 

426 grouped: FieldDict = { 

427 "standard": set(), 

428 "special": set(), 

429 "subfield": set(), 

430 "parent": set(), 

431 } 

432 grouped_optional: FieldDict = { 

433 "standard": set(), 

434 "special": set(), 

435 "subfield": set(), 

436 "parent": set(), 

437 } 

438 

439 for _, field_names, format_spec, _ in parts: 

440 if field_names is not None and format_spec is not None: 

441 # Determine which fields are in the dimension universe. 

442 given_fields = field_names.split("|") 

443 validated_fields: list[str] = [] 

444 if dimensions is not None: 

445 for field in given_fields: 

446 if "." in field: 

447 field_name, _ = field.split(".") 

448 else: 

449 field_name = field 

450 if field_name in dimensions or field_name in self.specialFields: 

451 # Found one that is in the relevant dimensions 

452 # so stop searching. 

453 validated_fields.append(field) 

454 break 

455 if not validated_fields: 

456 # None of them were in the dimensions or we had no 

457 # dimensions. Use all of them below and let the caller work 

458 # it out (some of these may be skypix). 

459 validated_fields = given_fields 

460 

461 if "?" in format_spec: 

462 target = grouped_optional 

463 else: 

464 target = grouped 

465 

466 for field_name in validated_fields: # Treat alternates as equals. 

467 subfield = None 

468 if field_name in self.specialFields: 

469 field_set = target["special"] 

470 elif "." in field_name: 

471 # This needs to be added twice. 

472 subfield = field_name 

473 field_set = target["parent"] 

474 field_name, _ = field_name.split(".") 

475 target["subfield"].add(subfield) 

476 else: 

477 field_set = target["standard"] 

478 

479 field_set.add(field_name) 

480 

481 return grouped, grouped_optional 

482 
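# Illustrative sketch (not part of the module): how grouped_fields() splits a
# hypothetical template into mandatory and optional groups. The field names here are
# standard butler dimension names.
from lsst.daf.butler.datastore.file_templates import FileTemplate

t = FileTemplate("{run:/}/{visit.name:?}/{detector}_{id}")
grouped, grouped_optional = t.grouped_fields()

# "run" and "id" are special fields, "detector" is a plain dimension field.
print(grouped["special"])   # e.g. {'run', 'id'}
print(grouped["standard"])  # e.g. {'detector'}

# "visit.name" is optional ("?" in its format spec); the parent dimension and the
# dotted subfield are recorded separately.
print(grouped_optional["parent"])    # e.g. {'visit'}
print(grouped_optional["subfield"])  # e.g. {'visit.name'}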

483 def fields(self, optionals: bool = False, specials: bool = False, subfields: bool = False) -> set[str]: 

484 """Return the field names used in this template. 

485 

486 Parameters 

487 ---------- 

488 optionals : `bool` 

489 If `True`, optional fields are included in the returned set. 

490 specials : `bool` 

491 If `True`, non-dimension fields are included. 

492 subfields : `bool`, optional 

493 If `True`, fields with syntax ``a.b`` are included. If `False`, 

494 the default, only ``a`` would be returned. 

495 

496 Returns 

497 ------- 

498 names : `set` 

499 Names of fields used in this template. 

500 

501 Notes 

502 ----- 

503 The returned set will only include the special values such as `datasetType` 

504 and `component` when ``specials`` is `True`. 

505 """ 

506 fmt = string.Formatter() 

507 parts = fmt.parse(self.template) 

508 

509 names = set() 

510 for _, field_names, format_spec, _ in parts: 

511 if field_names is not None and format_spec is not None: 

512 if not optionals and "?" in format_spec: 

513 continue 

514 for field_name in field_names.split("|"): 

515 if not specials and field_name in self.specialFields: 

516 continue 

517 

518 if not subfields and "." in field_name: 

519 field_name, _ = field_name.split(".") 

520 

521 names.add(field_name) 

522 

523 return names 

524 
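# Illustrative sketch (not part of the module) of how the fields() flags change what
# is reported for a hypothetical template.
from lsst.daf.butler.datastore.file_templates import FileTemplate

t = FileTemplate("{run:/}/{datasetType}/{visit.name}_{detector}_{id}.{component:?}")

print(t.fields())                               # e.g. {'visit', 'detector'}
print(t.fields(specials=True))                  # adds 'run', 'datasetType' and 'id'
print(t.fields(specials=True, optionals=True))  # also adds the optional 'component'
print(t.fields(subfields=True))                 # keeps 'visit.name' instead of 'visit'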

525 def format(self, ref: DatasetRef) -> str: 

526 """Format a template string into a full path. 

527 

528 Parameters 

529 ---------- 

530 ref : `DatasetRef` 

531 The dataset to be formatted. 

532 

533 Returns 

534 ------- 

535 path : `str` 

536 Expanded path. 

537 

538 Raises 

539 ------ 

540 KeyError 

541 Raised if the requested field is not defined and the field is 

542 not optional. Also raised if `component` is specified but "component" was 

543 not part of the template. 

544 RuntimeError 

545 Raised if a template uses dimension record metadata but no 

546 records are attached to the `DatasetRef`. 

547 """ 

548 # Get the dimension values. Should all be non None. 

549 # Will want to store a DatasetId in it later. 

550 fields = cast(dict[str, int | str | DatasetId], dict(ref.dataId.mapping)) 

551 # Extra information that can be included using . syntax 

552 extras: dict[str, DimensionRecord | None] = {} 

553 skypix_alias: str | None = None 

554 can_use_extra_records = False 

555 if isinstance(ref.dataId, DataCoordinate): 

556 if ref.dataId.hasRecords(): 

557 can_use_extra_records = True 

558 skypix_alias = self._determine_skypix_alias(ref) 

559 if skypix_alias is not None: 

560 fields["skypix"] = fields[skypix_alias] 

561 

562 datasetType = ref.datasetType 

563 fields["datasetType"], component = datasetType.nameAndComponent() 

564 

565 usedComponent = False 

566 if component is not None: 

567 fields["component"] = component 

568 

569 fields["run"] = ref.run 

570 fields["id"] = ref.id 

571 

572 fmt = string.Formatter() 

573 parts = fmt.parse(self.template) 

574 output = "" 

575 

576 for literal, field_name, format_spec, _ in parts: 

577 if field_name and "|" in field_name: 

578 alternates = field_name.split("|") 

579 for alt in alternates: 

580 if "." in alt: 

581 primary, _ = alt.split(".") 

582 else: 

583 primary = alt 

584 # If the alternate is known to this data ID then we use 

585 # it and drop the lower priority fields. 

586 if primary in fields: 

587 field_name = alt 

588 break 

589 else: 

590 # None of these were found in the field list. Select the 

591 # first and let downstream code handle whether this 

592 # is optional or not. 

593 field_name = alternates[0] 

594 

595 if field_name == "component": 

596 usedComponent = True 

597 

598 if format_spec is None: 

599 output = output + literal 

600 continue 

601 

602 # A blank field_name can only occur when format_spec is None, handled above 

603 if field_name is None: 

604 raise RuntimeError(f"Unexpected blank field_name encountered in {self.template} [{literal}]") 

605 

606 if "?" in format_spec: 

607 optional = True 

608 # Remove the non-standard character from the spec 

609 format_spec = format_spec.replace("?", "") 

610 else: 

611 optional = False 

612 

613 # Check for request for additional information from the dataId 

614 if "." in field_name: 

615 primary, secondary = field_name.split(".") 

616 if can_use_extra_records and primary not in extras and primary in fields: 

617 record_key = primary 

618 if primary == "skypix" and skypix_alias is not None: 

619 record_key = skypix_alias 

620 extras[record_key] = ref.dataId.records[record_key] 

621 if record_key != primary: 

622 # Make sure that htm7 and skypix both work. 

623 extras[primary] = extras[record_key] 

624 

625 if primary in extras: 

626 record = extras[primary] 

627 # Only fill in the fields if we have a value, the 

628 # KeyError will trigger below if the attribute is missing, 

629 # but only if it is not optional. This is most likely 

630 # a typo in the metadata field and so should be reported 

631 # even if optional. 

632 if hasattr(record, secondary): 

633 fields[field_name] = getattr(record, secondary) 

634 else: 

635 # Is a log message sufficient? 

636 log.info( 

637 "Template field %s could not be resolved because metadata field %s" 

638 " is not understood for dimension %s. Template entry will be ignored", 

639 field_name, 

640 secondary, 

641 primary, 

642 ) 

643 elif primary in fields: 

644 # We do have an entry for the primary but do not have any 

645 # secondary entries. This is likely a problem with the 

646 # code failing to attach a record to the DatasetRef. 

647 raise RuntimeError( 

648 f"No metadata records attached to dataset {ref}" 

649 f" when attempting to expand field {field_name}." 

650 " Either expand the DatasetRef or change the template." 

651 ) 

652 

653 if field_name in fields: 

654 value = fields[field_name] 

655 elif optional: 

656 # If this is optional ignore the format spec 

657 # and do not include the literal text prior to the optional 

658 # field unless it contains a "/" path separator 

659 format_spec = "" 

660 value = "" 

661 if "/" not in literal: 

662 literal = "" 

663 else: 

664 raise KeyError( 

665 f"'{field_name}' requested in template via '{self.template}' " 

666 "but not defined and not optional" 

667 ) 

668 

669 # Handle "/" in values since we do not want to be surprised by 

670 # unexpected directories turning up 

671 replace_slash = True 

672 if "/" in format_spec: 

673 # Remove the non-standard character from the spec 

674 format_spec = format_spec.replace("/", "") 

675 replace_slash = False 

676 

677 if isinstance(value, str): 

678 # Replace spaces with underscores for more friendly file paths 

679 value = value.replace(" ", "_") 

680 if replace_slash: 

681 value = value.replace("/", "_") 

682 

683 # Now use standard formatting 

684 output = output + literal + format(value, format_spec) 

685 

686 # Replace periods with underscores in the non-directory part to 

687 # prevent file extension confusion. Also replace # in the non-dir 

688 # part to avoid confusion with URI fragments 

689 head, tail = os.path.split(output) 

690 tail = tail.replace(".", "_") 

691 tail = tail.replace("#", "HASH") 

692 output = os.path.join(head, tail) 

693 

694 # Complain if we were meant to use a component 

695 if component is not None and not usedComponent: 

696 raise KeyError(f"Component '{component}' specified but template {self.template} did not use it") 

697 

698 # Since this is known to be a path, normalize it in case some double 

699 # slashes have crept in 

700 path = os.path.normpath(output) 

701 

702 # It should not be an absolute path (may happen with optionals) 

703 if os.path.isabs(path): 

704 path = os.path.relpath(path, start="/") 

705 

706 return path 

707 
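# Illustrative sketch (not part of the module): expanding a template for a DatasetRef
# built by hand. The DatasetType, DataCoordinate.standardize and DatasetRef
# constructor signatures are assumed from the public lsst.daf.butler API; in real code
# the ref would normally come from a Butler or Registry query. Because the data ID has
# no dimension records attached, the template avoids dot-notation fields such as
# "visit.name".
from lsst.daf.butler import DataCoordinate, DatasetRef, DatasetType, DimensionUniverse
from lsst.daf.butler.datastore.file_templates import FileTemplate

universe = DimensionUniverse()
dataset_type = DatasetType(
    "calexp", {"instrument", "visit", "detector"}, "ExposureF", universe=universe
)
data_id = DataCoordinate.standardize(
    {"instrument": "HSC", "visit": 903334, "detector": 16}, universe=universe
)
ref = DatasetRef(dataset_type, data_id, run="u/someone/DM-12345")

template = FileTemplate("{run:/}/{datasetType}/{instrument}_{visit}_{detector}_{id}")
print(template.format(ref))
# e.g. u/someone/DM-12345/calexp/HSC_903334_16_<uuid>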

708 def validateTemplate(self, entity: DatasetRef | DatasetType | StorageClass | None) -> None: 

709 """Compare the template against supplied entity that wants to use it. 

710 

711 Parameters 

712 ---------- 

713 entity : `DatasetType`, `DatasetRef`, or `StorageClass` 

714 Entity to compare against template. If `None` is given only 

715 very basic validation of templates will be performed. 

716 

717 Raises 

718 ------ 

719 FileTemplateValidationError 

720 Raised if the template is inconsistent with the supplied entity. 

721 

722 Notes 

723 ----- 

724 Validation will always include a check that mandatory fields 

725 are present and that at least one field refers to a dimension. 

726 If the supplied entity includes a `DimensionGraph` then it will be 

727 used to compare the available dimensions with those specified in the 

728 template. 

729 """ 

730 # A universe can be used to filter out alternates that are 

731 # not known. 

732 dimensions = getattr(entity, "dimensions", None) 

733 grouped_fields, grouped_optionals = self.grouped_fields(dimensions) 

734 

735 # Check that the template has run 

736 withSpecials = ( 

737 grouped_fields["standard"] 

738 | grouped_fields["parent"] 

739 | grouped_fields["special"] 

740 | grouped_optionals["standard"] 

741 | grouped_optionals["parent"] 

742 | grouped_optionals["special"] 

743 ) 

744 

745 if "collection" in withSpecials: 

746 raise FileTemplateValidationError( 

747 "'collection' is no longer supported as a file template placeholder; use 'run' instead." 

748 ) 

749 

750 if not withSpecials & self.mandatoryFields: 

751 raise FileTemplateValidationError( 

752 f"Template '{self}' is missing a mandatory field from {self.mandatoryFields}" 

753 ) 

754 

755 # Check that there are some dimension fields in the template 

756 # The id is allowed instead if present since that also uniquely 

757 # identifies the file in the datastore. 

758 allfields = ( 

759 grouped_fields["standard"] 

760 | grouped_fields["parent"] 

761 | grouped_optionals["standard"] 

762 | grouped_optionals["parent"] 

763 ) 

764 if not allfields and "id" not in withSpecials: 

765 raise FileTemplateValidationError( 

766 f"Template '{self}' does not seem to have any fields corresponding to dimensions." 

767 ) 

768 

769 # Do not allow ../ in the template to confuse where the file might 

770 # end up. 

771 if "../" in self.template: 

772 raise FileTemplateValidationError("A file template should not include jump to parent directory.") 

773 

774 # Require that if "id" is in the template then it must exist in the 

775 # file part -- this avoids templates like "{id}/fixed" where the file 

776 # name is fixed but the directory has the ID. 

777 if "id" in withSpecials: 

778 file_part = os.path.split(self.template)[-1] 

779 if "{id}" not in file_part: 

780 raise FileTemplateValidationError( 

781 f"Template '{self}' includes the 'id' but that ID is not part of the file name." 

782 ) 

783 

784 # If we do not have dimensions available then all we can do is shrug 

785 if not hasattr(entity, "dimensions"): 

786 return 

787 

788 # Mypy does not know about hasattr so help it out 

789 if entity is None: 

790 return 

791 

792 # if this entity represents a component then insist that component 

793 # is present in the template. If the entity is not a component 

794 # make sure that component is not mandatory. 

795 try: 

796 # mypy does not see the except block so complains about 

797 # StorageClass not supporting isComponent 

798 if entity.isComponent(): # type: ignore 

799 if "component" not in withSpecials: 

800 raise FileTemplateValidationError( 

801 f"Template '{self}' has no component but {entity} refers to a component." 

802 ) 

803 else: 

804 mandatorySpecials = ( 

805 grouped_fields["standard"] | grouped_fields["parent"] | grouped_fields["special"] 

806 ) 

807 if "component" in mandatorySpecials: 

808 raise FileTemplateValidationError( 

809 f"Template '{self}' has mandatory component but " 

810 f"{entity} does not refer to a component." 

811 ) 

812 except AttributeError: 

813 pass 

814 

815 # From here on we need at least a DatasetType 

816 # Mypy doesn't understand the AttributeError clause below 

817 if isinstance(entity, StorageClass): 

818 return 

819 

820 # Get the dimension links to get the full set of available field names 

821 # Fall back to dataId keys if we have them but no links. 

822 # dataId keys must still be present in the template 

823 try: 

824 minimal = set(entity.dimensions.required.names) 

825 maximal = set(entity.dimensions.names) 

826 except AttributeError: 

827 try: 

828 minimal = set(entity.dataId.keys().names) # type: ignore 

829 maximal = minimal 

830 except AttributeError: 

831 return 

832 

833 required = grouped_fields["standard"] | grouped_fields["parent"] 

834 

835 # Replace specific skypix dimensions with generic one 

836 skypix_alias = self._determine_skypix_alias(entity) 

837 if skypix_alias is not None: 

838 minimal.add("skypix") 

839 maximal.add("skypix") 

840 minimal.remove(skypix_alias) 

841 maximal.remove(skypix_alias) 

842 if skypix_alias in required: 

843 required.remove(skypix_alias) 

844 required.add("skypix") 

845 if skypix_alias in allfields: 

846 allfields.remove(skypix_alias) 

847 allfields.add("skypix") 

848 

849 # Calculate any field usage that does not match a dimension 

850 if not required.issubset(maximal): 

851 raise FileTemplateValidationError( 

852 f"Template '{self}' is inconsistent with {entity}: {required} is not a subset of {maximal}." 

853 ) 

854 

855 if not allfields.issuperset(minimal): 

856 raise FileTemplateValidationError( 

857 f"Template '{self}' is inconsistent with {entity}:" 

858 f" {allfields} is not a superset of {minimal}." 

859 ) 

860 

861 return 

862 
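# Illustrative sketch (not part of the module): the most basic validation happens as
# soon as a FileTemplate is constructed, because __init__ calls validateTemplate(None).
# A template with no "run" or "id" field is rejected immediately.
from lsst.daf.butler.datastore.file_templates import (
    FileTemplate,
    FileTemplateValidationError,
)

try:
    FileTemplate("{datasetType}/{visit}_{detector}")
except FileTemplateValidationError as err:
    print(err)  # reports the missing mandatory field ("run" or "id")

# Adding the mandatory "run" field and keeping "{id}" in the file name gives a
# template that passes the basic checks.
FileTemplate("{run:/}/{datasetType}/{visit}_{detector}_{id}")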

863 def _determine_skypix_alias(self, entity: DatasetRef | DatasetType) -> str | None: 

864 """Return the dimension name that refers to a sky pixel. 

865 

866 Parameters 

867 ---------- 

868 entity : `DatasetRef` or `DatasetType` 

869 The entity to examine. 

870 

871 Returns 

872 ------- 

873 alias : `str` or `None` 

874 If there is a sky pixelization in the supplied dataId, return 

875 its name, else returns `None`. Will return `None` also if there 

876 is more than one sky pix dimension in the data ID or if the 

877 data ID is not a `DataCoordinate`. 

878 """ 

879 alias = None 

880 

881 if isinstance(entity, DatasetRef): 

882 entity = entity.datasetType 

883 

884 # If there is exactly one SkyPixDimension in the data ID, alias its 

885 # value with the key "skypix", so we can use that to match any 

886 # skypix dimension. 

887 # We restrict this behavior to the (real-world) case where the 

888 # data ID is a DataCoordinate, not just a dict. That should be 

889 # false only in some test code, but that test code is a pain to 

890 # update to be more like the real world while still providing our 

891 # only tests of important behavior. 

892 if len(entity.dimensions.skypix) == 1: 

893 (alias,) = entity.dimensions.skypix.names 

894 return alias