
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

"""Support for file template string expansion."""

__all__ = ("FileTemplates", "FileTemplate", "FileTemplatesConfig", "FileTemplateValidationError")

import os.path
import string
import logging
from types import MappingProxyType

from typing import (
    TYPE_CHECKING,
    Any,
    Iterable,
    Mapping,
    Optional,
    Set,
    Tuple,
    Union,
)

from .config import Config
from .configSupport import processLookupConfigs, LookupKey
from .exceptions import ValidationError
from .dimensions import SkyPixDimension, DataCoordinate
from .datasets import DatasetRef
from .storageClass import StorageClass

if TYPE_CHECKING:
    from .dimensions import DimensionUniverse
    from .datasets import DatasetType

log = logging.getLogger(__name__)



class FileTemplateValidationError(ValidationError):
    """Exception thrown when a file template is not consistent with the
    associated `DatasetType`."""
    pass


class FileTemplatesConfig(Config):
    """Configuration information for `FileTemplates`."""
    pass



class FileTemplates:
    """Collection of `FileTemplate` templates.

    Parameters
    ----------
    config : `FileTemplatesConfig` or `str`
        Load configuration.
    default : `str`, optional
        If not `None`, a default template to use if no template has
        been specified explicitly in the configuration.
    universe : `DimensionUniverse`
        The set of all known dimensions, used to normalize any lookup keys
        involving dimensions.

    Notes
    -----
    The configuration can include one level of hierarchy where an
    instrument-specific section can be defined to override more general
    template specifications. This is represented in YAML using a
    key of form ``instrument<name>`` which can then define templates
    that will be returned if a `DatasetRef` contains a matching instrument
    name in the data ID.

    A default fallback template can be specified using the key ``default``.
    Defaulting can be disabled in a child configuration by defining the
    value to be an empty string or a boolean `False`.

    The config is parsed using the function
    `~lsst.daf.butler.configSubset.processLookupConfigs`.
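
    Examples
    --------
    A minimal sketch of a configuration fragment; the dataset type
    (``calexp``) and instrument (``MyCam``) names here are hypothetical:

    .. code-block:: yaml

       default: "{run}/{datasetType}/{datasetType}_{visit:?}"
       calexp: "{run}/{datasetType}/{datasetType}_{visit:08d}"
       instrument<MyCam>:
         calexp: "{run}/{datasetType}/mycam_{visit:08d}"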

98 """ 

99 

100 defaultKey = LookupKey("default") 

101 """Configuration key associated with the default template.""" 

102 

103 def __init__(self, config: Union[FileTemplatesConfig, str], 

104 default: Optional[str] = None, *, 

105 universe: DimensionUniverse): 

106 self.config = FileTemplatesConfig(config) 

107 self._templates = {} 

108 

109 contents = processLookupConfigs(self.config, universe=universe) 

110 

111 # Determine default to use -- defaults can be disabled if 

112 # we get a False or None 

113 defaultValue = contents.get(self.defaultKey, default) 

114 if defaultValue and not isinstance(defaultValue, str): 

115 raise RuntimeError("Default template value should be str or False, or None. " 

116 f"Got '{defaultValue}'") 

117 self.default = FileTemplate(defaultValue) if isinstance(defaultValue, str) and defaultValue else None 

118 

119 # Convert all the values to FileTemplate, handling defaults 

120 for key, templateStr in contents.items(): 

121 if key == self.defaultKey: 

122 continue 

123 if not isinstance(templateStr, str): 

124 raise RuntimeError(f"Unexpected value in file template key {key}: {templateStr}") 

125 self._templates[key] = FileTemplate(templateStr) 


    @property
    def templates(self) -> Mapping[LookupKey, FileTemplate]:
        """Collection of templates indexed by lookup key (`dict`)."""
        return MappingProxyType(self._templates)

    def __contains__(self, key: LookupKey) -> bool:
        """Indicates whether the supplied key is present in the templates.

        Parameters
        ----------
        key : `LookupKey`
            Key to use to determine if a corresponding value is present
            in the templates.

        Returns
        -------
        in : `bool`
            `True` if the supplied key is present in the templates.
        """
        return key in self.templates

    def __getitem__(self, key: LookupKey) -> FileTemplate:
        return self.templates[key]


    def validateTemplates(self, entities: Iterable[Union[DatasetType, DatasetRef, StorageClass]],
                          logFailures: bool = False) -> None:
        """Retrieve the template associated with each dataset type and
        validate the dimensions against the template.

        Parameters
        ----------
        entities : `DatasetType`, `DatasetRef`, or `StorageClass`
            Entities to validate against the matching templates. Can be
            differing types.
        logFailures : `bool`, optional
            If `True`, output a log message for every validation error
            detected.

        Raises
        ------
        FileTemplateValidationError
            Raised if an entity failed validation.

        Notes
        -----
        See `FileTemplate.validateTemplate()` for details on the validation.
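
        Examples
        --------
        A minimal sketch, where ``myDatasetType`` and ``myStorageClass``
        are hypothetical entities to check against the loaded templates:

        .. code-block:: python

           templates.validateTemplates([myDatasetType, myStorageClass],
                                       logFailures=True)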

173 """ 

174 unmatchedKeys = set(self.templates) 

175 failed = [] 

176 for entity in entities: 

177 try: 

178 matchKey, template = self.getTemplateWithMatch(entity) 

179 except KeyError as e: 

180 # KeyError always quotes on stringification so strip here 

181 errMsg = str(e).strip('"\'') 

182 failed.append(errMsg) 

183 if logFailures: 

184 log.fatal("%s", errMsg) 

185 continue 

186 

187 if matchKey in unmatchedKeys: 

188 unmatchedKeys.remove(matchKey) 

189 

190 try: 

191 template.validateTemplate(entity) 

192 except FileTemplateValidationError as e: 

193 failed.append(f"{e} (via key '{matchKey}')") 

194 if logFailures: 

195 log.fatal("Template failure with key '%s': %s", matchKey, e) 

196 

197 if logFailures and unmatchedKeys: 

198 log.warning("Unchecked keys: %s", ", ".join([str(k) for k in unmatchedKeys])) 

199 

200 if failed: 

201 if len(failed) == 1: 

202 msg = str(failed[0]) 

203 else: 

204 failMsg = ";\n".join(failed) 

205 msg = f"{len(failed)} template validation failures: {failMsg}" 

206 raise FileTemplateValidationError(msg) 


    def getLookupKeys(self) -> Set[LookupKey]:
        """Retrieve the lookup keys for all the template entries.

        Returns
        -------
        keys : `set` of `LookupKey`
            The keys available for matching a template.
        """
        return set(self.templates)


    def getTemplateWithMatch(self, entity: Union[DatasetRef,
                                                 DatasetType, StorageClass]) -> Tuple[LookupKey,
                                                                                      FileTemplate]:
        """Retrieve the `FileTemplate` associated with the dataset type,
        along with the lookup key that matched this template.

        If the lookup name corresponds to a component, the base name for
        the component will be examined if the full component name does
        not match.

        Parameters
        ----------
        entity : `DatasetType`, `DatasetRef`, or `StorageClass`
            Instance to use to look for a corresponding template.
            A `DatasetType` name or a `StorageClass` name will be used
            depending on the supplied entity. Priority is given to a
            `DatasetType` name. Instrument overrides are supported if a
            `DatasetRef` is provided whose data ID includes an
            ``instrument`` value.

        Returns
        -------
        matchKey : `LookupKey`
            The key that resulted in the successful match.
        template : `FileTemplate`
            Template instance to use with that dataset type.

        Raises
        ------
        KeyError
            Raised if no template could be located for this dataset type.

249 """ 

250 # Get the names to use for lookup 

251 names = entity._lookupNames() 

252 

253 # Get a location from the templates 

254 template = self.default 

255 source = self.defaultKey 

256 for name in names: 

257 if name in self.templates: 

258 template = self.templates[name] 

259 source = name 

260 break 

261 

262 if template is None: 

263 raise KeyError(f"Unable to determine file template from supplied argument [{entity}]") 

264 

265 log.debug("Got file %s from %s via %s", template, entity, source) 

266 

267 return source, template 


    def getTemplate(self, entity: Union[DatasetType, DatasetRef, StorageClass]) -> FileTemplate:
        """Retrieve the `FileTemplate` associated with the dataset type.

        If the lookup name corresponds to a component, the base name for
        the component will be examined if the full component name does
        not match.

        Parameters
        ----------
        entity : `DatasetType`, `DatasetRef`, or `StorageClass`
            Instance to use to look for a corresponding template.
            A `DatasetType` name or a `StorageClass` name will be used
            depending on the supplied entity. Priority is given to a
            `DatasetType` name. Instrument overrides are supported if a
            `DatasetRef` is provided whose data ID includes an
            ``instrument`` value.

        Returns
        -------
        template : `FileTemplate`
            Template instance to use with that dataset type.

        Raises
        ------
        KeyError
            Raised if no template could be located for this dataset type.
        """
        _, template = self.getTemplateWithMatch(entity)
        return template



class FileTemplate:
    """Format a path template into a fully expanded path.

    Parameters
    ----------
    template : `str`
        Template string.

    Raises
    ------
    FileTemplateValidationError
        Raised if the template fails basic validation.

    Notes
    -----
    The templates use the standard Format Specification Mini-Language
    with the caveat that only named fields can be used. The field names
    are taken from the Dimensions along with several additional fields:

    - datasetType: `str`, `DatasetType.name`
    - component: `str`, name of the StorageClass component
    - run: `str`, name of the run this dataset was added with

    `run` must always be provided to ensure unique paths.

    More detailed information can be requested from dimensions by using a
    dot notation, so ``visit.name`` would use the name of the visit and
    ``detector.name_in_raft`` would use the name of the detector within the
    raft.

    The mini-language is extended to understand a "?" in the format
    specification. This indicates that a field is optional. If that
    field is missing from the data ID, the field, along with the text
    preceding it (unless that text is a path separator), will be removed
    from the output path.

    By default any "/" in a dataId value will be replaced by "_" to prevent
    unexpected directories being created in the path. If the "/" should be
    retained then a special "/" format specifier can be included in the
    template.
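
    Examples
    --------
    A minimal sketch with hypothetical values. Given the template::

        {run}/{datasetType}/{visit:08d}_{detector:?}

    a dataset with ``run="ingest/run"``, ``datasetType="calexp"``,
    ``visit=903332``, and no ``detector`` value would expand to
    ``ingest_run/calexp/00903332``: the "/" in the run value is replaced
    by "_" and the optional ``detector`` field, along with the "_"
    preceding it, is dropped.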

339 """ 

340 

341 mandatoryFields = {"run"} 

342 """A set of fields, one of which must be present in a template.""" 

343 

344 datasetFields = {"datasetType", "component"} 

345 """Fields related to the supplied dataset, not a dimension.""" 

346 

347 specialFields = mandatoryFields | datasetFields 

348 """Set of special fields that are available independently of the defined 

349 Dimensions.""" 

350 

351 def __init__(self, template: str): 

352 if not isinstance(template, str): 

353 raise FileTemplateValidationError(f"Template ('{template}') does " 

354 "not contain any format specifiers") 

355 self.template = template 

356 

357 # Do basic validation without access to dimensions 

358 self.validateTemplate(None) 

359 

360 def __eq__(self, other: Any) -> bool: 

361 if not isinstance(other, FileTemplate): 

362 return False 

363 

364 return self.template == other.template 

365 

366 def __str__(self) -> str: 

367 return self.template 

368 

369 def __repr__(self) -> str: 

370 return f'{self.__class__.__name__}("{self.template}")' 

371 

    def fields(self, optionals: bool = False, specials: bool = False, subfields: bool = False) -> Set[str]:
        """Return the field names used in this template.

        Parameters
        ----------
        optionals : `bool`, optional
            If `True`, optional fields are included in the returned set.
        specials : `bool`, optional
            If `True`, non-dimension fields are included.
        subfields : `bool`, optional
            If `True`, fields with syntax ``a.b`` are included. If `False`,
            the default, only ``a`` would be returned.

        Returns
        -------
        names : `set`
            Names of fields used in this template.

        Notes
        -----
        The returned set includes special values such as `datasetType`
        and `component` only if ``specials`` is `True`.
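
        Examples
        --------
        A minimal sketch with a hypothetical template:

        .. code-block:: python

           t = FileTemplate("{run}/{datasetType}/{visit}_{detector:?}")
           t.fields()                               # {'visit'}
           t.fields(optionals=True)                 # {'visit', 'detector'}
           t.fields(specials=True, optionals=True)  # adds 'run' and 'datasetType'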

394 """ 

395 fmt = string.Formatter() 

396 parts = fmt.parse(self.template) 

397 

398 names = set() 

399 for literal, field_name, format_spec, conversion in parts: 

400 if field_name is not None and format_spec is not None: 

401 if "?" in format_spec and not optionals: 

402 continue 

403 

404 if not specials and field_name in self.specialFields: 

405 continue 

406 

407 if "." in field_name and not subfields: 

408 field_name, _ = field_name.split(".") 

409 

410 names.add(field_name) 

411 

412 return names 


    def format(self, ref: DatasetRef) -> str:
        """Format a template string into a full path.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset to be formatted.

        Returns
        -------
        path : `str`
            Expanded path.

        Raises
        ------
        KeyError
            Raised if a requested field is not defined and the field is
            not optional, or if ``component`` is specified but the
            template did not use it.
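
        Examples
        --------
        A minimal sketch with hypothetical values: with the template
        ``{run:/}/{datasetType}/{visit:08d}`` and a ``ref`` whose run is
        ``"u/user/tests"``, dataset type is ``"calexp"``, and data ID
        contains ``visit=42``, this returns
        ``"u/user/tests/calexp/00000042"`` (the "/" format specifier
        preserves the path separators in the run value).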

433 """ 

434 # Extract defined non-None dimensions from the dataId. 

435 # This guards against Nones being explicitly present in the data ID 

436 # (which can happen if, say, an exposure has no filter), as well as 

437 # the case where only required dimensions are present (which in this 

438 # context should only happen in unit tests; in general we need all 

439 # dimensions to fill out templates). 

440 fields = {k: ref.dataId.get(k) for k in ref.datasetType.dimensions.names 

441 if ref.dataId.get(k) is not None} 

442 # Extra information that can be included using . syntax 

443 extras = {} 

444 if isinstance(ref.dataId, DataCoordinate): 

445 if ref.dataId.hasRecords(): 

446 extras = ref.dataId.records.byName() 

447 skypix_alias = self._determine_skypix_alias(ref) 

448 if skypix_alias is not None: 

449 fields["skypix"] = fields[skypix_alias] 

450 if extras: 

451 extras["skypix"] = extras[skypix_alias] 

452 

453 datasetType = ref.datasetType 

454 fields["datasetType"], component = datasetType.nameAndComponent() 

455 

456 usedComponent = False 

457 if component is not None: 

458 fields["component"] = component 

459 

460 usedRun = False 

461 fields["run"] = ref.run 

462 

463 fmt = string.Formatter() 

464 parts = fmt.parse(self.template) 

465 output = "" 

466 

467 for literal, field_name, format_spec, conversion in parts: 

468 

469 if field_name == "component": 

470 usedComponent = True 

471 

472 if format_spec is None: 

473 output = output + literal 

474 continue 

475 

476 # Should only happen if format_spec is None 

477 if field_name is None: 

478 raise RuntimeError(f"Unexpected blank field_name encountered in {self.template} [{literal}]") 

479 

480 if "?" in format_spec: 

481 optional = True 

482 # Remove the non-standard character from the spec 

483 format_spec = format_spec.replace("?", "") 

484 else: 

485 optional = False 

486 

487 if field_name == "run": 

488 usedRun = True 

489 

490 if field_name == "collection": 

491 raise KeyError("'collection' is no longer supported as a " 

492 "file template placeholder; use 'run' instead.") 

493 

494 # Check for request for additional information from the dataId 

495 if "." in field_name: 

496 primary, secondary = field_name.split(".") 

497 if primary in extras: 

498 record = extras[primary] 

499 # Only fill in the fields if we have a value, the 

500 # KeyError will trigger below if the attribute is missing. 

501 if hasattr(record, secondary): 

502 fields[field_name] = getattr(record, secondary) 

503 

504 if field_name in fields: 

505 value = fields[field_name] 

506 elif optional: 

507 # If this is optional ignore the format spec 

508 # and do not include the literal text prior to the optional 

509 # field unless it contains a "/" path separator 

510 format_spec = "" 

511 value = "" 

512 if "/" not in literal: 

513 literal = "" 

514 else: 

515 raise KeyError(f"'{field_name}' requested in template via '{self.template}' " 

516 "but not defined and not optional") 

517 

518 # Handle "/" in values since we do not want to be surprised by 

519 # unexpected directories turning up 

520 replace_slash = True 

521 if "/" in format_spec: 

522 # Remove the non-standard character from the spec 

523 format_spec = format_spec.replace("/", "") 

524 replace_slash = False 

525 

526 if isinstance(value, str): 

527 if replace_slash: 

528 value = value.replace("/", "_") 

529 

530 # Now use standard formatting 

531 output = output + literal + format(value, format_spec) 

532 

533 # Replace periods with underscores in the non-directory part to 

534 # prevent file extension confusion. 

535 head, tail = os.path.split(output) 

536 output = os.path.join(head, tail.replace(".", "_")) 

537 

538 # Complain if we were meant to use a component 

539 if component is not None and not usedComponent: 

540 raise KeyError("Component '{}' specified but template {} did not use it".format(component, 

541 self.template)) 

542 

543 # Complain if there's no run 

544 if not usedRun: 

545 raise KeyError("Template does not include 'run'.") 

546 

547 # Since this is known to be a path, normalize it in case some double 

548 # slashes have crept in 

549 path = os.path.normpath(output) 

550 

551 # It should not be an absolute path (may happen with optionals) 

552 if os.path.isabs(path): 

553 path = os.path.relpath(path, start="/") 

554 

555 return path 

556 

557 def validateTemplate(self, entity: Union[DatasetRef, DatasetType, StorageClass, None]) -> None: 

558 """Compare the template against a representative entity that would 

559 like to use template. 

560 

561 Parameters 

562 ---------- 

563 entity : `DatasetType`, `DatasetRef`, or `StorageClass` 

564 Entity to compare against template. If `None` is given only 

565 very basic validation of templates will be performed. 

566 

567 Raises 

568 ------ 

569 FileTemplateValidationError 

570 Raised if the template is inconsistent with the supplied entity. 

571 

572 Notes 

573 ----- 

574 Validation will always include a check that mandatory fields 

575 are present and that at least one field refers to a dimension. 

576 If the supplied entity includes a `DimensionGraph` then it will be 

577 used to compare the available dimensions with those specified in the 

578 template. 
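
        Examples
        --------
        A minimal sketch with a hypothetical entity whose dimensions are
        ``{visit, detector}``: the template
        ``{run}/{datasetType}/{visit}_{detector}`` would validate, whereas
        ``{run}/{datasetType}/{tract}`` would fail because ``tract`` does
        not match any of the entity's dimensions.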

579 """ 

580 

581 # Check that the template has run 

582 withSpecials = self.fields(specials=True, optionals=True) 

583 if not withSpecials & self.mandatoryFields: 

584 raise FileTemplateValidationError(f"Template '{self}' is missing a mandatory field" 

585 f" from {self.mandatoryFields}") 

586 

587 # Check that there are some dimension fields in the template 

588 allfields = self.fields(optionals=True) 

589 if not allfields: 

590 raise FileTemplateValidationError(f"Template '{self}' does not seem to have any fields" 

591 " corresponding to dimensions.") 

592 

593 # If we do not have dimensions available then all we can do is shrug 

594 if not hasattr(entity, "dimensions"): 

595 return 

596 

597 # Mypy does not know about hasattr so help it out 

598 if entity is None: 

599 return 

600 

601 # if this entity represents a component then insist that component 

602 # is present in the template. If the entity is not a component 

603 # make sure that component is not mandatory. 

604 try: 

605 # mypy does not see the except block so complains about 

606 # StorageClass not supporting isComponent 

607 if entity.isComponent(): # type: ignore 

608 if "component" not in withSpecials: 

609 raise FileTemplateValidationError(f"Template '{self}' has no component but " 

610 f"{entity} refers to a component.") 

611 else: 

612 mandatorySpecials = self.fields(specials=True) 

613 if "component" in mandatorySpecials: 

614 raise FileTemplateValidationError(f"Template '{self}' has mandatory component but " 

615 f"{entity} does not refer to a component.") 

616 except AttributeError: 

617 pass 

618 

619 # From here on we need at least a DatasetType 

620 # Mypy doesn't understand the AttributeError clause below 

621 if isinstance(entity, StorageClass): 

622 return 

623 

624 # Get the dimension links to get the full set of available field names 

625 # Fall back to dataId keys if we have them but no links. 

626 # dataId keys must still be present in the template 

627 try: 

628 minimal = set(entity.dimensions.required.names) 

629 maximal = set(entity.dimensions.names) 

630 except AttributeError: 

631 try: 

632 minimal = set(entity.dataId.keys()) # type: ignore 

633 maximal = minimal 

634 except AttributeError: 

635 return 

636 

637 # Replace specific skypix dimensions with generic one 

638 skypix_alias = self._determine_skypix_alias(entity) 

639 if skypix_alias is not None: 

640 minimal.add("skypix") 

641 maximal.add("skypix") 

642 minimal.remove(skypix_alias) 

643 maximal.remove(skypix_alias) 

644 

645 required = self.fields(optionals=False) 

646 

647 # Calculate any field usage that does not match a dimension 

648 if not required.issubset(maximal): 

649 raise FileTemplateValidationError(f"Template '{self}' is inconsistent with {entity}:" 

650 f" {required} is not a subset of {maximal}.") 

651 

652 if not allfields.issuperset(minimal): 

653 raise FileTemplateValidationError(f"Template '{self}' is inconsistent with {entity}:" 

654 f" {allfields} is not a superset of {minimal}.") 

655 

656 return 

657 

658 def _determine_skypix_alias(self, entity: Union[DatasetRef, DatasetType]) -> Optional[str]: 

659 """Given a `DatasetRef` return the dimension name that refers to a sky 

660 pixel. 

661 

662 Parameters 

663 ---------- 

664 ref : `DatasetRef` or `DatasetType` 

665 The entity to examine. 

666 

667 Returns 

668 ------- 

669 alias : `str` 

670 If there is a sky pixelization in the supplied dataId, return 

671 its name, else returns `None`. Will return `None` also if there 

672 is more than one sky pix dimension in the data ID or if the 

673 dataID is not a `DataCoordinate` 

674 """ 

675 alias = None 

676 

677 if isinstance(entity, DatasetRef): 

678 entity = entity.datasetType 

679 

680 # If there is exactly one SkyPixDimension in the data ID, alias its 

681 # value with the key "skypix", so we can use that to match any 

682 # skypix dimension. 

683 # We restrict this behavior to the (real-world) case where the 

684 # data ID is a DataCoordinate, not just a dict. That should only 

685 # not be true in some test code, but that test code is a pain to 

686 # update to be more like the real world while still providing our 

687 # only tests of important behavior. 

688 skypix = [dimension for dimension in entity.dimensions 

689 if isinstance(dimension, SkyPixDimension)] 

690 if len(skypix) == 1: 

691 alias = skypix[0].name 

692 return alias