# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

"""Support for file template string expansion."""

__all__ = ("FileTemplates", "FileTemplate", "FileTemplatesConfig", "FileTemplateValidationError")

import os.path
import string
import logging
from types import MappingProxyType

from typing import (
    TYPE_CHECKING,
    Any,
    Iterable,
    Mapping,
    Optional,
    Set,
    Tuple,
    Union,
)

from .config import Config
from .configSupport import processLookupConfigs, LookupKey
from .exceptions import ValidationError
from .dimensions import SkyPixDimension, DataCoordinate

if TYPE_CHECKING:
    from .dimensions import DimensionUniverse
    from .datasets import DatasetType, DatasetRef
    from .storageClass import StorageClass

log = logging.getLogger(__name__)


class FileTemplateValidationError(ValidationError):
    """Exception raised when a file template is not consistent with the
    associated `DatasetType`."""
    pass


class FileTemplatesConfig(Config):
    """Configuration information for `FileTemplates`."""
    pass


class FileTemplates:
    """Collection of `FileTemplate` templates.

    Parameters
    ----------
    config : `FileTemplatesConfig` or `str`
        Load configuration.
    default : `str`, optional
        If not `None`, a default template to use if no template has
        been specified explicitly in the configuration.
    universe : `DimensionUniverse`
        The set of all known dimensions, used to normalize any lookup keys
        involving dimensions.

    Notes
    -----
    The configuration can include one level of hierarchy where an
    instrument-specific section can be defined to override more general
    template specifications. This is represented in YAML using a
    key of form ``instrument<name>`` which can then define templates
    that will be returned if a `DatasetRef` contains a matching instrument
    name in the data ID.

    A default fallback template can be specified using the key ``default``.
    Defaulting can be disabled in a child configuration by defining the
    value to be an empty string or a boolean `False`.

    The config is parsed using the function
    `~lsst.daf.butler.configSupport.processLookupConfigs`.
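
    As an illustrative sketch only (the dataset type names, instrument
    name, and template strings here are hypothetical), a configuration
    and its use might look like::

        config = FileTemplatesConfig({
            "default": "{run}/{datasetType}/{datasetType}_{visit:?}_{run}",
            "calexp": "{run}/{datasetType}/{visit:06d}/{datasetType}_{run}",
            "instrument<HSC>": {
                "calexp": "{run}/{datasetType}/{visit:08d}/{datasetType}_{run}",
            },
        })
        templates = FileTemplates(config, universe=universe)

    where ``universe`` is assumed to be a previously constructed
    `DimensionUniverse`.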

97 """ 

98 

99 defaultKey = LookupKey("default") 

100 """Configuration key associated with the default template.""" 

101 

102 def __init__(self, config: Union[FileTemplatesConfig, str], 

103 default: Optional[str] = None, *, 

104 universe: DimensionUniverse): 

105 self.config = FileTemplatesConfig(config) 

106 self._templates = {} 

107 self.default = FileTemplate(default) if default is not None else None 

108 contents = processLookupConfigs(self.config, universe=universe) 

109 

110 # Convert all the values to FileTemplate, handling defaults 

111 for key, templateStr in contents.items(): 

112 if key == self.defaultKey: 

113 if not templateStr: 

114 self.default = None 

115 else: 

116 self.default = FileTemplate(templateStr) 

117 else: 

118 self._templates[key] = FileTemplate(templateStr) 

119 

120 @property 

121 def templates(self) -> Mapping[LookupKey, FileTemplate]: 

122 """Collection of templates indexed by lookup key (`dict`).""" 

123 return MappingProxyType(self._templates) 

124 

125 def __contains__(self, key: LookupKey) -> bool: 

126 """Indicates whether the supplied key is present in the templates. 

127 

128 Parameters 

129 ---------- 

130 key : `LookupKey` 

131 Key to use to determine if a corresponding value is present 

132 in the templates. 

133 

134 Returns 

135 ------- 

136 in : `bool` 

137 `True` if the supplied key is present in the templates. 

138 """ 

139 return key in self.templates 

140 

141 def __getitem__(self, key: LookupKey) -> FileTemplate: 

142 return self.templates[key] 

143 

144 def validateTemplates(self, entities: Iterable[Union[DatasetType, DatasetRef, StorageClass]], 

145 logFailures: bool = False) -> None: 

146 """Retrieve the template associated with each dataset type and 

147 validate the dimensions against the template. 

148 

149 Parameters 

150 ---------- 

151 entities : `DatasetType`, `DatasetRef`, or `StorageClass` 

152 Entities to validate against the matching templates. Can be 

153 differing types. 

154 logFailures : `bool`, optional 

155 If `True`, output a log message for every validation error 

156 detected. 

157 

158 Raises 

159 ------ 

160 FileTemplateValidationError 

161 Raised if an entity failed validation. 

162 

163 Notes 

164 ----- 

165 See `FileTemplate.validateTemplate()` for details on the validation. 
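
        As a hedged usage sketch, a datastore might validate all of its
        known dataset types in one call (``datasetTypes`` is illustrative)::

            templates.validateTemplates(datasetTypes, logFailures=True)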

166 """ 

167 unmatchedKeys = set(self.templates) 

168 failed = [] 

169 for entity in entities: 

170 try: 

171 matchKey, template = self.getTemplateWithMatch(entity) 

172 except KeyError as e: 

173 # KeyError always quotes on stringification so strip here 

174 errMsg = str(e).strip('"\'') 

175 failed.append(errMsg) 

176 if logFailures: 

177 log.fatal("%s", errMsg) 

178 continue 

179 

180 if matchKey in unmatchedKeys: 

181 unmatchedKeys.remove(matchKey) 

182 

183 try: 

184 template.validateTemplate(entity) 

185 except FileTemplateValidationError as e: 

186 failed.append(f"{e} (via key '{matchKey}')") 

187 if logFailures: 

188 log.fatal("Template failure with key '%s': %s", matchKey, e) 

189 

190 if logFailures and unmatchedKeys: 

191 log.warning("Unchecked keys: %s", ", ".join([str(k) for k in unmatchedKeys])) 

192 

193 if failed: 

194 if len(failed) == 1: 

195 msg = str(failed[0]) 

196 else: 

197 failMsg = ";\n".join(failed) 

198 msg = f"{len(failed)} template validation failures: {failMsg}" 

199 raise FileTemplateValidationError(msg) 

200 

201 def getLookupKeys(self) -> Set[LookupKey]: 

202 """Retrieve the look up keys for all the template entries. 

203 

204 Returns 

205 ------- 

206 keys : `set` of `LookupKey` 

207 The keys available for matching a template. 

208 """ 

209 return set(self.templates) 

210 

211 def getTemplateWithMatch(self, entity: Union[DatasetRef, 

212 DatasetType, StorageClass]) -> Tuple[LookupKey, 

213 FileTemplate]: 

214 """Retrieve the `FileTemplate` associated with the dataset type along 

215 with the lookup key that was a match for this template. 

216 

217 If the lookup name corresponds to a component the base name for 

218 the component will be examined if the full component name does 

219 not match. 

220 

221 Parameters 

222 ---------- 

223 entity : `DatasetType`, `DatasetRef`, or `StorageClass` 

224 Instance to use to look for a corresponding template. 

225 A `DatasetType` name or a `StorageClass` name will be used 

226 depending on the supplied entity. Priority is given to a 

227 `DatasetType` name. Supports instrument override if a 

228 `DatasetRef` is provided configured with an ``instrument`` 

229 value for the data ID. 

230 

231 Returns 

232 ------- 

233 matchKey : `LookupKey` 

234 The key that resulted in the successful match. 

235 template : `FileTemplate` 

236 Template instance to use with that dataset type. 

237 

238 Raises 

239 ------ 

240 KeyError 

241 Raised if no template could be located for this Dataset type. 

242 """ 

        # Get the names to use for lookup
        names = entity._lookupNames()

        # Get a location from the templates
        template = self.default
        source = self.defaultKey
        for name in names:
            if name in self.templates:
                template = self.templates[name]
                source = name
                break

        if template is None:
            raise KeyError(f"Unable to determine file template from supplied argument [{entity}]")

        log.debug("Got file %s from %s via %s", template, entity, source)

        return source, template

    def getTemplate(self, entity: Union[DatasetType, DatasetRef, StorageClass]) -> FileTemplate:
        """Retrieve the `FileTemplate` associated with the dataset type.

        If the lookup name corresponds to a component, the base name for
        the component will be examined if the full component name does
        not match.

        Parameters
        ----------
        entity : `DatasetType`, `DatasetRef`, or `StorageClass`
            Instance to use to look for a corresponding template.
            A `DatasetType` name or a `StorageClass` name will be used
            depending on the supplied entity. Priority is given to a
            `DatasetType` name. Supports instrument override if a
            `DatasetRef` is provided that is configured with an
            ``instrument`` value in its data ID.

        Returns
        -------
        template : `FileTemplate`
            Template instance to use with that dataset type.

        Raises
        ------
        KeyError
            Raised if no template could be located for this dataset type.
        """
        _, template = self.getTemplateWithMatch(entity)
        return template


class FileTemplate:
    """Format a path template into a fully expanded path.

    Parameters
    ----------
    template : `str`
        Template string.

    Raises
    ------
    FileTemplateValidationError
        Raised if the template fails basic validation.

    Notes
    -----
    The templates use the standard Format Specification Mini-Language
    with the caveat that only named fields can be used. The field names
    are taken from the dimensions along with several additional fields:

    - datasetType: `str`, `DatasetType.name`
    - component: `str`, name of the `StorageClass` component
    - run: `str`, name of the run this dataset was added with

    ``run`` must always be provided to ensure unique paths.

    More detailed information can be requested from dimensions by using a
    dot notation, so ``visit.name`` would use the name of the visit and
    ``detector.name_in_raft`` would use the name of the detector within
    the raft.

    The mini-language is extended to understand a "?" in the format
    specification. This indicates that the field is optional. If the
    dimension is missing, the field, along with the text immediately
    preceding it (unless that text is a path separator), will be removed
    from the output path.

    By default any "/" in a data ID value will be replaced by "_" to
    prevent unexpected directories being created in the path. If the "/"
    should be retained, then a special "/" format specifier can be
    included in the template.
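
    As an illustrative sketch only (the ``visit`` and ``component`` fields
    assume matching dimensions or components exist), templates might look
    like::

        "{run}/{datasetType}/{visit:06d}/{datasetType}_{run}"
        "{run}/{datasetType}_{component:?}/{visit:06d}_{run}"

    In the second template ``component`` is optional: for a non-component
    dataset the field and the "_" immediately preceding it are dropped,
    while for a component dataset the component name is appended to the
    dataset type directory name.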

332 """ 

333 

334 mandatoryFields = {"run"} 

335 """A set of fields, one of which must be present in a template.""" 

336 

337 datasetFields = {"datasetType", "component"} 

338 """Fields related to the supplied dataset, not a dimension.""" 

339 

340 specialFields = mandatoryFields | datasetFields 

341 """Set of special fields that are available independently of the defined 

342 Dimensions.""" 

343 

344 def __init__(self, template: str): 

345 if not isinstance(template, str): 

346 raise FileTemplateValidationError(f"Template ('{template}') does " 

347 "not contain any format specifiers") 

348 self.template = template 

349 

350 # Do basic validation without access to dimensions 

351 self.validateTemplate(None) 

352 

353 def __eq__(self, other: Any) -> bool: 

354 if not isinstance(other, FileTemplate): 

355 return False 

356 

357 return self.template == other.template 

358 

359 def __str__(self) -> str: 

360 return self.template 

361 

362 def __repr__(self) -> str: 

363 return f'{self.__class__.__name__}("{self.template}")' 

364 

365 def fields(self, optionals: bool = False, specials: bool = False, subfields: bool = False) -> Set[str]: 

366 """Return the field names used in this template. 

367 

368 Parameters 

369 ---------- 

370 optionals : `bool` 

371 If `True`, optional fields are included in the returned set. 

372 specials : `bool` 

373 If `True`, non-dimension fields are included. 

374 subfields : `bool`, optional 

375 If `True`, fields with syntax ``a.b`` are included. If `False`, 

376 the default, only ``a`` would be returned. 

377 

378 Returns 

379 ------- 

380 names : `set` 

381 Names of fields used in this template 

382 

383 Notes 

384 ----- 

385 The returned set will include the special values such as `datasetType` 

386 and `component`. 
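
        Examples
        --------
        A doctest-style sketch (the template string is illustrative):

        >>> t = FileTemplate("{run}/{datasetType}/{visit:06d?}")
        >>> t.fields()
        set()
        >>> t.fields(optionals=True)
        {'visit'}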

387 """ 

388 fmt = string.Formatter() 

389 parts = fmt.parse(self.template) 

390 

391 names = set() 

392 for literal, field_name, format_spec, conversion in parts: 

393 if field_name is not None and format_spec is not None: 

394 if "?" in format_spec and not optionals: 

395 continue 

396 

397 if not specials and field_name in self.specialFields: 

398 continue 

399 

400 if "." in field_name and not subfields: 

401 field_name, _ = field_name.split(".") 

402 

403 names.add(field_name) 

404 

405 return names 

406 

407 def format(self, ref: DatasetRef) -> str: 

408 """Format a template string into a full path. 

409 

410 Parameters 

411 ---------- 

412 ref : `DatasetRef` 

413 The dataset to be formatted. 

414 

415 Returns 

416 ------- 

417 path : `str` 

418 Expanded path. 

419 

420 Raises 

421 ------ 

422 KeyError 

423 Raised if the requested field is not defined and the field is 

424 not optional. Or, `component` is specified but "component" was 

425 not part of the template. 
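
        Notes
        -----
        As an illustrative sketch only (the names are hypothetical), the
        template ``{run}/{datasetType}/{visit:06d}_{run}`` formatted with
        a ref for dataset type ``calexp`` in run ``myrun`` whose data ID
        contains ``visit=42`` would expand to ``myrun/calexp/000042_myrun``.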

426 """ 

427 # Extract defined non-None dimensions from the dataId 

428 # We attempt to get the "full" dict on the assumption that ref.dataId 

429 # is a ExpandedDataCoordinate, as it should be when running 

430 # PipelineTasks. We should probably just require that when formatting 

431 # templates (and possibly when constructing DatasetRefs), but doing so 

432 # would break a ton of otherwise-useful tests that would need to be 

433 # modified to provide a lot more metadata. 

434 fields = {k: v for k, v in getattr(ref.dataId, "full", ref.dataId).items() if v is not None} 

435 

436 if isinstance(ref.dataId, DataCoordinate): 

437 # If there is exactly one SkyPixDimension in the data ID, alias its 

438 # value with the key "skypix", so we can use that to match any 

439 # skypix dimension. 

440 # We restrict this behavior to the (real-world) case where the 

441 # data ID is a DataCoordinate, not just a dict. That should only 

442 # not be true in some test code, but that test code is a pain to 

443 # update to be more like the real world while still providing our 

444 # only tests of important behavior. 

445 skypix = [dimension for dimension in ref.dataId.graph if isinstance(dimension, SkyPixDimension)] 

446 if len(skypix) == 1: 

447 fields["skypix"] = fields[skypix[0]] 

448 

449 # Extra information that can be included using . syntax 

450 extras = getattr(ref.dataId, "records", {}) 

451 

452 datasetType = ref.datasetType 

453 fields["datasetType"], component = datasetType.nameAndComponent() 

454 

455 usedComponent = False 

456 if component is not None: 

457 fields["component"] = component 

458 

459 usedRun = False 

460 fields["run"] = ref.run 

461 

462 fmt = string.Formatter() 

463 parts = fmt.parse(self.template) 

464 output = "" 

465 

466 for literal, field_name, format_spec, conversion in parts: 

467 

468 if field_name == "component": 

469 usedComponent = True 

470 

471 if format_spec is None: 

472 output = output + literal 

473 continue 

474 

475 # Should only happen if format_spec is None 

476 if field_name is None: 

477 raise RuntimeError(f"Unexpected blank field_name encountered in {self.template} [{literal}]") 

478 

479 if "?" in format_spec: 

480 optional = True 

481 # Remove the non-standard character from the spec 

482 format_spec = format_spec.replace("?", "") 

483 else: 

484 optional = False 

485 

486 if field_name == "run": 

487 usedRun = True 

488 

489 if field_name == "collection": 

490 raise KeyError("'collection' is no longer supported as a " 

491 "file template placeholder; use 'run' instead.") 

492 

493 # Check for request for additional information from the dataId 

494 if "." in field_name: 

495 primary, secondary = field_name.split(".") 

496 if primary in extras: 

497 record = extras[primary] 

498 # Only fill in the fields if we have a value, the 

499 # KeyError will trigger below if the attribute is missing. 

500 if hasattr(record, secondary): 

501 fields[field_name] = getattr(record, secondary) 

502 

503 if field_name in fields: 

504 value = fields[field_name] 

505 elif optional: 

506 # If this is optional ignore the format spec 

507 # and do not include the literal text prior to the optional 

508 # field unless it contains a "/" path separator 

509 format_spec = "" 

510 value = "" 

511 if "/" not in literal: 

512 literal = "" 

513 else: 

514 raise KeyError(f"'{field_name}' requested in template via '{self.template}' " 

515 "but not defined and not optional") 

516 

517 # Handle "/" in values since we do not want to be surprised by 

518 # unexpected directories turning up 

519 replace_slash = True 

520 if "/" in format_spec: 

521 # Remove the non-standard character from the spec 

522 format_spec = format_spec.replace("/", "") 

523 replace_slash = False 

524 

525 if isinstance(value, str): 

526 if replace_slash: 

527 value = value.replace("/", "_") 

528 

529 # Now use standard formatting 

530 output = output + literal + format(value, format_spec) 

531 

532 # Replace periods with underscores in the non-directory part to 

533 # prevent file extension confusion. 

534 head, tail = os.path.split(output) 

535 output = os.path.join(head, tail.replace(".", "_")) 

536 

537 # Complain if we were meant to use a component 

538 if component is not None and not usedComponent: 

539 raise KeyError("Component '{}' specified but template {} did not use it".format(component, 

540 self.template)) 

541 

542 # Complain if there's no run 

543 if not usedRun: 

544 raise KeyError("Template does not include 'run'.") 

545 

546 # Since this is known to be a path, normalize it in case some double 

547 # slashes have crept in 

548 path = os.path.normpath(output) 

549 

550 # It should not be an absolute path (may happen with optionals) 

551 if os.path.isabs(path): 

552 path = os.path.relpath(path, start="/") 

553 

554 return path 

555 

556 def validateTemplate(self, entity: Union[DatasetRef, DatasetType, StorageClass, None]) -> None: 

557 """Compare the template against a representative entity that would 

558 like to use template. 

559 

560 Parameters 

561 ---------- 

562 entity : `DatasetType`, `DatasetRef`, or `StorageClass` 

563 Entity to compare against template. If `None` is given only 

564 very basic validation of templates will be performed. 

565 

566 Raises 

567 ------ 

568 FileTemplateValidationError 

569 Raised if the template is inconsistent with the supplied entity. 

570 

571 Notes 

572 ----- 

573 Validation will always include a check that mandatory fields 

574 are present and that at least one field refers to a dimension. 

575 If the supplied entity includes a `DimensionGraph` then it will be 

576 used to compare the available dimensions with those specified in the 

577 template. 
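
        As an illustrative sketch, a template with no ``run`` field fails
        this basic validation even when no entity is supplied::

            FileTemplate("{datasetType}/{visit}")  # raises FileTemplateValidationError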

578 """ 

579 

580 # Check that the template has run 

581 withSpecials = self.fields(specials=True, optionals=True) 

582 if not withSpecials & self.mandatoryFields: 

583 raise FileTemplateValidationError(f"Template '{self}' is missing a mandatory field" 

584 f" from {self.mandatoryFields}") 

585 

586 # Check that there are some dimension fields in the template 

587 allfields = self.fields(optionals=True) 

588 if not allfields: 

589 raise FileTemplateValidationError(f"Template '{self}' does not seem to have any fields" 

590 " corresponding to dimensions.") 

591 

592 # If we do not have dimensions available then all we can do is shrug 

593 if not hasattr(entity, "dimensions"): 

594 return 

595 

596 # if this entity represents a component then insist that component 

597 # is present in the template. If the entity is not a component 

598 # make sure that component is not mandatory. 

599 try: 

600 # mypy does not see the except block so complains about 

601 # StorageClass not supporting isComponent 

602 if entity.isComponent(): # type: ignore 

603 if "component" not in withSpecials: 

604 raise FileTemplateValidationError(f"Template '{self}' has no component but " 

605 f"{entity} refers to a component.") 

606 else: 

607 mandatorySpecials = self.fields(specials=True) 

608 if "component" in mandatorySpecials: 

609 raise FileTemplateValidationError(f"Template '{self}' has mandatory component but " 

610 f"{entity} does not refer to a component.") 

611 except AttributeError: 

612 pass 

613 

614 # Get the dimension links to get the full set of available field names 

615 # Fall back to dataId keys if we have them but no links. 

616 # dataId keys must still be present in the template 

617 # Ignore warnings from mypy concerning StorageClass and DatasetType 

618 # not supporting the full API. 

619 try: 

620 minimal = set(entity.dimensions.required.names) # type: ignore 

621 maximal = set(entity.dimensions.names) # type: ignore 

622 except AttributeError: 

623 try: 

624 minimal = set(entity.dataId.keys()) # type: ignore 

625 maximal = minimal 

626 except AttributeError: 

627 return 

628 

629 required = self.fields(optionals=False) 

630 

631 # Calculate any field usage that does not match a dimension 

632 if not required.issubset(maximal): 

633 raise FileTemplateValidationError(f"Template '{self}' is inconsistent with {entity}:" 

634 f" {required} is not a subset of {maximal}.") 

635 

636 if not allfields.issuperset(minimal): 

637 raise FileTemplateValidationError(f"Template '{self}' is inconsistent with {entity}:" 

638 f" {allfields} is not a superset of {minimal}.") 

639 

640 return