
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Support for file template string expansion."""

__all__ = ("FileTemplates", "FileTemplate", "FileTemplatesConfig", "FileTemplateValidationError")

import os.path
import string
import logging
from types import MappingProxyType

from .config import Config
from .configSupport import processLookupConfigs, LookupKey
from .exceptions import ValidationError
from .dimensions import SkyPixDimension, DataCoordinate

log = logging.getLogger(__name__)


class FileTemplateValidationError(ValidationError):
    """Exception thrown when a file template is not consistent with the
    associated `DatasetType`."""
    pass


class FileTemplatesConfig(Config):
    """Configuration information for `FileTemplates`."""
    pass


class FileTemplates:
    """Collection of `FileTemplate` templates.

    Parameters
    ----------
    config : `FileTemplatesConfig` or `str`
        Load configuration.
    default : `str`, optional
        If not `None`, a default template to use if no template has
        been specified explicitly in the configuration.
    universe : `DimensionUniverse`
        The set of all known dimensions, used to normalize any lookup keys
        involving dimensions.

    Notes
    -----
    The configuration can include one level of hierarchy where an
    instrument-specific section can be defined to override more general
    template specifications. This is represented in YAML using a
    key of the form ``instrument<name>``, which can then define templates
    that will be returned if a `DatasetRef` contains a matching instrument
    name in the data ID.

    A default fallback template can be specified using the key ``default``.
    Defaulting can be disabled in a child configuration by defining the
    value to be an empty string or a boolean `False`.

    The config is parsed using the function
    `~lsst.daf.butler.configSupport.processLookupConfigs`.
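
    Examples
    --------
    A sketch of a configuration with the shape described above; the
    dataset type name (``calexp``), instrument name (``HSC``), and
    template strings are purely illustrative::

        default: "{run}/{datasetType}/{datasetType}_{visit:06d?}"
        calexp: "{run}/{datasetType}/{visit:06d}/{datasetType}"
        instrument<HSC>:
            calexp: "{run}/hsc/{visit:06d}/{datasetType}"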

79 """ 

80 

81 defaultKey = LookupKey("default") 

82 """Configuration key associated with the default template.""" 

83 

    def __init__(self, config, default=None, *, universe):
        self.config = FileTemplatesConfig(config)
        self._templates = {}
        self.default = FileTemplate(default) if default is not None else None
        contents = processLookupConfigs(self.config, universe=universe)

        # Convert all the values to FileTemplate, handling defaults
        for key, templateStr in contents.items():
            if key == self.defaultKey:
                if not templateStr:
                    self.default = None
                else:
                    self.default = FileTemplate(templateStr)
            else:
                self._templates[key] = FileTemplate(templateStr)

    @property
    def templates(self):
        """Collection of templates indexed by lookup key (`dict`)."""
        return MappingProxyType(self._templates)

    def __contains__(self, key):
        """Indicates whether the supplied key is present in the templates.

        Parameters
        ----------
        key : `LookupKey`
            Key to use to determine if a corresponding value is present
            in the templates.

        Returns
        -------
        in : `bool`
            `True` if the supplied key is present in the templates.
        """
        return key in self.templates

    def __getitem__(self, key):
        return self.templates[key]

    def validateTemplates(self, entities, logFailures=False):
        """Retrieve the template associated with each dataset type and
        validate the dimensions against the template.

        Parameters
        ----------
        entities : iterable of `DatasetType`, `DatasetRef`, or `StorageClass`
            Entities to validate against the matching templates. Can be
            of differing types.
        logFailures : `bool`, optional
            If `True`, output a log message for every validation error
            detected.

        Raises
        ------
        FileTemplateValidationError
            Raised if an entity failed validation.

        Notes
        -----
        See `FileTemplate.validateTemplate()` for details on the validation.
        """
        unmatchedKeys = set(self.templates)
        failed = []
        for entity in entities:
            try:
                matchKey, template = self.getTemplateWithMatch(entity)
            except KeyError as e:
                # KeyError always quotes on stringification so strip here
                errMsg = str(e).strip('"\'')
                failed.append(errMsg)
                if logFailures:
                    log.fatal("%s", errMsg)
                continue

            if matchKey in unmatchedKeys:
                unmatchedKeys.remove(matchKey)

            try:
                template.validateTemplate(entity)
            except FileTemplateValidationError as e:
                failed.append(f"{e} (via key '{matchKey}')")
                if logFailures:
                    log.fatal("Template failure with key '%s': %s", matchKey, e)

        if logFailures and unmatchedKeys:
            log.warning("Unchecked keys: %s", ", ".join([str(k) for k in unmatchedKeys]))

        if failed:
            if len(failed) == 1:
                msg = str(failed[0])
            else:
                failMsg = ";\n".join(failed)
                msg = f"{len(failed)} template validation failures: {failMsg}"
            raise FileTemplateValidationError(msg)

    def getLookupKeys(self):
        """Retrieve the lookup keys for all the template entries.

        Returns
        -------
        keys : `set` of `LookupKey`
            The keys available for matching a template.
        """
        return set(self.templates)

    def getTemplateWithMatch(self, entity):
        """Retrieve the `FileTemplate` associated with the dataset type along
        with the lookup key that was a match for this template.

        If the lookup name corresponds to a component, the base name of
        the component will be examined if the full component name does
        not match.

        Parameters
        ----------
        entity : `DatasetType`, `DatasetRef`, or `StorageClass`
            Instance to use to look for a corresponding template.
            A `DatasetType` name or a `StorageClass` name will be used
            depending on the supplied entity. Priority is given to a
            `DatasetType` name. Instrument overrides are supported if a
            `DatasetRef` is provided whose data ID includes an
            ``instrument`` value.

        Returns
        -------
        matchKey : `LookupKey`
            The key that resulted in the successful match.
        template : `FileTemplate`
            Template instance to use with that dataset type.

        Raises
        ------
        KeyError
            Raised if no template could be located for this dataset type.
        """
        # Get the names to use for lookup
        names = entity._lookupNames()

        # Get a location from the templates
        template = self.default
        source = self.defaultKey
        for name in names:
            if name in self.templates:
                template = self.templates[name]
                source = name
                break

        if template is None:
            raise KeyError(f"Unable to determine file template from supplied argument [{entity}]")

        log.debug("Got file %s from %s via %s", template, entity, source)

        return source, template

    def getTemplate(self, entity):
        """Retrieve the `FileTemplate` associated with the dataset type.

        If the lookup name corresponds to a component, the base name of
        the component will be examined if the full component name does
        not match.

        Parameters
        ----------
        entity : `DatasetType`, `DatasetRef`, or `StorageClass`
            Instance to use to look for a corresponding template.
            A `DatasetType` name or a `StorageClass` name will be used
            depending on the supplied entity. Priority is given to a
            `DatasetType` name. Instrument overrides are supported if a
            `DatasetRef` is provided whose data ID includes an
            ``instrument`` value.

        Returns
        -------
        template : `FileTemplate`
            Template instance to use with that dataset type.

        Raises
        ------
        KeyError
            Raised if no template could be located for this dataset type.
        """
        _, template = self.getTemplateWithMatch(entity)
        return template


class FileTemplate:
    """Format a path template into a fully expanded path.

    Parameters
    ----------
    template : `str`
        Template string.

    Raises
    ------
    FileTemplateValidationError
        Raised if the template fails basic validation.

    Notes
    -----
    The templates use the standard Format Specification Mini-Language
    with the caveat that only named fields can be used. The field names
    are taken from the Dimensions along with several additional fields:

    - datasetType: `str`, `DatasetType.name`
    - component: `str`, name of the StorageClass component
    - run: `str`, name of the run this dataset was added with

    ``run`` must always be provided to ensure unique paths.

    More detailed information can be requested from dimensions by using a dot
    notation, so ``visit.name`` would use the name of the visit and
    ``detector.name_in_raft`` would use the name of the detector within the
    raft.

    The mini-language is extended to understand a "?" in the format
    specification. This indicates that the field is optional. If the
    relevant Dimension is missing from the data ID, the field, along with
    the text preceding it (unless that text is a path separator), will be
    removed from the output path.

    By default any "/" in a dataId value will be replaced by "_" to prevent
    unexpected directories being created in the path. If the "/" should be
    retained then a special "/" format specifier can be included in the
    template.
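
    Examples
    --------
    An illustrative (hypothetical) template::

        {run}/{datasetType}/{visit:06d}

    For a dataset with a ``run`` of ``"ingest"``, a dataset type of
    ``raw``, and a ``visit`` of ``1234``, this would expand to
    ``ingest/raw/001234``. Writing the final field as ``{visit:06d?}``
    instead would make it optional, so the field would simply be dropped
    from the path for a data ID with no ``visit``.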

309 """ 

310 

311 mandatoryFields = {"run"} 

312 """A set of fields, one of which must be present in a template.""" 

313 

314 datasetFields = {"datasetType", "component"} 

315 """Fields related to the supplied dataset, not a dimension.""" 

316 

317 specialFields = mandatoryFields | datasetFields 

318 """Set of special fields that are available independently of the defined 

319 Dimensions.""" 

320 

    def __init__(self, template):
        if not isinstance(template, str):
            raise FileTemplateValidationError(f"Template ('{template}') is "
                                              "not a string")
        self.template = template

        # Do basic validation without access to dimensions
        self.validateTemplate(None)

    def __eq__(self, other):
        if not isinstance(other, FileTemplate):
            return False

        return self.template == other.template

    def __str__(self):
        return self.template

    def __repr__(self):
        return f'{self.__class__.__name__}("{self.template}")'

    def fields(self, optionals=False, specials=False, subfields=False):
        """Return the field names used in this template.

        Parameters
        ----------
        optionals : `bool`, optional
            If `True`, optional fields are included in the returned set.
        specials : `bool`, optional
            If `True`, non-dimension fields are included.
        subfields : `bool`, optional
            If `True`, fields with syntax ``a.b`` are included. If `False`,
            the default, only ``a`` would be returned.

        Returns
        -------
        names : `set`
            Names of fields used in this template.

        Notes
        -----
        The returned set will include the special values such as `datasetType`
        and `component`.
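
        Examples
        --------
        A small illustrative example:

        >>> t = FileTemplate("{run}/{datasetType}/{visit:06d}")
        >>> sorted(t.fields())
        ['visit']
        >>> sorted(t.fields(specials=True))
        ['datasetType', 'run', 'visit']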

364 """ 

365 fmt = string.Formatter() 

366 parts = fmt.parse(self.template) 

367 

368 names = set() 

369 for literal, field_name, format_spec, conversion in parts: 

370 if field_name is not None: 

371 if "?" in format_spec and not optionals: 

372 continue 

373 

374 if not specials and field_name in self.specialFields: 

375 continue 

376 

377 if "." in field_name and not subfields: 

378 field_name, _ = field_name.split(".") 

379 

380 names.add(field_name) 

381 

382 return names 

383 

    def format(self, ref):
        """Format a template string into a full path.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset to be formatted.

        Returns
        -------
        path : `str`
            Expanded path.

        Raises
        ------
        KeyError
            Raised if the requested field is not defined and the field is
            not optional, or if a component is specified but "component"
            was not part of the template.
        """
        # Extract defined non-None dimensions from the dataId
        # We attempt to get the "full" dict on the assumption that ref.dataId
        # is an ExpandedDataCoordinate, as it should be when running
        # PipelineTasks. We should probably just require that when formatting
        # templates (and possibly when constructing DatasetRefs), but doing so
        # would break a ton of otherwise-useful tests that would need to be
        # modified to provide a lot more metadata.
        fields = {k: v for k, v in getattr(ref.dataId, "full", ref.dataId).items() if v is not None}

        if isinstance(ref.dataId, DataCoordinate):
            # If there is exactly one SkyPixDimension in the data ID, alias its
            # value with the key "skypix", so we can use that to match any
            # skypix dimension.
            # We restrict this behavior to the (real-world) case where the
            # data ID is a DataCoordinate, not just a dict. That should only
            # be untrue in some test code, but that test code is a pain to
            # update to be more like the real world while still providing our
            # only tests of important behavior.
            skypix = [dimension for dimension in ref.dataId.graph if isinstance(dimension, SkyPixDimension)]
            if len(skypix) == 1:
                fields["skypix"] = fields[skypix[0]]

        # Extra information that can be included using . syntax
        extras = getattr(ref.dataId, "records", {})

        datasetType = ref.datasetType
        fields["datasetType"], component = datasetType.nameAndComponent()

        usedComponent = False
        if component is not None:
            fields["component"] = component

        usedRun = False
        fields["run"] = ref.run

        fmt = string.Formatter()
        parts = fmt.parse(self.template)
        output = ""

        for literal, field_name, format_spec, conversion in parts:

            if field_name == "component":
                usedComponent = True

            if format_spec is None:
                output = output + literal
                continue

            if "?" in format_spec:
                optional = True
                # Remove the non-standard character from the spec
                format_spec = format_spec.replace("?", "")
            else:
                optional = False

            if field_name == "run":
                usedRun = True

            if field_name == "collection":
                raise KeyError("'collection' is no longer supported as a "
                               "file template placeholder; use 'run' instead.")

            # Check for request for additional information from the dataId
            if "." in field_name:
                primary, secondary = field_name.split(".")
                if primary in extras:
                    record = extras[primary]
                    # Only fill in the fields if we have a value; the
                    # KeyError will trigger below if the attribute is missing.
                    if hasattr(record, secondary):
                        fields[field_name] = getattr(record, secondary)

            if field_name in fields:
                value = fields[field_name]
            elif optional:
                # If this is optional ignore the format spec
                # and do not include the literal text prior to the optional
                # field unless it contains a "/" path separator
                format_spec = ""
                value = ""
                if "/" not in literal:
                    literal = ""
            else:
                raise KeyError(f"'{field_name}' requested in template via '{self.template}' "
                               "but not defined and not optional")

            # Handle "/" in values since we do not want to be surprised by
            # unexpected directories turning up
            replace_slash = True
            if "/" in format_spec:
                # Remove the non-standard character from the spec
                format_spec = format_spec.replace("/", "")
                replace_slash = False

            if isinstance(value, str):
                if replace_slash:
                    value = value.replace("/", "_")

            # Now use standard formatting
            output = output + literal + format(value, format_spec)

        # Replace periods with underscores in the non-directory part to
        # prevent file extension confusion.
        head, tail = os.path.split(output)
        output = os.path.join(head, tail.replace(".", "_"))

        # Complain if we were meant to use a component
        if component is not None and not usedComponent:
            raise KeyError(f"Component '{component}' specified but "
                           f"template {self.template} did not use it")

        # Complain if there's no run
        if not usedRun:
            raise KeyError("Template does not include 'run'.")

        # Since this is known to be a path, normalize it in case some double
        # slashes have crept in
        path = os.path.normpath(output)

        # It should not be an absolute path (may happen with optionals)
        if os.path.isabs(path):
            path = os.path.relpath(path, start="/")

        return path

    def validateTemplate(self, entity):
        """Compare the template against a representative entity that would
        like to use the template.

        Parameters
        ----------
        entity : `DatasetType`, `DatasetRef`, or `StorageClass`
            Entity to compare against the template.

        Raises
        ------
        FileTemplateValidationError
            Raised if the template is inconsistent with the supplied entity.

        Notes
        -----
        Validation will always include a check that mandatory fields
        are present and that at least one field refers to a dimension.
        If the supplied entity includes a `DimensionGraph` then it will be
        used to compare the available dimensions with those specified in the
        template.
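
        Examples
        --------
        Basic validation also runs at construction time, so a template
        missing the mandatory ``run`` field is rejected immediately (the
        message shown here is illustrative):

        >>> FileTemplate("{visit}")  # doctest: +IGNORE_EXCEPTION_DETAIL
        Traceback (most recent call last):
            ...
        FileTemplateValidationError: Template '{visit}' is missing a mandatory field ...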

550 """ 

551 

552 # Check that the template has run 

553 withSpecials = self.fields(specials=True, optionals=True) 

554 if not withSpecials & self.mandatoryFields: 

555 raise FileTemplateValidationError(f"Template '{self}' is missing a mandatory field" 

556 f" from {self.mandatoryFields}") 

557 

558 # Check that there are some dimension fields in the template 

559 allfields = self.fields(optionals=True) 

560 if not allfields: 

561 raise FileTemplateValidationError(f"Template '{self}' does not seem to have any fields" 

562 " corresponding to dimensions.") 

563 

564 # If we do not have dimensions available then all we can do is shrug 

565 if not hasattr(entity, "dimensions"): 

566 return 


        # If this entity represents a component then insist that component
        # is present in the template. If the entity is not a component,
        # make sure that component is not mandatory.
        try:
            if entity.isComponent():
                if "component" not in withSpecials:
                    raise FileTemplateValidationError(f"Template '{self}' has no component but "
                                                      f"{entity} refers to a component.")
            else:
                mandatorySpecials = self.fields(specials=True)
                if "component" in mandatorySpecials:
                    raise FileTemplateValidationError(f"Template '{self}' has mandatory component but "
                                                      f"{entity} does not refer to a component.")
        except AttributeError:
            pass

        # Get the dimension links to get the full set of available field names
        # Fall back to dataId keys if we have them but no links.
        # dataId keys must still be present in the template
        try:
            minimal = set(entity.dimensions.required.names)
            maximal = set(entity.dimensions.names)
        except AttributeError:
            try:
                minimal = set(entity.dataId.keys())
                maximal = minimal
            except AttributeError:
                return

        required = self.fields(optionals=False)

        # Calculate any field usage that does not match a dimension
        if not required.issubset(maximal):
            raise FileTemplateValidationError(f"Template '{self}' is inconsistent with {entity}:"
                                              f" {required} is not a subset of {maximal}.")

        if not allfields.issuperset(minimal):
            raise FileTemplateValidationError(f"Template '{self}' is inconsistent with {entity}:"
                                              f" {allfields} is not a superset of {minimal}.")

        return