# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Support for file template string expansion."""

__all__ = ("FileTemplates", "FileTemplate", "FileTemplatesConfig", "FileTemplateValidationError")

import os.path
import string
import logging
from types import MappingProxyType

from .config import Config
from .configSupport import processLookupConfigs, LookupKey
from .exceptions import ValidationError
from .dimensions import SkyPixDimension, DataCoordinate

log = logging.getLogger(__name__)


class FileTemplateValidationError(ValidationError):
    """Exception raised when a file template is not consistent with the
    associated `DatasetType`."""
    pass


class FileTemplatesConfig(Config):
    """Configuration information for `FileTemplates`."""
    pass


class FileTemplates:
    """Collection of `FileTemplate` templates.

    Parameters
    ----------
    config : `FileTemplatesConfig` or `str`
        Load configuration.
    default : `str`, optional
        If not `None`, a default template to use if no template has
        been specified explicitly in the configuration.
    universe : `DimensionUniverse`
        The set of all known dimensions, used to normalize any lookup keys
        involving dimensions.

    Notes
    -----
    The configuration can include one level of hierarchy where an
    instrument-specific section can be defined to override more general
    template specifications.  This is represented in YAML using a
    key of the form ``instrument<name>``, which can then define templates
    that will be returned if a `DatasetRef` contains a matching instrument
    name in the data ID.

    A default fallback template can be specified using the key ``default``.
    Defaulting can be disabled in a child configuration by defining the
    value to be an empty string or a boolean `False`.

    The config is parsed using the function
    `~lsst.daf.butler.configSupport.processLookupConfigs`.
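
    Examples
    --------
    A minimal sketch of constructing templates from a configuration
    mapping.  The ``instrument<HSC>`` section and the template strings
    are illustrative only, and ``universe`` is assumed to be a
    `DimensionUniverse` obtained elsewhere, so the final step is skipped:

    >>> config = FileTemplatesConfig({
    ...     "default": "{run}/{datasetType}/{visit:06d}",
    ...     "calexp": "{run}/{datasetType}.{component:?}/{visit:06d}",
    ...     "instrument<HSC>": {
    ...         "calexp": "{run}/hsc/{datasetType}/{visit:06d}",
    ...     },
    ... })
    >>> templates = FileTemplates(config, universe=universe)  # doctest: +SKIP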

79 """ 

80 

81 defaultKey = LookupKey("default") 

82 """Configuration key associated with the default template.""" 

83 

84 def __init__(self, config, default=None, *, universe): 

85 self.config = FileTemplatesConfig(config) 

86 self._templates = {} 

87 self.default = FileTemplate(default) if default is not None else None 

88 contents = processLookupConfigs(self.config, universe=universe) 

89 

90 # Convert all the values to FileTemplate, handling defaults 

91 for key, templateStr in contents.items(): 

92 if key == self.defaultKey: 

93 if not templateStr: 

94 self.default = None 

95 else: 

96 self.default = FileTemplate(templateStr) 

97 else: 

98 self._templates[key] = FileTemplate(templateStr) 

99 

    @property
    def templates(self):
        """Collection of templates indexed by lookup key (`dict`)."""
        return MappingProxyType(self._templates)

    def __contains__(self, key):
        """Indicate whether the supplied key is present in the templates.

        Parameters
        ----------
        key : `LookupKey`
            Key to use to determine if a corresponding value is present
            in the templates.

        Returns
        -------
        in : `bool`
            `True` if the supplied key is present in the templates.
        """
        return key in self.templates

    def __getitem__(self, key):
        return self.templates[key]

    def validateTemplates(self, entities, logFailures=False):
        """Retrieve the template associated with each dataset type and
        validate the dimensions against the template.

        Parameters
        ----------
        entities : `DatasetType`, `DatasetRef`, or `StorageClass`
            Entities to validate against the matching templates.  Can be
            differing types.
        logFailures : `bool`, optional
            If `True`, output a log message for every validation error
            detected.

        Raises
        ------
        FileTemplateValidationError
            Raised if an entity failed validation.

        Notes
        -----
        See `FileTemplate.validateTemplate()` for details on the validation.
        """
        unmatchedKeys = set(self.templates)
        failed = []
        for entity in entities:
            try:
                matchKey, template = self.getTemplateWithMatch(entity)
            except KeyError as e:
                # KeyError always quotes on stringification so strip here
                errMsg = str(e).strip('"\'')
                failed.append(errMsg)
                if logFailures:
                    log.fatal("%s", errMsg)
                continue

            if matchKey in unmatchedKeys:
                unmatchedKeys.remove(matchKey)

            try:
                template.validateTemplate(entity)
            except FileTemplateValidationError as e:
                failed.append(f"{e} (via key '{matchKey}')")
                if logFailures:
                    log.fatal("Template failure with key '%s': %s", matchKey, e)

        if logFailures and unmatchedKeys:
            log.warning("Unchecked keys: %s", ", ".join(str(k) for k in unmatchedKeys))

        if failed:
            if len(failed) == 1:
                msg = str(failed[0])
            else:
                failMsg = ";\n".join(failed)
                msg = f"{len(failed)} template validation failures: {failMsg}"
            raise FileTemplateValidationError(msg)

    def getLookupKeys(self):
        """Retrieve the look-up keys for all the template entries.

        Returns
        -------
        keys : `set` of `LookupKey`
            The keys available for matching a template.
        """
        return set(self.templates)

    def getTemplateWithMatch(self, entity):
        """Retrieve the `FileTemplate` associated with the dataset type,
        along with the lookup key that matched this template.

        If the lookup name corresponds to a component, the base name for
        the component will be examined if the full component name does
        not match.

        Parameters
        ----------
        entity : `DatasetType`, `DatasetRef`, or `StorageClass`
            Instance to use to look for a corresponding template.
            A `DatasetType` name or a `StorageClass` name will be used
            depending on the supplied entity.  Priority is given to a
            `DatasetType` name.  Supports instrument override if the
            supplied `DatasetRef` has an ``instrument`` value in its
            data ID.

        Returns
        -------
        matchKey : `LookupKey`
            The key that resulted in the successful match.
        template : `FileTemplate`
            Template instance to use with that dataset type.

        Raises
        ------
        KeyError
            Raised if no template could be located for this dataset type.
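
        Examples
        --------
        A sketch only: ``templates`` is a hypothetical `FileTemplates`
        instance and ``ref`` a hypothetical `DatasetRef` whose dataset type
        is ``calexp``.  A template registered under the dataset type name
        takes priority over one registered under the storage class name or
        the default, so the doctest is skipped:

        >>> matchKey, template = templates.getTemplateWithMatch(ref)  # doctest: +SKIP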

219 """ 

220 # Get the names to use for lookup 

221 names = entity._lookupNames() 

222 

223 # Get a location from the templates 

224 template = self.default 

225 source = self.defaultKey 

226 for name in names: 

227 if name in self.templates: 

228 template = self.templates[name] 

229 source = name 

230 break 

231 

232 if template is None: 

233 raise KeyError(f"Unable to determine file template from supplied argument [{entity}]") 

234 

235 log.debug("Got file %s from %s via %s", template, entity, source) 

236 

237 return source, template 

238 

239 def getTemplate(self, entity): 

240 """Retrieve the `FileTemplate` associated with the dataset type. 

241 

242 If the lookup name corresponds to a component the base name for 

243 the component will be examined if the full component name does 

244 not match. 

245 

246 Parameters 

247 ---------- 

248 entity : `DatasetType`, `DatasetRef`, or `StorageClass` 

249 Instance to use to look for a corresponding template. 

250 A `DatasetType` name or a `StorageClass` name will be used 

251 depending on the supplied entity. Priority is given to a 

252 `DatasetType` name. Supports instrument override if a 

253 `DatasetRef` is provided configured with an ``instrument`` 

254 value for the data ID. 

255 

256 Returns 

257 ------- 

258 template : `FileTemplate` 

259 Template instance to use with that dataset type. 

260 

261 Raises 

262 ------ 

263 KeyError 

264 Raised if no template could be located for this Dataset type. 

265 """ 

266 _, template = self.getTemplateWithMatch(entity) 

267 return template 


class FileTemplate:
    """Format a path template into a fully expanded path.

    Parameters
    ----------
    template : `str`
        Template string.

    Raises
    ------
    FileTemplateValidationError
        Raised if the template fails basic validation.

    Notes
    -----
    The templates use the standard Format Specification Mini-Language
    with the caveat that only named fields can be used.  The field names
    are taken from the Dimensions along with several additional fields:

    - datasetType: `str`, `DatasetType.name`
    - component: `str`, name of the StorageClass component
    - run: `str`, name of the run this dataset was added with
    - collection: synonym for ``run``

    At least one of ``run`` or ``collection`` must be present in the
    template to ensure unique filenames.

    More detailed information can be requested from dimensions by using a
    dot notation, so ``visit.name`` would use the name of the visit and
    ``detector.name_in_raft`` would use the name of the detector within the
    raft.

    The mini-language is extended to understand a "?" in the format
    specification.  This indicates that a field is optional.  If that
    dimension is missing, the field, along with the text before the field
    (unless it contains a path separator), will be removed from the
    output path.

    By default any "/" in a dataId value will be replaced by "_" to prevent
    unexpected directories being created in the path.  If the "/" should be
    retained then a special "/" format specifier can be included in the
    template.

311 """ 

312 

313 mandatoryFields = {"collection", "run"} 

314 """A set of fields, one of which must be present in a template.""" 

315 

316 datasetFields = {"datasetType", "component"} 

317 """Fields related to the supplied dataset, not a dimension.""" 

318 

319 specialFields = mandatoryFields | datasetFields 

320 """Set of special fields that are available independently of the defined 

321 Dimensions.""" 

322 

323 def __init__(self, template): 

324 if not isinstance(template, str): 

325 raise FileTemplateValidationError(f"Template ('{template}') does " 

326 "not contain any format specifiers") 

327 self.template = template 

328 

329 # Do basic validation without access to dimensions 

330 self.validateTemplate(None) 

331 

332 def __eq__(self, other): 

333 if not isinstance(other, FileTemplate): 

334 return False 

335 

336 return self.template == other.template 

337 

338 def __str__(self): 

339 return self.template 

340 

341 def __repr__(self): 

342 return f'{self.__class__.__name__}("{self.template}")' 

343 

    def fields(self, optionals=False, specials=False, subfields=False):
        """Return the field names used in this template.

        Parameters
        ----------
        optionals : `bool`, optional
            If `True`, optional fields are included in the returned set.
        specials : `bool`, optional
            If `True`, non-dimension fields are included.
        subfields : `bool`, optional
            If `True`, fields with syntax ``a.b`` are included.  If `False`,
            the default, only ``a`` would be returned.

        Returns
        -------
        names : `set`
            Names of fields used in this template.

        Notes
        -----
        The returned set will include special values such as ``datasetType``
        and ``component`` only when ``specials`` is `True`.
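
        Examples
        --------
        An illustrative sketch; ``detector.full_name`` is used only as an
        example of the subfield syntax:

        >>> t = FileTemplate("{run}/{datasetType}/{visit:06d}_{detector.full_name:?}")
        >>> sorted(t.fields())
        ['visit']
        >>> sorted(t.fields(optionals=True))
        ['detector', 'visit']
        >>> sorted(t.fields(optionals=True, specials=True))
        ['datasetType', 'detector', 'run', 'visit']
        >>> sorted(t.fields(optionals=True, subfields=True))
        ['detector.full_name', 'visit']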

366 """ 

367 fmt = string.Formatter() 

368 parts = fmt.parse(self.template) 

369 

370 names = set() 

371 for literal, field_name, format_spec, conversion in parts: 

372 if field_name is not None: 

373 if "?" in format_spec and not optionals: 

374 continue 

375 

376 if not specials and field_name in self.specialFields: 

377 continue 

378 

379 if "." in field_name and not subfields: 

380 field_name, _ = field_name.split(".") 

381 

382 names.add(field_name) 

383 

384 return names 

385 

    def format(self, ref):
        """Format a template string into a full path.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset to be formatted.

        Returns
        -------
        path : `str`
            Expanded path.

        Raises
        ------
        KeyError
            Raised if the requested field is not defined and the field is
            not optional.  Also raised if the dataset type refers to a
            component but ``component`` was not part of the template.
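
        Examples
        --------
        A sketch only: ``ref`` is a hypothetical `DatasetRef` with dataset
        type ``calexp``, run ``ingest/run``, and ``visit=903334`` in its
        data ID, so the doctest is skipped.  The "/" in the run value is
        replaced with "_" because no "/" format specifier was given:

        >>> template = FileTemplate("{run}/{datasetType}/{visit:06d}")
        >>> template.format(ref)  # doctest: +SKIP
        'ingest_run/calexp/903334'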

405 """ 

406 # Extract defined non-None dimensions from the dataId 

407 # We attempt to get the "full" dict on the assumption that ref.dataId 

408 # is a ExpandedDataCoordinate, as it should be when running 

409 # PipelineTasks. We should probably just require that when formatting 

410 # templates (and possibly when constructing DatasetRefs), but doing so 

411 # would break a ton of otherwise-useful tests that would need to be 

412 # modified to provide a lot more metadata. 

413 fields = {k: v for k, v in getattr(ref.dataId, "full", ref.dataId).items() if v is not None} 

414 

415 if isinstance(ref.dataId, DataCoordinate): 

416 # If there is exactly one SkyPixDimension in the data ID, alias its 

417 # value with the key "skypix", so we can use that to match any 

418 # skypix dimension. 

419 # We restrict this behavior to the (real-world) case where the 

420 # data ID is a DataCoordinate, not just a dict. That should only 

421 # not be true in some test code, but that test code is a pain to 

422 # update to be more like the real world while still providing our 

423 # only tests of important behavior. 

424 skypix = [dimension for dimension in ref.dataId.graph if isinstance(dimension, SkyPixDimension)] 

425 if len(skypix) == 1: 

426 fields["skypix"] = fields[skypix[0]] 

427 

428 # Extra information that can be included using . syntax 

429 extras = getattr(ref.dataId, "records", {}) 

430 

431 datasetType = ref.datasetType 

432 fields["datasetType"], component = datasetType.nameAndComponent() 

433 

434 usedComponent = False 

435 if component is not None: 

436 fields["component"] = component 

437 

438 usedRunOrCollection = False 

439 fields["collection"] = ref.run 

440 fields["run"] = ref.run 

441 

442 fmt = string.Formatter() 

443 parts = fmt.parse(self.template) 

444 output = "" 

445 

446 for literal, field_name, format_spec, conversion in parts: 

447 

448 if field_name == "component": 

449 usedComponent = True 

450 

451 if format_spec is None: 

452 output = output + literal 

453 continue 

454 

455 if "?" in format_spec: 

456 optional = True 

457 # Remove the non-standard character from the spec 

458 format_spec = format_spec.replace("?", "") 

459 else: 

460 optional = False 

461 

462 if field_name in ("run", "collection"): 

463 usedRunOrCollection = True 

464 

465 # Check for request for additional information from the dataId 

466 if "." in field_name: 

467 primary, secondary = field_name.split(".") 

468 if primary in extras: 

469 record = extras[primary] 

470 # Only fill in the fields if we have a value, the 

471 # KeyError will trigger below if the attribute is missing. 

472 if hasattr(record, secondary): 

473 fields[field_name] = getattr(record, secondary) 

474 

475 if field_name in fields: 

476 value = fields[field_name] 

477 elif optional: 

478 # If this is optional ignore the format spec 

479 # and do not include the literal text prior to the optional 

480 # field unless it contains a "/" path separator 

481 format_spec = "" 

482 value = "" 

483 if "/" not in literal: 

484 literal = "" 

485 else: 

486 raise KeyError(f"'{field_name}' requested in template via '{self.template}' " 

487 "but not defined and not optional") 

488 

489 # Handle "/" in values since we do not want to be surprised by 

490 # unexpected directories turning up 

491 replace_slash = True 

492 if "/" in format_spec: 

493 # Remove the non-standard character from the spec 

494 format_spec = format_spec.replace("/", "") 

495 replace_slash = False 

496 

497 if isinstance(value, str): 

498 if replace_slash: 

499 value = value.replace("/", "_") 

500 

501 # Now use standard formatting 

502 output = output + literal + format(value, format_spec) 

503 

504 # Replace periods with underscores in the non-directory part to 

505 # prevent file extension confusion. 

506 head, tail = os.path.split(output) 

507 output = os.path.join(head, tail.replace(".", "_")) 

508 

509 # Complain if we were meant to use a component 

510 if component is not None and not usedComponent: 

511 raise KeyError("Component '{}' specified but template {} did not use it".format(component, 

512 self.template)) 

513 

514 # Complain if there's no run or collection 

515 if not usedRunOrCollection: 

516 raise KeyError("Template does not include 'run' or 'collection'.") 

517 

518 # Since this is known to be a path, normalize it in case some double 

519 # slashes have crept in 

520 path = os.path.normpath(output) 

521 

522 # It should not be an absolute path (may happen with optionals) 

523 if os.path.isabs(path): 

524 path = os.path.relpath(path, start="/") 

525 

526 return path 

527 

    def validateTemplate(self, entity):
        """Compare the template against a representative entity that would
        like to use the template.

        Parameters
        ----------
        entity : `DatasetType`, `DatasetRef`, or `StorageClass`
            Entity to compare against the template.

        Raises
        ------
        FileTemplateValidationError
            Raised if the template is inconsistent with the supplied entity.

        Notes
        -----
        Validation will always include a check that mandatory fields
        are present and that at least one field refers to a dimension.
        If the supplied entity includes a `DimensionGraph` then it will be
        used to compare the available dimensions with those specified in the
        template.
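
        Examples
        --------
        A sketch of validation without dimension information, which checks
        only the mandatory and dimension-like fields; passing `None` as the
        entity mirrors what the constructor does:

        >>> FileTemplate("{run}/{datasetType}/{visit:06d}").validateTemplate(None)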

549 """ 

550 

551 # Check that the template has run or collection 

552 withSpecials = self.fields(specials=True, optionals=True) 

553 if not withSpecials & self.mandatoryFields: 

554 raise FileTemplateValidationError(f"Template '{self}' is missing a mandatory field" 

555 f" from {self.mandatoryFields}") 

556 

557 # Check that there are some dimension fields in the template 

558 allfields = self.fields(optionals=True) 

559 if not allfields: 

560 raise FileTemplateValidationError(f"Template '{self}' does not seem to have any fields" 

561 " corresponding to dimensions.") 

562 

563 # If we do not have dimensions available then all we can do is shrug 

564 if not hasattr(entity, "dimensions"): 

565 return 

566 

567 # if this entity represents a component then insist that component 

568 # is present in the template. If the entity is not a component 

569 # make sure that component is not mandatory. 

570 try: 

571 if entity.isComponent(): 

572 if "component" not in withSpecials: 

573 raise FileTemplateValidationError(f"Template '{self}' has no component but " 

574 f"{entity} refers to a component.") 

575 else: 

576 mandatorySpecials = self.fields(specials=True) 

577 if "component" in mandatorySpecials: 

578 raise FileTemplateValidationError(f"Template '{self}' has mandatory component but " 

579 f"{entity} does not refer to a component.") 

580 except AttributeError: 

581 pass 

582 

583 # Get the dimension links to get the full set of available field names 

584 # Fall back to dataId keys if we have them but no links. 

585 # dataId keys must still be present in the template 

586 try: 

587 minimal = set(entity.dimensions.required.names) 

588 maximal = set(entity.dimensions.names) 

589 except AttributeError: 

590 try: 

591 minimal = set(entity.dataId.keys()) 

592 maximal = minimal 

593 except AttributeError: 

594 return 

595 

596 required = self.fields(optionals=False) 

597 

598 # Calculate any field usage that does not match a dimension 

599 if not required.issubset(maximal): 

600 raise FileTemplateValidationError(f"Template '{self}' is inconsistent with {entity}:" 

601 f" {required} is not a subset of {maximal}.") 

602 

603 if not allfields.issuperset(minimal): 

604 raise FileTemplateValidationError(f"Template '{self}' is inconsistent with {entity}:" 

605 f" {allfields} is not a superset of {minimal}.") 

606 

607 return