Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24"""Configuration control.""" 

25 

26__all__ = ("Config", "ConfigSubset") 

27 

28import collections 

29import copy 

30import logging 

31import pprint 

32import os 

33import yaml 

34import sys 

35from yaml.representer import Representer 

36import io 

37from typing import Sequence, Optional, ClassVar 

38 

39try: 

40 import boto3 

41except ImportError: 

42 boto3 = None 

43 

44import lsst.utils 

45from lsst.utils import doImport 

46from .location import ButlerURI 

47from .s3utils import getS3Client 

48 

49yaml.add_representer(collections.defaultdict, Representer.represent_dict) 

50 

51 

52# Config module logger 

53log = logging.getLogger(__name__) 

54 

55# PATH-like environment variable to use for defaults. 

56CONFIG_PATH = "DAF_BUTLER_CONFIG_PATH" 

57 

58 

class Loader(yaml.CSafeLoader):
    """YAML Loader that supports file include directives.

    Uses the ``!include`` directive in a YAML file to point to another
    YAML file to be included. The path in the include directive is relative
    to the file containing that directive, unless it is a URI with an
    explicit scheme, in which case it is used as given.

        storageClasses: !include storageClasses.yaml

    Examples
    --------
    >>> with open("document.yaml", "r") as f:
    ...     data = yaml.load(f, Loader=Loader)

    Notes
    -----
    See https://davidchall.github.io/yaml-includes.html
    """

    def __init__(self, stream):
        super().__init__(stream)
        # If this is a string and not a stream we may well lack a name.
        try:
            self._root = ButlerURI(stream.name)
        except AttributeError:
            # No choice but to assume a local filesystem.
            self._root = ButlerURI("no-file.yaml")

    def include(self, node):
        """Construct the content referenced by an ``!include`` node.

        Parameters
        ----------
        node : `yaml.Node`
            Scalar (single file), sequence (list of files) or mapping
            (key to file) node attached to the ``!include`` tag.

        Returns
        -------
        content : `object`, `list` or `dict`
            Parsed content of the file(s), mirroring the shape of ``node``.

        Raises
        ------
        yaml.constructor.ConstructorError
            Raised if the node is not a scalar, sequence or mapping.
        """
        if isinstance(node, yaml.ScalarNode):
            return self.extractFile(self.construct_scalar(node))

        if isinstance(node, yaml.SequenceNode):
            return [self.extractFile(filename) for filename in self.construct_sequence(node)]

        if isinstance(node, yaml.MappingNode):
            return {k: self.extractFile(v) for k, v in self.construct_mapping(node).items()}

        # Carry the explanation in the exception itself rather than
        # printing it to stderr and raising a message-less error.
        raise yaml.constructor.ConstructorError(
            None, None,
            f"unrecognised node type ({type(node).__name__}) in !include statement",
            getattr(node, "start_mark", None))

    def extractFile(self, filename):
        """Load and parse the YAML file referenced by an include.

        Parameters
        ----------
        filename : `str`
            Relative path or explicit URI of the file to include.

        Returns
        -------
        content : `object`
            Parsed YAML content of the referenced file.

        Raises
        ------
        FileNotFoundError
            Raised if an S3 key or bucket does not exist.
        ModuleNotFoundError
            Raised if an S3 URI is used but boto3 is not available.
        ValueError
            Raised if the resolved URI uses an unsupported scheme.
        """
        # It is possible for the !include to point to an explicit URI
        # instead of a relative URI, therefore we first see if it is
        # scheme-less or not. If it has a scheme we use it directly
        # if it is scheme-less we use it relative to the file root.
        requesteduri = ButlerURI(filename, forceAbsolute=False)

        if requesteduri.scheme:
            fileuri = requesteduri
        else:
            fileuri = copy.copy(self._root)
            fileuri.updateFile(filename)

        log.debug("Opening YAML file via !include: %s", fileuri)

        if not fileuri.scheme or fileuri.scheme == "file":
            with open(fileuri.ospath, "r") as f:
                return yaml.load(f, Loader)

        if fileuri.scheme == "s3":
            if boto3 is None:
                raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")
            s3 = getS3Client()
            try:
                response = s3.get_object(Bucket=fileuri.netloc, Key=fileuri.relativeToPathRoot)
            except (s3.exceptions.NoSuchKey, s3.exceptions.NoSuchBucket) as err:
                raise FileNotFoundError(f'No such file or directory: {fileuri}') from err

            # boto3 response is a `StreamingBody`, but not a valid Python
            # IOStream. Loader will raise an error that the stream has no name.
            # The name is used to resolve the "!include" filename location to
            # download. A hackish solution is to name it explicitly.
            body = response["Body"]
            body.name = fileuri.geturl()
            try:
                return yaml.load(body, Loader)
            finally:
                # Release the underlying connection even if parsing fails.
                body.close()

        # Previously an unknown scheme fell through and silently included
        # `None`; fail loudly instead.
        raise ValueError(f"Unsupported URI scheme in !include: {fileuri}")


# Register the constructor once for the class rather than on every
# instantiation of the loader.
Loader.add_constructor("!include", Loader.include)

141 

142 

143class Config(collections.abc.MutableMapping): 

144 r"""Implements a datatype that is used by `Butler` for configuration 

145 parameters. 

146 

147 It is essentially a `dict` with key/value pairs, including nested dicts 

148 (as values). In fact, it can be initialized with a `dict`. 

149 This is explained next: 

150 

151 Config extends the `dict` api so that hierarchical values may be accessed 

152 with delimited notation or as a tuple. If a string is given the delimiter 

153 is picked up from the first character in that string. For example, 

154 ``foo.getValue(".a.b.c")``, ``foo["a"]["b"]["c"]``, ``foo["a", "b", "c"]``, 

155 ``foo[".a.b.c"]``, and ``foo["/a/b/c"]`` all achieve the same outcome. 

156 If the first character is alphanumeric, no delimiter will be used. 

157 ``foo["a.b.c"]`` will be a single key ``a.b.c`` as will ``foo[":a.b.c"]``. 

158 Unicode characters can be used as the delimiter for distinctiveness if 

159 required. 

160 

161 If a key in the hierarchy starts with a non-alphanumeric character care 

162 should be used to ensure that either the tuple interface is used or 

163 a distinct delimiter is always given in string form. 

164 

165 Finally, the delimiter can be escaped if it is part of a key and also 

166 has to be used as a delimiter. For example, ``foo[r".a.b\.c"]`` results in 

167 a two element hierarchy of ``a`` and ``b.c``. For hard-coded strings it is 

168 always better to use a different delimiter in these cases. 

169 

170 Note that adding a multi-level key implicitly creates any nesting levels 

171 that do not exist, but removing multi-level keys does not automatically 

172 remove empty nesting levels. As a result: 

173 

174 >>> c = Config() 

175 >>> c[".a.b"] = 1 

176 >>> del c[".a.b"] 

177 >>> c["a"] 

178 Config({'a': {}}) 

179 

180 Storage formats supported: 

181 

182 - yaml: read and write is supported. 

183 

184 

185 Parameters 

186 ---------- 

187 other : `str` or `Config` or `dict` 

188 Other source of configuration, can be: 

189 

190 - (`str`) Treated as a path to a config file on disk. Must end with 

191 ".yaml". 

192 - (`Config`) Copies the other Config's values into this one. 

193 - (`dict`) Copies the values from the dict into this Config. 

194 

195 If `None` is provided an empty `Config` will be created. 

196 """ 

197 

198 _D: ClassVar[str] = "→" 

199 """Default internal delimiter to use for components in the hierarchy when 

200 constructing keys for external use (see `Config.names()`).""" 

201 

202 includeKey: ClassVar[str] = "includeConfigs" 

203 """Key used to indicate that another config should be included at this 

204 part of the hierarchy.""" 

205 

206 def __init__(self, other=None): 

207 self._data = {} 

208 self.configFile = None 

209 

210 if other is None: 

211 return 

212 

213 if isinstance(other, Config): 

214 self._data = copy.deepcopy(other._data) 

215 self.configFile = other.configFile 

216 elif isinstance(other, collections.abc.Mapping): 

217 self.update(other) 

218 elif isinstance(other, str): 

219 # if other is a string, assume it is a file path. 

220 self.__initFromFile(other) 

221 self._processExplicitIncludes() 

222 else: 

223 # if the config specified by other could not be recognized raise 

224 # a runtime error. 

225 raise RuntimeError("A Config could not be loaded from other:%s" % other) 

226 

227 def ppprint(self): 

228 """helper function for debugging, prints a config out in a readable 

229 way in the debugger. 

230 

231 use: pdb> print(myConfigObject.ppprint()) 

232 

233 Returns 

234 ------- 

235 s : `str` 

236 A prettyprint formatted string representing the config 

237 """ 

238 return pprint.pformat(self._data, indent=2, width=1) 

239 

240 def __repr__(self): 

241 return f"{type(self).__name__}({self._data!r})" 

242 

243 def __str__(self): 

244 return self.ppprint() 

245 

246 def __len__(self): 

247 return len(self._data) 

248 

249 def __iter__(self): 

250 return iter(self._data) 

251 

252 def copy(self): 

253 return type(self)(self) 

254 

255 @classmethod 

256 def fromYaml(cls, string: str) -> Config: 

257 """Create a new Config instance from a YAML string. 

258 

259 Parameters 

260 ---------- 

261 string : `str` 

262 String containing content in YAML format 

263 

264 Returns 

265 ------- 

266 c : `Config` 

267 Newly-constructed Config. 

268 """ 

269 return cls().__initFromYaml(string) 

270 

271 def __initFromFile(self, path): 

272 """Load a file from a path or an URI. 

273 

274 Parameters 

275 ---------- 

276 path : `str` 

277 Path or an URI to a persisted config file. 

278 """ 

279 uri = ButlerURI(path) 

280 if uri.path.endswith("yaml"): 

281 if uri.scheme == "s3": 

282 self.__initFromS3YamlFile(uri.geturl()) 

283 else: 

284 self.__initFromYamlFile(uri.ospath) 

285 else: 

286 raise RuntimeError("Unhandled config file type:%s" % uri) 

287 self.configFile = str(path) 

288 

289 def __initFromS3YamlFile(self, url): 

290 """Load a file at a given S3 Bucket uri and attempts to load it from 

291 yaml. 

292 

293 Parameters 

294 ---------- 

295 path : `str` 

296 To a persisted config file. 

297 """ 

298 if boto3 is None: 

299 raise ModuleNotFoundError("boto3 not found." 

300 "Are you sure it is installed?") 

301 

302 uri = ButlerURI(url) 

303 s3 = getS3Client() 

304 try: 

305 response = s3.get_object(Bucket=uri.netloc, Key=uri.relativeToPathRoot) 

306 except (s3.exceptions.NoSuchKey, s3.exceptions.NoSuchBucket) as err: 

307 raise FileNotFoundError(f"No such file or directory: {uri}") from err 

308 

309 # boto3 response is a `StreamingBody`, but not a valid Python IOStream. 

310 # Loader will raise an error that the stream has no name. A hackish 

311 # solution is to name it explicitly. 

312 response["Body"].name = url 

313 self.__initFromYaml(response["Body"]) 

314 response["Body"].close() 

315 

316 def __initFromYamlFile(self, path): 

317 """Opens a file at a given path and attempts to load it in from yaml. 

318 

319 Parameters 

320 ---------- 

321 path : `str` 

322 To a persisted config file in YAML format. 

323 """ 

324 log.debug("Opening YAML config file: %s", path) 

325 with open(path, "r") as f: 

326 self.__initFromYaml(f) 

327 

328 def __initFromYaml(self, stream): 

329 """Loads a YAML config from any readable stream that contains one. 

330 

331 Parameters 

332 ---------- 

333 stream: `IO` or `str` 

334 Stream to pass to the YAML loader. Accepts anything that 

335 `yaml.load` accepts. This can include a string as well as an 

336 IO stream. 

337 

338 Raises 

339 ------ 

340 yaml.YAMLError 

341 If there is an error loading the file. 

342 """ 

343 content = yaml.load(stream, Loader=Loader) 

344 if content is None: 

345 content = {} 

346 self._data = content 

347 return self 

348 

349 def _processExplicitIncludes(self): 

350 """Scan through the configuration searching for the special 

351 includeConfigs directive and process the includes.""" 

352 

353 # Search paths for config files 

354 searchPaths = [os.path.curdir] 

355 if self.configFile is not None: 

356 searchPaths.append(os.path.abspath(os.path.dirname(self.configFile))) 

357 

358 # Ensure we know what delimiter to use 

359 names = self.nameTuples() 

360 for path in names: 

361 if path[-1] == self.includeKey: 

362 

363 log.debug("Processing file include directive at %s", self._D + self._D.join(path)) 

364 basePath = path[:-1] 

365 

366 # Extract the includes and then delete them from the config 

367 includes = self[path] 

368 del self[path] 

369 

370 # Be consistent and convert to a list 

371 if not isinstance(includes, list): 

372 includes = [includes] 

373 

374 # Read each file assuming it is a reference to a file 

375 # The file can be relative to config file or cwd 

376 # ConfigSubset search paths are not used 

377 # At some point these might be URIs which we will have to 

378 # assume resolve explicitly 

379 subConfigs = [] 

380 for fileName in includes: 

381 # Expand any shell variables 

382 fileName = os.path.expandvars(fileName) 

383 found = None 

384 if os.path.isabs(fileName): 

385 found = fileName 

386 else: 

387 for dir in searchPaths: 

388 filePath = os.path.join(dir, fileName) 

389 if os.path.exists(filePath): 

390 found = os.path.normpath(os.path.abspath(filePath)) 

391 break 

392 if not found: 

393 raise RuntimeError(f"Unable to find referenced include file: {fileName}") 

394 

395 # Read the referenced Config as a Config 

396 subConfigs.append(type(self)(found)) 

397 

398 # Now we need to merge these sub configs with the current 

399 # information that was present in this node in the config 

400 # tree with precedence given to the explicit values 

401 newConfig = subConfigs.pop(0) 

402 for sc in subConfigs: 

403 newConfig.update(sc) 

404 

405 # Explicit values take precedence 

406 if not basePath: 

407 # This is an include at the root config 

408 newConfig.update(self) 

409 # Replace the current config 

410 self._data = newConfig._data 

411 else: 

412 newConfig.update(self[basePath]) 

413 # And reattach to the base config 

414 self[basePath] = newConfig 

415 

416 @staticmethod 

417 def _splitIntoKeys(key): 

418 r"""Split the argument for get/set/in into a hierarchical list. 

419 

420 Parameters 

421 ---------- 

422 key : `str` or iterable 

423 Argument given to get/set/in. If an iterable is provided it will 

424 be converted to a list. If the first character of the string 

425 is not an alphanumeric character then it will be used as the 

426 delimiter for the purposes of splitting the remainder of the 

427 string. If the delimiter is also in one of the keys then it 

428 can be escaped using ``\``. There is no default delimiter. 

429 

430 Returns 

431 ------- 

432 keys : `list` 

433 Hierarchical keys as a `list`. 

434 """ 

435 if isinstance(key, str): 

436 if not key[0].isalnum(): 

437 d = key[0] 

438 key = key[1:] 

439 else: 

440 return [key, ] 

441 escaped = f"\\{d}" 

442 temp = None 

443 if escaped in key: 

444 # Complain at the attempt to escape the escape 

445 doubled = fr"\{escaped}" 

446 if doubled in key: 

447 raise ValueError(f"Escaping an escaped delimiter ({doubled} in {key})" 

448 " is not yet supported.") 

449 # Replace with a character that won't be in the string 

450 temp = "\r" 

451 if temp in key or d == temp: 

452 raise ValueError(f"Can not use character {temp!r} in hierarchical key or as" 

453 " delimiter if escaping the delimiter") 

454 key = key.replace(escaped, temp) 

455 hierarchy = key.split(d) 

456 if temp: 

457 hierarchy = [h.replace(temp, d) for h in hierarchy] 

458 return hierarchy 

459 elif isinstance(key, collections.abc.Iterable): 

460 return list(key) 

461 else: 

462 # Not sure what this is so try it anyway 

463 return [key, ] 

464 

465 def _getKeyHierarchy(self, name): 

466 """Retrieve the key hierarchy for accessing the Config 

467 

468 Parameters 

469 ---------- 

470 name : `str` or `tuple` 

471 Delimited string or `tuple` of hierarchical keys. 

472 

473 Returns 

474 ------- 

475 hierarchy : `list` of `str` 

476 Hierarchy to use as a `list`. If the name is available directly 

477 as a key in the Config it will be used regardless of the presence 

478 of any nominal delimiter. 

479 """ 

480 if name in self._data: 

481 keys = [name, ] 

482 else: 

483 keys = self._splitIntoKeys(name) 

484 return keys 

485 

486 def _findInHierarchy(self, keys, create=False): 

487 """Look for hierarchy of keys in Config 

488 

489 Parameters 

490 ---------- 

491 keys : `list` or `tuple` 

492 Keys to search in hierarchy. 

493 create : `bool`, optional 

494 If `True`, if a part of the hierarchy does not exist, insert an 

495 empty `dict` into the hierarchy. 

496 

497 Returns 

498 ------- 

499 hierarchy : `list` 

500 List of the value corresponding to each key in the supplied 

501 hierarchy. Only keys that exist in the hierarchy will have 

502 a value. 

503 complete : `bool` 

504 `True` if the full hierarchy exists and the final element 

505 in ``hierarchy`` is the value of relevant value. 

506 """ 

507 d = self._data 

508 

509 def checkNextItem(k, d, create): 

510 """See if k is in d and if it is return the new child""" 

511 nextVal = None 

512 isThere = False 

513 if d is None: 

514 # We have gone past the end of the hierarchy 

515 pass 

516 elif isinstance(d, collections.abc.Sequence): 

517 # Check sequence first because for lists 

518 # __contains__ checks whether value is found in list 

519 # not whether the index exists in list. When we traverse 

520 # the hierarchy we are interested in the index. 

521 try: 

522 nextVal = d[int(k)] 

523 isThere = True 

524 except IndexError: 

525 pass 

526 except ValueError: 

527 isThere = k in d 

528 elif k in d: 

529 nextVal = d[k] 

530 isThere = True 

531 elif create: 

532 d[k] = {} 

533 nextVal = d[k] 

534 isThere = True 

535 return nextVal, isThere 

536 

537 hierarchy = [] 

538 complete = True 

539 for k in keys: 

540 d, isThere = checkNextItem(k, d, create) 

541 if isThere: 

542 hierarchy.append(d) 

543 else: 

544 complete = False 

545 break 

546 

547 return hierarchy, complete 

548 

549 def __getitem__(self, name): 

550 # Override the split for the simple case where there is an exact 

551 # match. This allows `Config.items()` to work via a simple 

552 # __iter__ implementation that returns top level keys of 

553 # self._data. 

554 keys = self._getKeyHierarchy(name) 

555 

556 hierarchy, complete = self._findInHierarchy(keys) 

557 if not complete: 

558 raise KeyError(f"{name} not found") 

559 data = hierarchy[-1] 

560 

561 if isinstance(data, collections.abc.Mapping): 

562 data = Config(data) 

563 # Ensure that child configs inherit the parent internal delimiter 

564 if self._D != Config._D: 

565 data._D = self._D 

566 return data 

567 

568 def __setitem__(self, name, value): 

569 keys = self._getKeyHierarchy(name) 

570 last = keys.pop() 

571 if isinstance(value, Config): 

572 value = copy.deepcopy(value._data) 

573 

574 hierarchy, complete = self._findInHierarchy(keys, create=True) 

575 if hierarchy: 

576 data = hierarchy[-1] 

577 else: 

578 data = self._data 

579 

580 try: 

581 data[last] = value 

582 except TypeError: 

583 data[int(last)] = value 

584 

585 def __contains__(self, key): 

586 keys = self._getKeyHierarchy(key) 

587 hierarchy, complete = self._findInHierarchy(keys) 

588 return complete 

589 

590 def __delitem__(self, key): 

591 keys = self._getKeyHierarchy(key) 

592 last = keys.pop() 

593 hierarchy, complete = self._findInHierarchy(keys) 

594 if complete: 

595 if hierarchy: 

596 data = hierarchy[-1] 

597 else: 

598 data = self._data 

599 del data[last] 

600 else: 

601 raise KeyError(f"{key} not found in Config") 

602 

603 def update(self, other): 

604 """Like dict.update, but will add or modify keys in nested dicts, 

605 instead of overwriting the nested dict entirely. 

606 

607 For example, for the given code: 

608 foo = {"a": {"b": 1}} 

609 foo.update({"a": {"c": 2}}) 

610 

611 Parameters 

612 ---------- 

613 other : `dict` or `Config` 

614 Source of configuration: 

615 

616 - If foo is a dict, then after the update foo == {"a": {"c": 2}} 

617 - But if foo is a Config, then after the update 

618 foo == {"a": {"b": 1, "c": 2}} 

619 """ 

620 def doUpdate(d, u): 

621 if not isinstance(u, collections.abc.Mapping) or \ 

622 not isinstance(d, collections.abc.Mapping): 

623 raise RuntimeError("Only call update with Mapping, not {}".format(type(d))) 

624 for k, v in u.items(): 

625 if isinstance(v, collections.abc.Mapping): 

626 d[k] = doUpdate(d.get(k, {}), v) 

627 else: 

628 d[k] = v 

629 return d 

630 doUpdate(self._data, other) 

631 

632 def merge(self, other): 

633 """Like Config.update, but will add keys & values from other that 

634 DO NOT EXIST in self. 

635 

636 Keys and values that already exist in self will NOT be overwritten. 

637 

638 Parameters 

639 ---------- 

640 other : `dict` or `Config` 

641 Source of configuration: 

642 """ 

643 otherCopy = copy.deepcopy(other) 

644 otherCopy.update(self) 

645 self._data = otherCopy._data 

646 

647 def nameTuples(self, topLevelOnly=False): 

648 """Get tuples representing the name hierarchies of all keys. 

649 

650 The tuples returned from this method are guaranteed to be usable 

651 to access items in the configuration object. 

652 

653 Parameters 

654 ---------- 

655 topLevelOnly : `bool`, optional 

656 If False, the default, a full hierarchy of names is returned. 

657 If True, only the top level are returned. 

658 

659 Returns 

660 ------- 

661 names : `list` of `tuple` of `str` 

662 List of all names present in the `Config` where each element 

663 in the list is a `tuple` of strings representing the hierarchy. 

664 """ 

665 if topLevelOnly: 

666 return list((k,) for k in self) 

667 

668 def getKeysAsTuples(d, keys, base): 

669 if isinstance(d, collections.abc.Sequence): 

670 theseKeys = range(len(d)) 

671 else: 

672 theseKeys = d.keys() 

673 for key in theseKeys: 

674 val = d[key] 

675 levelKey = base + (key,) if base is not None else (key,) 

676 keys.append(levelKey) 

677 if isinstance(val, (collections.abc.Mapping, collections.abc.Sequence)) \ 

678 and not isinstance(val, str): 

679 getKeysAsTuples(val, keys, levelKey) 

680 keys = [] 

681 getKeysAsTuples(self._data, keys, None) 

682 return keys 

683 

684 def names(self, topLevelOnly=False, delimiter=None): 

685 """Get a delimited name of all the keys in the hierarchy. 

686 

687 The values returned from this method are guaranteed to be usable 

688 to access items in the configuration object. 

689 

690 Parameters 

691 ---------- 

692 topLevelOnly : `bool`, optional 

693 If False, the default, a full hierarchy of names is returned. 

694 If True, only the top level are returned. 

695 delimiter : `str`, optional 

696 Delimiter to use when forming the keys. If the delimiter is 

697 present in any of the keys, it will be escaped in the returned 

698 names. If `None` given a delimiter will be automatically provided. 

699 The delimiter can not be alphanumeric. 

700 

701 Returns 

702 ------- 

703 names : `list` of `str` 

704 List of all names present in the `Config`. 

705 

706 Notes 

707 ----- 

708 This is different than the built-in method `dict.keys`, which will 

709 return only the first level keys. 

710 

711 Raises 

712 ------ 

713 ValueError: 

714 The supplied delimiter is alphanumeric. 

715 """ 

716 if topLevelOnly: 

717 return list(self.keys()) 

718 

719 # Get all the tuples of hierarchical keys 

720 nameTuples = self.nameTuples() 

721 

722 if delimiter is not None and delimiter.isalnum(): 

723 raise ValueError(f"Supplied delimiter ({delimiter!r}) must not be alphanumeric.") 

724 

725 if delimiter is None: 

726 # Start with something, and ensure it does not need to be 

727 # escaped (it is much easier to understand if not escaped) 

728 delimiter = self._D 

729 

730 # Form big string for easy check of delimiter clash 

731 combined = "".join("".join(str(s) for s in k) for k in nameTuples) 

732 

733 # Try a delimiter and keep trying until we get something that 

734 # works. 

735 ntries = 0 

736 while delimiter in combined: 

737 log.debug(f"Delimiter '{delimiter}' could not be used. Trying another.") 

738 ntries += 1 

739 

740 if ntries > 100: 

741 raise ValueError(f"Unable to determine a delimiter for Config {self}") 

742 

743 # try another one 

744 while True: 

745 delimiter = chr(ord(delimiter)+1) 

746 if not delimiter.isalnum(): 

747 break 

748 

749 log.debug(f"Using delimiter {delimiter!r}") 

750 

751 # Form the keys, escaping the delimiter if necessary 

752 strings = [delimiter + delimiter.join(str(s).replace(delimiter, f"\\{delimiter}") for s in k) 

753 for k in nameTuples] 

754 return strings 

755 

756 def asArray(self, name): 

757 """Get a value as an array. 

758 

759 May contain one or more elements. 

760 

761 Parameters 

762 ---------- 

763 name : `str` 

764 Key to use to retrieve value. 

765 

766 Returns 

767 ------- 

768 array : `collections.abc.Sequence` 

769 The value corresponding to name, but guaranteed to be returned 

770 as a list with at least one element. If the value is a 

771 `~collections.abc.Sequence` (and not a `str`) the value itself 

772 will be returned, else the value will be the first element. 

773 """ 

774 val = self.get(name) 

775 if isinstance(val, str): 

776 val = [val] 

777 elif not isinstance(val, collections.abc.Sequence): 

778 val = [val] 

779 return val 

780 

781 def __eq__(self, other): 

782 if isinstance(other, Config): 

783 other = other._data 

784 return self._data == other 

785 

786 def __ne__(self, other): 

787 if isinstance(other, Config): 

788 other = other._data 

789 return self._data != other 

790 

791 ####### 

792 # i/o # 

793 

794 def dump(self, output): 

795 """Writes the config to a yaml stream. 

796 

797 Parameters 

798 ---------- 

799 output 

800 The YAML stream to use for output. 

801 """ 

802 yaml.safe_dump(self._data, output, default_flow_style=False) 

803 

804 def dumpToUri(self, uri, updateFile=True, defaultFileName="butler.yaml", 

805 overwrite=True): 

806 """Writes the config to location pointed to by given URI. 

807 

808 Currently supports 's3' and 'file' URI schemes. 

809 

810 Parameters 

811 ---------- 

812 uri: `str` or `ButlerURI` 

813 URI of location where the Config will be written. 

814 updateFile : bool, optional 

815 If True and uri does not end on a filename with extension, will 

816 append `defaultFileName` to the target uri. True by default. 

817 defaultFileName : bool, optional 

818 The file name that will be appended to target uri if updateFile is 

819 True and uri does not end on a file with an extension. 

820 overwrite : bool, optional 

821 If True the configuration will be written even if it already 

822 exists at that location. 

823 """ 

824 if isinstance(uri, str): 

825 uri = ButlerURI(uri) 

826 

827 if not uri.scheme or uri.scheme == "file": 

828 if os.path.isdir(uri.path) and updateFile: 

829 uri = ButlerURI(os.path.join(uri.ospath, defaultFileName)) 

830 self.dumpToFile(uri.ospath, overwrite=overwrite) 

831 elif uri.scheme == "s3": 

832 if not uri.dirLike and "." not in uri.basename(): 

833 uri = ButlerURI(uri.geturl(), forceDirectory=True) 

834 uri.updateFile(defaultFileName) 

835 self.dumpToS3File(uri, overwrite=overwrite) 

836 else: 

837 raise ValueError(f"Unrecognized URI scheme: {uri.scheme}") 

838 

839 def dumpToFile(self, path, *, overwrite=True): 

840 """Writes the config to a file. 

841 

842 Parameters 

843 ---------- 

844 path : `str` 

845 Path to the file to use for output. 

846 overwrite : `bool`, optional 

847 If True any existing file will be over written. 

848 

849 Notes 

850 ----- 

851 The name of the config file is stored in the Config object. 

852 

853 Raises 

854 ------ 

855 FileExistsError 

856 Raised if the file already exists but overwrite is False. 

857 """ 

858 if overwrite: 

859 mode = "w" 

860 else: 

861 mode = "x" 

862 with open(path, mode) as f: 

863 self.dump(f) 

864 self.configFile = path 

865 

866 def dumpToS3File(self, uri, *, overwrite=True): 

867 """Writes the config to a file in S3 Bucket. 

868 

869 Parameters 

870 ---------- 

871 uri : `ButlerURI` 

872 S3 URI where the configuration should be stored. 

873 overwrite : `bool`, optional 

874 If False, a check will be made to see if the key already 

875 exists. 

876 

877 Raises 

878 ------ 

879 FileExistsError 

880 Raised if the configuration already exists at this location 

881 and overwrite is set to `False`. 

882 """ 

883 if boto3 is None: 

884 raise ModuleNotFoundError("Could not find boto3. " 

885 "Are you sure it is installed?") 

886 

887 if uri.scheme != "s3": 

888 raise ValueError(f"Must provide S3 URI not {uri}") 

889 

890 s3 = getS3Client() 

891 

892 if not overwrite: 

893 from .s3utils import s3CheckFileExists 

894 if s3CheckFileExists(uri, client=s3)[0]: 

895 raise FileExistsError(f"Config already exists at {uri}") 

896 

897 bucket = uri.netloc 

898 key = uri.relativeToPathRoot 

899 

900 with io.StringIO() as stream: 

901 self.dump(stream) 

902 stream.seek(0) 

903 s3.put_object(Bucket=bucket, Key=key, Body=stream.read()) 

904 

905 @staticmethod 

906 def updateParameters(configType, config, full, toUpdate=None, toCopy=None, overwrite=True): 

907 """Generic helper function for updating specific config parameters. 

908 

909 Allows for named parameters to be set to new values in bulk, and 

910 for other values to be set by copying from a reference config. 

911 

912 Assumes that the supplied config is compatible with ``configType`` 

913 and will attach the updated values to the supplied config by 

914 looking for the related component key. It is assumed that 

915 ``config`` and ``full`` are from the same part of the 

916 configuration hierarchy. 

917 

918 Parameters 

919 ---------- 

920 configType : `ConfigSubset` 

921 Config type to use to extract relevant items from ``config``. 

922 config : `Config` 

923 A `Config` to update. Only the subset understood by 

924 the supplied `ConfigSubset` will be modified. Default values 

925 will not be inserted and the content will not be validated 

926 since mandatory keys are allowed to be missing until 

927 populated later by merging. 

928 full : `Config` 

929 A complete config with all defaults expanded that can be 

930 converted to a ``configType``. Read-only and will not be 

931 modified by this method. Values are read from here if 

932 ``toCopy`` is defined. 

933 

934 Repository-specific options that should not be obtained 

935 from defaults when Butler instances are constructed 

936 should be copied from ``full`` to ``config``. 

937 toUpdate : `dict`, optional 

938 A `dict` defining the keys to update and the new value to use. 

939 The keys and values can be any supported by `Config` 

940 assignment. 

941 toCopy : `tuple`, optional 

942 `tuple` of keys whose values should be copied from ``full`` 

943 into ``config``. 

944 overwrite : `bool`, optional 

945 If `False`, do not modify a value in ``config`` if the key 

946 already exists. Default is always to overwrite. 

947 

948 Raises 

949 ------ 

950 ValueError 

951 Neither ``toUpdate`` not ``toCopy`` were defined. 

952 """ 

953 if toUpdate is None and toCopy is None: 

954 raise ValueError("One of toUpdate or toCopy parameters must be set.") 

955 

956 # If this is a parent configuration then we need to ensure that 

957 # the supplied config has the relevant component key in it. 

958 # If this is a parent configuration we add in the stub entry 

959 # so that the ConfigSubset constructor will do the right thing. 

960 # We check full for this since that is guaranteed to be complete. 

961 if configType.component in full and configType.component not in config: 

962 config[configType.component] = {} 

963 

964 # Extract the part of the config we wish to update 

965 localConfig = configType(config, mergeDefaults=False, validate=False) 

966 

967 if toUpdate: 

968 for key, value in toUpdate.items(): 

969 if key in localConfig and not overwrite: 

970 log.debug("Not overriding key '%s' with value '%s' in config %s", 

971 key, value, localConfig.__class__.__name__) 

972 else: 

973 localConfig[key] = value 

974 

975 if toCopy: 

976 localFullConfig = configType(full, mergeDefaults=False) 

977 for key in toCopy: 

978 if key in localConfig and not overwrite: 

979 log.debug("Not overriding key '%s' from defaults in config %s", 

980 key, localConfig.__class__.__name__) 

981 else: 

982 localConfig[key] = localFullConfig[key] 

983 

984 # Reattach to parent if this is a child config 

985 if configType.component in config: 

986 config[configType.component] = localConfig 

987 else: 

988 config.update(localConfig) 

989 

def toDict(self):
    """Return the content of this `Config` as a standalone `dict`.

    Returns
    -------
    d : `dict`
        A deep copy of the internal data in which every value that is
        itself a `Config` has been replaced by the result of calling its
        own ``toDict()``.

    Notes
    -----
    Handy for handing configuration to code that only understands
    native Python types.  The returned object shares no state with
    this `Config`.
    """
    plain = copy.deepcopy(self._data)
    # Collect the keys first, then rewrite those entries in place so the
    # container returned is the deep-copied one, whatever its exact type.
    nestedKeys = [key for key, item in plain.items() if isinstance(item, Config)]
    for key in nestedKeys:
        plain[key] = plain[key].toDict()
    return plain

1010 

1011 

class ConfigSubset(Config):
    """Config representing a subset of a more general configuration.

    Subclasses define their own component and when given a configuration
    that includes that component, the resulting configuration only includes
    the subset. For example, your config might contain ``dimensions`` if it's
    part of a global config and that subset will be stored. If ``dimensions``
    can not be found it is assumed that the entire contents of the
    configuration should be used.

    Default values are read from the environment or supplied search paths
    using the default configuration file name specified in the subclass.
    This allows a configuration class to be instantiated without any
    additional arguments.

    Additional validation can be specified to check for keys that are mandatory
    in the configuration.

    Parameters
    ----------
    other : `Config` or `str` or `dict`
        Argument specifying the configuration information as understood
        by `Config`
    validate : `bool`, optional
        If `True` required keys will be checked to ensure configuration
        consistency.
    mergeDefaults : `bool`, optional
        If `True` defaults will be read and the supplied config will
        be combined with the defaults, with the supplied values taking
        precedence.
    searchPaths : `list` or `tuple`, optional
        Explicit additional paths to search for defaults. They should
        be supplied in priority order. These paths have higher priority
        than those read from the environment in
        `ConfigSubset.defaultSearchPaths()`.
    """

    component: ClassVar[Optional[str]] = None
    """Component to use from supplied config. Can be None. If specified the
    key is not required. Can be a full dot-separated path to a component.
    """

    requiredKeys: ClassVar[Sequence[str]] = ()
    """Keys that are required to be specified in the configuration.
    """

    defaultConfigFile: ClassVar[Optional[str]] = None
    """Name of the file containing defaults for this config class.
    """

    def __init__(self, other=None, validate=True, mergeDefaults=True, searchPaths=None):
        """Build the subset, optionally merging defaults and validating.

        See the class documentation for a description of the parameters.

        Raises
        ------
        RuntimeError
            Raised if the class named by the ``cls`` entry can not be
            imported.
        KeyError
            Raised by `validate` if ``validate`` is `True` and mandatory
            keys are missing.
        """

        # Create a blank object to receive the defaults
        # Once we have the defaults we then update with the external values
        super().__init__()

        # Create a standard Config rather than subset
        externalConfig = Config(other)

        # Select the part we need from it
        # To simplify the use of !include we also check for the existence of
        # component.component (since the included files can themselves
        # include the component name)
        if self.component is not None:
            doubled = (self.component, self.component)
            # Must check for double depth first
            if doubled in externalConfig:
                externalConfig = externalConfig[doubled]
            elif self.component in externalConfig:
                try:
                    externalConfig._data = externalConfig._data[self.component]
                except KeyError:
                    # The membership test above goes through Config key
                    # handling, whereas this is a direct lookup in the
                    # underlying data.  ``component`` is documented as
                    # possibly being a dot-separated path, for which the
                    # direct lookup fails; fall back to Config item access
                    # as already used for the doubled key.
                    # NOTE(review): assumes Config item access resolves
                    # whatever its ``in`` test accepted -- confirm.
                    externalConfig = externalConfig[self.component]

        # Default files read to create this configuration
        self.filesRead = []

        # Assume we are not looking up child configurations
        containerKey = None

        # Sometimes we do not want to merge with defaults.
        if mergeDefaults:

            # Supplied search paths have highest priority
            fullSearchPath = []
            if searchPaths:
                fullSearchPath.extend(searchPaths)

            # Read default paths from environment
            fullSearchPath.extend(self.defaultSearchPaths())

            # There are two places to find defaults for this particular config
            # - The "defaultConfigFile" defined in the subclass
            # - The class specified in the "cls" element in the config.
            #   Read cls after merging in case it changes.
            if self.defaultConfigFile is not None:
                self._updateWithConfigsFromPath(fullSearchPath, self.defaultConfigFile)

            # Can have a class specification in the external config (priority)
            # or from the defaults.
            pytype = None
            if "cls" in externalConfig:
                pytype = externalConfig["cls"]
            elif "cls" in self:
                pytype = self["cls"]

            if pytype is not None:
                try:
                    configClass = doImport(pytype)
                except ImportError as e:
                    raise RuntimeError(f"Failed to import cls '{pytype}' for config {type(self)}") from e
                defaultsFile = configClass.defaultConfigFile
                if defaultsFile is not None:
                    self._updateWithConfigsFromPath(fullSearchPath, defaultsFile)

                # Get the container key in case we need it; classes that
                # do not define one leave it as None.
                containerKey = getattr(configClass, "containerKey", None)

        # Now update this object with the external values so that the external
        # values always override the defaults
        self.update(externalConfig)

        # If this configuration has child configurations of the same
        # config class, we need to expand those defaults as well.

        if mergeDefaults and containerKey is not None and containerKey in self:
            for idx, subConfig in enumerate(self[containerKey]):
                self[containerKey, idx] = type(self)(other=subConfig, validate=validate,
                                                     mergeDefaults=mergeDefaults,
                                                     searchPaths=searchPaths)

        if validate:
            self.validate()

    @classmethod
    def defaultSearchPaths(cls):
        """Read the environment to determine search paths to use for global
        defaults.

        Global defaults, at lowest priority, are found in the ``config``
        directory of the butler source tree. Additional defaults can be
        defined using the environment variable ``$DAF_BUTLER_CONFIG_PATH``
        which is a PATH-like variable where paths at the front of the list
        have priority over those later. Empty entries in that variable
        (for example from a trailing or doubled separator) are ignored.

        Returns
        -------
        paths : `list`
            Returns a list of paths to search. The returned order is in
            priority with the highest priority paths first. The butler config
            directory will always be at the end of the list.
        """
        # We can pick up defaults from multiple search paths
        # We fill defaults by using the butler config path and then
        # the config path environment variable in reverse order.
        defaultsPaths = []

        if CONFIG_PATH in os.environ:
            externalPaths = os.environ[CONFIG_PATH].split(os.pathsep)
            # An empty entry would be joined with the config file name to
            # give a bare relative path, i.e. a lookup in the current
            # working directory, which is never what an empty entry means
            # here.
            defaultsPaths.extend(path for path in externalPaths if path)

        # Find the butler configs
        defaultsPaths.append(os.path.join(lsst.utils.getPackageDir("daf_butler"), "config"))

        return defaultsPaths

    def _updateWithConfigsFromPath(self, searchPaths, configFile):
        """Search the supplied paths, merging the configuration values

        The values read will override values currently stored in the object.
        Every file found in the path will be read, such that the earlier
        path entries have higher priority. Each file that is read is also
        recorded in ``self.filesRead``.

        Parameters
        ----------
        searchPaths : `list`
            Paths to search for the supplied configFile. This path
            is the priority order, such that files read from the
            first path entry will be selected over those read from
            a later path.
        configFile : `str`
            File to locate in path. If absolute path it will be read
            directly and the search path will not be used. An absolute
            path that does not exist is silently ignored.
        """
        if os.path.isabs(configFile):
            if os.path.exists(configFile):
                self.filesRead.append(configFile)
                self._updateWithOtherConfigFile(configFile)
        else:
            # Reverse order so that high priority entries
            # update the object last.
            for pathDir in reversed(searchPaths):
                file = os.path.join(pathDir, configFile)
                if os.path.exists(file):
                    self.filesRead.append(file)
                    self._updateWithOtherConfigFile(file)

    def _updateWithOtherConfigFile(self, file):
        """Read in some defaults and update.

        Update the configuration by reading the supplied file as a config
        of this class, and merging such that these values override the
        current values. Contents of the external config are not validated.

        Parameters
        ----------
        file : `Config`, `str`, or `dict`
            Entity that can be converted to a `ConfigSubset`.
        """
        # Use this class to read the defaults so that subsetting can happen
        # correctly.
        externalConfig = type(self)(file, validate=False, mergeDefaults=False)
        self.update(externalConfig)

    def validate(self):
        """Check that mandatory keys are present in this configuration.

        Ignored if ``requiredKeys`` is empty. Each required key is looked
        up directly in the underlying data of this config.

        Raises
        ------
        KeyError
            Raised if one or more of the keys listed in ``requiredKeys``
            are missing.
        """
        # Validation
        missing = [k for k in self.requiredKeys if k not in self._data]
        if missing:
            raise KeyError(f"Mandatory keys ({missing}) missing from supplied configuration for {type(self)}")