# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Configuration control."""

__all__ = ("Config", "ConfigSubset")

import collections
import copy
import logging
import pprint
import os
import yaml
import sys
from yaml.representer import Representer
import io
import posixpath
from typing import Sequence, Optional, ClassVar

try:
    import boto3
except ImportError:
    boto3 = None

import lsst.utils
from lsst.utils import doImport
from .location import ButlerURI

yaml.add_representer(collections.defaultdict, Representer.represent_dict)


# Config module logger
log = logging.getLogger(__name__)

# PATH-like environment variable to use for defaults.
CONFIG_PATH = "DAF_BUTLER_CONFIG_PATH"


class Loader(yaml.CSafeLoader):
    """YAML Loader that supports file include directives.

    Uses the ``!include`` directive in a YAML file to point to another
    YAML file to be included. The path in the include directive is relative
    to the file containing that directive. For example::

        storageClasses: !include storageClasses.yaml

    Examples
    --------
    >>> with open("document.yaml", "r") as f:
    ...     data = yaml.load(f, Loader=Loader)

    Notes
    -----
    See https://davidchall.github.io/yaml-includes.html
    """

    def __init__(self, stream):
        super().__init__(stream)
        self._root = ButlerURI(stream.name)
        Loader.add_constructor("!include", Loader.include)

    def include(self, node):
        if isinstance(node, yaml.ScalarNode):
            return self.extractFile(self.construct_scalar(node))

        elif isinstance(node, yaml.SequenceNode):
            result = []
            for filename in self.construct_sequence(node):
                result.append(self.extractFile(filename))
            return result

        elif isinstance(node, yaml.MappingNode):
            result = {}
            for k, v in self.construct_mapping(node).items():
                result[k] = self.extractFile(v)
            return result

        else:
            print("Error: unrecognised node type in !include statement", file=sys.stderr)
            raise yaml.constructor.ConstructorError

    def extractFile(self, filename):
        fileuri = copy.copy(self._root)
        fileuri.updateFile(filename)
        log.debug("Opening YAML file via !include: %s", fileuri)

        if not fileuri.scheme or fileuri.scheme == "file":
            with open(fileuri.ospath, "r") as f:
                return yaml.load(f, Loader)
        elif fileuri.scheme == "s3":
            if boto3 is None:
                raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")
            s3 = boto3.client("s3")
            try:
                response = s3.get_object(Bucket=fileuri.netloc, Key=fileuri.relativeToPathRoot)
            except (s3.exceptions.NoSuchKey, s3.exceptions.NoSuchBucket) as err:
                raise FileNotFoundError(f"No such file or directory: {fileuri}") from err

            # boto3 response is a `StreamingBody`, but not a valid Python
            # IOStream. Loader will raise an error that the stream has no name.
            # The name is used to resolve the "!include" filename location to
            # download. A hackish solution is to name it explicitly.
            response["Body"].name = fileuri.geturl()
            return yaml.load(response["Body"], Loader)


class Config(collections.abc.MutableMapping):
    r"""Implements a datatype that is used by `Butler` for configuration
    parameters.

    It is essentially a `dict` with key/value pairs, including nested dicts
    (as values). In fact, it can be initialized with a `dict`.
    Key access is explained below.

    Config extends the `dict` API so that hierarchical values may be accessed
    with delimited notation or as a tuple. If a string is given, the delimiter
    is picked up from the first character in that string. For example,
    ``foo.getValue(".a.b.c")``, ``foo["a"]["b"]["c"]``, ``foo["a", "b", "c"]``,
    ``foo[".a.b.c"]``, and ``foo["/a/b/c"]`` all achieve the same outcome.
    If the first character is alphanumeric, no delimiter will be used.
    ``foo["a.b.c"]`` will be a single key ``a.b.c`` as will ``foo[":a.b.c"]``.
    Unicode characters can be used as the delimiter for distinctiveness if
    required.

    If a key in the hierarchy starts with a non-alphanumeric character care
    must be taken to ensure that either the tuple interface is used or
    a distinct delimiter is always given in string form.

    Finally, the delimiter can be escaped if it is part of a key and also
    has to be used as a delimiter. For example, ``foo[r".a.b\.c"]`` results in
    a two-element hierarchy of ``a`` and ``b.c``. For hard-coded strings it is
    always better to use a different delimiter in these cases.

    Note that adding a multi-level key implicitly creates any nesting levels
    that do not exist, but removing multi-level keys does not automatically
    remove empty nesting levels. As a result:

    >>> c = Config()
    >>> c[".a.b"] = 1
    >>> del c[".a.b"]
    >>> c
    Config({'a': {}})

    Storage formats supported:

    - yaml: read and write is supported.

    Parameters
    ----------
    other : `str` or `Config` or `dict`
        Other source of configuration, can be:

        - (`str`) Treated as a path to a config file on disk. Must end with
          ".yaml".
        - (`Config`) Copies the other Config's values into this one.
        - (`dict`) Copies the values from the dict into this Config.

        If `None` is provided an empty `Config` will be created.
    """

    _D: ClassVar[str] = "→"
    """Default internal delimiter to use for components in the hierarchy when
    constructing keys for external use (see `Config.names()`)."""

    includeKey: ClassVar[str] = "includeConfigs"
    """Key used to indicate that another config should be included at this
    part of the hierarchy."""
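
    # Illustrative sketch (the file and key names below are assumptions, not
    # taken from this module): a config file can pull another file in with the
    # ``includeConfigs`` key, e.g.
    #
    #     datastore:
    #       includeConfigs: datastoreDefaults.yaml
    #       root: /some/path
    #
    # The included file is merged in at that level of the hierarchy and the
    # values written explicitly in the including file (``root`` here) take
    # precedence, as implemented in `_processExplicitIncludes`.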

    def __init__(self, other=None):
        self._data = {}
        self.configFile = None

        if other is None:
            return

        if isinstance(other, Config):
            self._data = copy.deepcopy(other._data)
            self.configFile = other.configFile
        elif isinstance(other, collections.abc.Mapping):
            self.update(other)
        elif isinstance(other, str):
            # if other is a string, assume it is a file path.
            self.__initFromFile(other)
            self._processExplicitIncludes()
        else:
            # if the config specified by other could not be recognized raise
            # a runtime error.
            raise RuntimeError("A Config could not be loaded from other: %s" % other)
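
    # Usage sketch (illustrative; the file path below is hypothetical):
    #
    #     c1 = Config({"a": {"b": 2}})         # from a dict
    #     c2 = Config(c1)                      # deep copy of another Config
    #     c3 = Config("/path/to/butler.yaml")  # read from a YAML file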

    def ppprint(self):
        """Helper function for debugging; returns the config formatted so
        that it reads well when printed in a debugger.

        use: pdb> print(myConfigObject.ppprint())

        Returns
        -------
        s : `str`
            A pretty-printed string representing the config.
        """
        return pprint.pformat(self._data, indent=2, width=1)

    def __repr__(self):
        return f"{type(self).__name__}({self._data!r})"

    def __str__(self):
        return self.ppprint()

    def __len__(self):
        return len(self._data)

    def __iter__(self):
        return iter(self._data)

    def copy(self):
        return type(self)(self)

    def __initFromFile(self, path):
        """Load a file from a path or a URI.

        Parameters
        ----------
        path : `str`
            Path or URI to a persisted config file.
        """
        uri = ButlerURI(path)
        if uri.path.endswith("yaml"):
            if uri.scheme == "s3":
                self.__initFromS3YamlFile(uri.geturl())
            else:
                self.__initFromYamlFile(uri.ospath)
        else:
            raise RuntimeError("Unhandled config file type: %s" % uri)

    def __initFromS3YamlFile(self, url):
        """Load a YAML config file from a given S3 bucket URI.

        Parameters
        ----------
        url : `str`
            URL of a persisted config file in an S3 bucket.
        """
        if boto3 is None:
            raise ModuleNotFoundError("boto3 not found. "
                                      "Are you sure it is installed?")

        uri = ButlerURI(url)
        s3 = boto3.client("s3")
        try:
            response = s3.get_object(Bucket=uri.netloc, Key=uri.relativeToPathRoot)
        except (s3.exceptions.NoSuchKey, s3.exceptions.NoSuchBucket) as err:
            raise FileNotFoundError(f"No such file or directory: {uri}") from err

        # boto3 response is a `StreamingBody`, but not a valid Python IOStream.
        # Loader will raise an error that the stream has no name. A hackish
        # solution is to name it explicitly.
        response["Body"].name = url
        self.__initFromYaml(response["Body"])
        response["Body"].close()

    def __initFromYamlFile(self, path):
        """Open a file at a given path and attempt to load it as YAML.

        Parameters
        ----------
        path : `str`
            Path to a persisted config file in YAML format.
        """
        log.debug("Opening YAML config file: %s", path)
        with open(path, "r") as f:
            self.__initFromYaml(f)
        self.configFile = path

    def __initFromYaml(self, stream):
        """Load a YAML config from any readable stream that contains one.

        Parameters
        ----------
        stream
            Readable stream containing a persisted config in YAML format.

        Raises
        ------
        yaml.YAMLError
            If there is an error loading the file.
        """
        content = yaml.load(stream, Loader=Loader)
        if content is None:
            content = {}
        self._data = content
        return self

    def _processExplicitIncludes(self):
        """Scan through the configuration searching for the special
        includeConfigs directive and process the includes."""

        # Search paths for config files
        searchPaths = [os.path.curdir]
        if self.configFile is not None:
            searchPaths.append(os.path.abspath(os.path.dirname(self.configFile)))

        # Ensure we know what delimiter to use
        names = self.nameTuples()
        for path in names:
            if path[-1] == self.includeKey:

                log.debug("Processing file include directive at %s", self._D + self._D.join(path))
                basePath = path[:-1]

                # Extract the includes and then delete them from the config
                includes = self[path]
                del self[path]

                # Be consistent and convert to a list
                if not isinstance(includes, list):
                    includes = [includes]

                # Read each include, treating it as a reference to a file that
                # can be relative to the config file or the cwd.
                # ConfigSubset search paths are not used.
                # At some point these might be URIs which we will have to
                # assume resolve explicitly.
                subConfigs = []
                for fileName in includes:
                    found = None
                    if os.path.isabs(fileName):
                        found = fileName
                    else:
                        for dir in searchPaths:
                            filePath = os.path.join(dir, fileName)
                            if os.path.exists(filePath):
                                found = os.path.normpath(os.path.abspath(filePath))
                                break
                    if not found:
                        raise RuntimeError(f"Unable to find referenced include file: {fileName}")

                    # Read the referenced Config as a Config
                    subConfigs.append(type(self)(found))

                # Now we need to merge these sub configs with the current
                # information that was present in this node in the config
                # tree with precedence given to the explicit values
                newConfig = subConfigs.pop(0)
                for sc in subConfigs:
                    newConfig.update(sc)

                # Explicit values take precedence
                if not basePath:
                    # This is an include at the root config
                    newConfig.update(self)
                    # Replace the current config
                    self._data = newConfig._data
                else:
                    newConfig.update(self[basePath])
                    # And reattach to the base config
                    self[basePath] = newConfig

    @staticmethod
    def _splitIntoKeys(key):
        r"""Split the argument for get/set/in into a hierarchical list.

        Parameters
        ----------
        key : `str` or iterable
            Argument given to get/set/in. If an iterable is provided it will
            be converted to a list. If the first character of the string
            is not an alphanumeric character then it will be used as the
            delimiter for the purposes of splitting the remainder of the
            string. If the delimiter is also in one of the keys then it
            can be escaped using ``\``. There is no default delimiter.

        Returns
        -------
        keys : `list`
            Hierarchical keys as a `list`.
        """
        if isinstance(key, str):
            if not key[0].isalnum():
                d = key[0]
                key = key[1:]
            else:
                return [key, ]
            escaped = f"\\{d}"
            temp = None
            if escaped in key:
                # Complain at the attempt to escape the escape
                doubled = fr"\{escaped}"
                if doubled in key:
                    raise ValueError(f"Escaping an escaped delimiter ({doubled} in {key})"
                                     " is not yet supported.")
                # Replace with a character that won't be in the string
                temp = "\r"
                if temp in key or d == temp:
                    raise ValueError(f"Can not use character {temp!r} in hierarchical key or as"
                                     " delimiter if escaping the delimiter")
                key = key.replace(escaped, temp)
            hierarchy = key.split(d)
            if temp:
                hierarchy = [h.replace(temp, d) for h in hierarchy]
            return hierarchy
        elif isinstance(key, collections.abc.Iterable):
            return list(key)
        else:
            # Not sure what this is so try it anyway
            return [key, ]
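
    # For example (illustrative), _splitIntoKeys gives:
    #
    #     _splitIntoKeys(".a.b.c")    -> ["a", "b", "c"]
    #     _splitIntoKeys("/a/b.c")    -> ["a", "b.c"]
    #     _splitIntoKeys(r".a.b\.c")  -> ["a", "b.c"]   (escaped delimiter)
    #     _splitIntoKeys(("a", "b"))  -> ["a", "b"]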

    def _getKeyHierarchy(self, name):
        """Retrieve the key hierarchy for accessing the Config.

        Parameters
        ----------
        name : `str` or `tuple`
            Delimited string or `tuple` of hierarchical keys.

        Returns
        -------
        hierarchy : `list` of `str`
            Hierarchy to use as a `list`. If the name is available directly
            as a key in the Config it will be used regardless of the presence
            of any nominal delimiter.
        """
        if name in self._data:
            keys = [name, ]
        else:
            keys = self._splitIntoKeys(name)
        return keys

    def _findInHierarchy(self, keys, create=False):
        """Look for a hierarchy of keys in the Config.

        Parameters
        ----------
        keys : `list` or `tuple`
            Keys to search in hierarchy.
        create : `bool`, optional
            If `True`, if a part of the hierarchy does not exist, insert an
            empty `dict` into the hierarchy.

        Returns
        -------
        hierarchy : `list`
            List of the values corresponding to each key in the supplied
            hierarchy. Only keys that exist in the hierarchy will have
            a value.
        complete : `bool`
            `True` if the full hierarchy exists and the final element
            in ``hierarchy`` is the relevant value.
        """
        d = self._data

        def checkNextItem(k, d, create):
            """See if k is in d and if it is return the new child."""
            nextVal = None
            isThere = False
            if d is None:
                # We have gone past the end of the hierarchy
                pass
            elif isinstance(d, collections.abc.Sequence):
                # Check sequence first because for lists
                # __contains__ checks whether value is found in list
                # not whether the index exists in list. When we traverse
                # the hierarchy we are interested in the index.
                try:
                    nextVal = d[int(k)]
                    isThere = True
                except IndexError:
                    pass
                except ValueError:
                    isThere = k in d
            elif k in d:
                nextVal = d[k]
                isThere = True
            elif create:
                d[k] = {}
                nextVal = d[k]
                isThere = True
            return nextVal, isThere

        hierarchy = []
        complete = True
        for k in keys:
            d, isThere = checkNextItem(k, d, create)
            if isThere:
                hierarchy.append(d)
            else:
                complete = False
                break

        return hierarchy, complete
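
    # For example (illustrative), with ``self._data == {"a": {"b": 1}}``:
    #
    #     _findInHierarchy(["a", "b"])  -> ([{"b": 1}, 1], True)
    #     _findInHierarchy(["a", "c"])  -> ([{"b": 1}], False)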

    def __getitem__(self, name):
        # Override the split for the simple case where there is an exact
        # match. This allows `Config.items()` to work via a simple
        # __iter__ implementation that returns top level keys of
        # self._data.
        keys = self._getKeyHierarchy(name)

        hierarchy, complete = self._findInHierarchy(keys)
        if not complete:
            raise KeyError(f"{name} not found")
        data = hierarchy[-1]

        if isinstance(data, collections.abc.Mapping):
            data = Config(data)
            # Ensure that child configs inherit the parent internal delimiter
            if self._D != Config._D:
                data._D = self._D
        return data

    def __setitem__(self, name, value):
        keys = self._getKeyHierarchy(name)
        last = keys.pop()
        if isinstance(value, Config):
            value = copy.deepcopy(value._data)

        hierarchy, complete = self._findInHierarchy(keys, create=True)
        if hierarchy:
            data = hierarchy[-1]
        else:
            data = self._data

        try:
            data[last] = value
        except TypeError:
            data[int(last)] = value
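
    # Usage sketch (illustrative) of hierarchical get/set:
    #
    #     c = Config()
    #     c[".a.b.c"] = 1     # intermediate levels are created automatically
    #     c["a", "b", "c"]    # -> 1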

    def __contains__(self, key):
        keys = self._getKeyHierarchy(key)
        hierarchy, complete = self._findInHierarchy(keys)
        return complete

    def __delitem__(self, key):
        keys = self._getKeyHierarchy(key)
        last = keys.pop()
        hierarchy, complete = self._findInHierarchy(keys)
        if complete:
            if hierarchy:
                data = hierarchy[-1]
            else:
                data = self._data
            del data[last]
        else:
            raise KeyError(f"{key} not found in Config")

    def update(self, other):
        """Like `dict.update`, but will add or modify keys in nested dicts,
        instead of overwriting the nested dict entirely.

        For example, given the code::

            foo = {"a": {"b": 1}}
            foo.update({"a": {"c": 2}})

        If ``foo`` is a plain `dict`, then after the update
        ``foo == {"a": {"c": 2}}``, but if ``foo`` is a `Config`, then after
        the update ``foo == {"a": {"b": 1, "c": 2}}``.

        Parameters
        ----------
        other : `dict` or `Config`
            Source of configuration.
        """
        def doUpdate(d, u):
            if not isinstance(u, collections.abc.Mapping) or \
                    not isinstance(d, collections.abc.Mapping):
                raise RuntimeError("Only call update with Mapping, not {}".format(type(d)))
            for k, v in u.items():
                if isinstance(v, collections.abc.Mapping):
                    d[k] = doUpdate(d.get(k, {}), v)
                else:
                    d[k] = v
            return d
        doUpdate(self._data, other)

    def merge(self, other):
        """Like `Config.update`, but only adds keys and values from ``other``
        that do not exist in ``self``.

        Keys and values that already exist in ``self`` will not be
        overwritten.

        Parameters
        ----------
        other : `dict` or `Config`
            Source of configuration.
        """
        otherCopy = copy.deepcopy(other)
        otherCopy.update(self)
        self._data = otherCopy._data
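
    # For example (illustrative):
    #
    #     c = Config({"a": 1})
    #     c.merge(Config({"a": 99, "b": 2}))
    #     c   # -> {"a": 1, "b": 2}; values already in ``c`` win over ``other``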

    def nameTuples(self, topLevelOnly=False):
        """Get tuples representing the name hierarchies of all keys.

        The tuples returned from this method are guaranteed to be usable
        to access items in the configuration object.

        Parameters
        ----------
        topLevelOnly : `bool`, optional
            If `False`, the default, a full hierarchy of names is returned.
            If `True`, only the top-level keys are returned.

        Returns
        -------
        names : `list` of `tuple` of `str`
            List of all names present in the `Config` where each element
            in the list is a `tuple` of strings representing the hierarchy.
        """
        if topLevelOnly:
            return list((k,) for k in self)

        def getKeysAsTuples(d, keys, base):
            if isinstance(d, collections.abc.Sequence):
                theseKeys = range(len(d))
            else:
                theseKeys = d.keys()
            for key in theseKeys:
                val = d[key]
                levelKey = base + (key,) if base is not None else (key,)
                keys.append(levelKey)
                if isinstance(val, (collections.abc.Mapping, collections.abc.Sequence)) \
                        and not isinstance(val, str):
                    getKeysAsTuples(val, keys, levelKey)
        keys = []
        getKeysAsTuples(self._data, keys, None)
        return keys
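
    # For example (illustrative), for ``Config({"a": {"b": 1}, "c": [2, 3]})``:
    #
    #     nameTuples()  -> [("a",), ("a", "b"), ("c",), ("c", 0), ("c", 1)]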

    def names(self, topLevelOnly=False, delimiter=None):
        """Get a delimited name for every key in the hierarchy.

        The values returned from this method are guaranteed to be usable
        to access items in the configuration object.

        Parameters
        ----------
        topLevelOnly : `bool`, optional
            If `False`, the default, a full hierarchy of names is returned.
            If `True`, only the top-level keys are returned.
        delimiter : `str`, optional
            Delimiter to use when forming the keys. If the delimiter is
            present in any of the keys, it will be escaped in the returned
            names. If `None` is given a delimiter will be chosen
            automatically. The delimiter can not be alphanumeric.

        Returns
        -------
        names : `list` of `str`
            List of all names present in the `Config`.

        Notes
        -----
        This is different from the built-in method `dict.keys`, which will
        return only the first level keys.

        Raises
        ------
        ValueError
            The supplied delimiter is alphanumeric.
        """
        if topLevelOnly:
            return list(self.keys())

        # Get all the tuples of hierarchical keys
        nameTuples = self.nameTuples()

        if delimiter is not None and delimiter.isalnum():
            raise ValueError(f"Supplied delimiter ({delimiter!r}) must not be alphanumeric.")

        if delimiter is None:
            # Start with something, and ensure it does not need to be
            # escaped (it is much easier to understand if not escaped)
            delimiter = self._D

            # Form big string for easy check of delimiter clash
            combined = "".join("".join(str(s) for s in k) for k in nameTuples)

            # Try a delimiter and keep trying until we get something that
            # works.
            ntries = 0
            while delimiter in combined:
                log.debug("Delimiter %r could not be used. Trying another.", delimiter)
                ntries += 1

                if ntries > 100:
                    raise ValueError(f"Unable to determine a delimiter for Config {self}")

                # try another one
                while True:
                    delimiter = chr(ord(delimiter)+1)
                    if not delimiter.isalnum():
                        break

        log.debug("Using delimiter %r", delimiter)

        # Form the keys, escaping the delimiter if necessary
        strings = [delimiter + delimiter.join(str(s).replace(delimiter, f"\\{delimiter}") for s in k)
                   for k in nameTuples]
        return strings
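
    # For example (illustrative), ``Config({"a": {"b": 1}}).names()`` returns
    # ["→a", "→a→b"] using the default internal delimiter, while
    # ``names(delimiter=".")`` returns [".a", ".a.b"].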

    def asArray(self, name):
        """Get a value as an array.

        May contain one or more elements.

        Parameters
        ----------
        name : `str`
            Key to use to retrieve value.

        Returns
        -------
        array : `collections.abc.Sequence`
            The value corresponding to name, but guaranteed to be returned
            as a list with at least one element. If the value is a
            `~collections.abc.Sequence` (and not a `str`) the value itself
            will be returned, else the returned list will contain the value
            as its only element.
        """
        val = self.get(name)
        if isinstance(val, str):
            val = [val]
        elif not isinstance(val, collections.abc.Sequence):
            val = [val]
        return val
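
    # For example (illustrative):
    #
    #     Config({"a": 1, "b": [1, 2]}).asArray("a")  # -> [1]
    #     Config({"a": 1, "b": [1, 2]}).asArray("b")  # -> [1, 2]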

    def __eq__(self, other):
        if isinstance(other, Config):
            other = other._data
        return self._data == other

    def __ne__(self, other):
        if isinstance(other, Config):
            other = other._data
        return self._data != other

    #######
    # i/o #

    def dump(self, output):
        """Write the config to a YAML stream.

        Parameters
        ----------
        output
            The YAML stream to use for output.
        """
        # First a set of known keys is handled and written to the stream in a
        # specific order for readability.
        # After the expected/ordered keys are written to the stream the
        # remainder of the keys are written to the stream.
        data = copy.copy(self._data)
        keys = []
        for key in keys:
            try:
                yaml.safe_dump({key: data.pop(key)}, output, default_flow_style=False)
                output.write("\n")
            except KeyError:
                pass
        if data:
            yaml.safe_dump(data, output, default_flow_style=False)

    def dumpToUri(self, uri, updateFile=True, defaultFileName="butler.yaml"):
        """Write the config to the location pointed to by the given URI.

        Currently supports 's3' and 'file' URI schemes.

        Parameters
        ----------
        uri : `str` or `ButlerURI`
            URI of location where the Config will be written.
        updateFile : `bool`, optional
            If `True` and ``uri`` does not end in a filename with an
            extension, `defaultFileName` will be appended to the target URI.
            `True` by default.
        defaultFileName : `str`, optional
            The file name that will be appended to the target URI if
            ``updateFile`` is `True` and the URI does not end in a file with
            an extension.
        """
        if isinstance(uri, str):
            uri = ButlerURI(uri)

        if not uri.scheme or uri.scheme == "file":
            if os.path.isdir(uri.path) and updateFile:
                uri = ButlerURI(os.path.join(uri.ospath, defaultFileName))
            self.dumpToFile(uri.ospath)
        elif uri.scheme == "s3":
            head, filename = posixpath.split(uri.path)
            if "." not in filename:
                uri.updateFile(defaultFileName)
            self.dumpToS3File(uri.netloc, uri.relativeToPathRoot)
        else:
            raise ValueError(f"Unrecognized URI scheme: {uri.scheme}")
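
    # Usage sketch (illustrative; the locations below are hypothetical and the
    # local directory is assumed to exist):
    #
    #     config.dumpToUri("/tmp/repo")          # -> /tmp/repo/butler.yaml
    #     config.dumpToUri("s3://bucket/repo/")  # -> s3://bucket/repo/butler.yaml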

    def dumpToFile(self, path):
        """Write the config to a file.

        Parameters
        ----------
        path : `str`
            Path to the file to use for output.

        Notes
        -----
        The name of the config file is stored in the Config object.
        """
        with open(path, "w") as f:
            self.dump(f)
        self.configFile = path

    def dumpToS3File(self, bucket, key):
        """Write the config to a file in an S3 bucket.

        Parameters
        ----------
        bucket : `str`
            Name of the bucket into which the config will be written.
        key : `str`
            Path to the file to use for output, relative to the bucket.
        """
        if boto3 is None:
            raise ModuleNotFoundError("Could not find boto3. "
                                      "Are you sure it is installed?")

        s3 = boto3.client("s3")
        with io.StringIO() as stream:
            self.dump(stream)
            stream.seek(0)
            s3.put_object(Bucket=bucket, Key=key, Body=stream.read())

    @staticmethod
    def updateParameters(configType, config, full, toUpdate=None, toCopy=None, overwrite=True):
        """Generic helper function for updating specific config parameters.

        Allows for named parameters to be set to new values in bulk, and
        for other values to be set by copying from a reference config.

        Assumes that the supplied config is compatible with ``configType``
        and will attach the updated values to the supplied config by
        looking for the related component key. It is assumed that
        ``config`` and ``full`` are from the same part of the
        configuration hierarchy.

        Parameters
        ----------
        configType : `ConfigSubset`
            Config type to use to extract relevant items from ``config``.
        config : `Config`
            A `Config` to update. Only the subset understood by
            the supplied `ConfigSubset` will be modified. Default values
            will not be inserted and the content will not be validated
            since mandatory keys are allowed to be missing until
            populated later by merging.
        full : `Config`
            A complete config with all defaults expanded that can be
            converted to a ``configType``. Read-only and will not be
            modified by this method. Values are read from here if
            ``toCopy`` is defined.

            Repository-specific options that should not be obtained
            from defaults when Butler instances are constructed
            should be copied from ``full`` to ``config``.
        toUpdate : `dict`, optional
            A `dict` defining the keys to update and the new value to use.
            The keys and values can be any supported by `Config`
            assignment.
        toCopy : `tuple`, optional
            `tuple` of keys whose values should be copied from ``full``
            into ``config``.
        overwrite : `bool`, optional
            If `False`, do not modify a value in ``config`` if the key
            already exists. Default is always to overwrite.

        Raises
        ------
        ValueError
            Neither ``toUpdate`` nor ``toCopy`` were defined.
        """
        if toUpdate is None and toCopy is None:
            raise ValueError("One of toUpdate or toCopy parameters must be set.")

        # If this is a parent configuration then we need to ensure that
        # the supplied config has the relevant component key in it.
        # If this is a parent configuration we add in the stub entry
        # so that the ConfigSubset constructor will do the right thing.
        # We check full for this since that is guaranteed to be complete.
        if configType.component in full and configType.component not in config:
            config[configType.component] = {}

        # Extract the part of the config we wish to update
        localConfig = configType(config, mergeDefaults=False, validate=False)

        if toUpdate:
            for key, value in toUpdate.items():
                if key in localConfig and not overwrite:
                    log.debug("Not overriding key '%s' with value '%s' in config %s",
                              key, value, localConfig.__class__.__name__)
                else:
                    localConfig[key] = value

        if toCopy:
            localFullConfig = configType(full, mergeDefaults=False)
            for key in toCopy:
                if key in localConfig and not overwrite:
                    log.debug("Not overriding key '%s' from defaults in config %s",
                              key, localConfig.__class__.__name__)
                else:
                    localConfig[key] = localFullConfig[key]

        # Reattach to parent if this is a child config
        if configType.component in config:
            config[configType.component] = localConfig
        else:
            config.update(localConfig)
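
    # Usage sketch (illustrative; ``DatastoreConfig`` and the keys shown are
    # assumptions, not taken from this file):
    #
    #     Config.updateParameters(DatastoreConfig, config, full,
    #                             toUpdate={"root": "/new/root"},
    #                             toCopy=("cls",))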


class ConfigSubset(Config):
    """Config representing a subset of a more general configuration.

    Subclasses define their own component and when given a configuration
    that includes that component, the resulting configuration only includes
    the subset. For example, your config might contain ``dimensions`` if it's
    part of a global config and that subset will be stored. If ``dimensions``
    can not be found it is assumed that the entire contents of the
    configuration should be used.

    Default values are read from the environment or supplied search paths
    using the default configuration file name specified in the subclass.
    This allows a configuration class to be instantiated without any
    additional arguments.

    Additional validation can be specified to check for keys that are
    mandatory in the configuration.

    Parameters
    ----------
    other : `Config` or `str` or `dict`
        Argument specifying the configuration information as understood
        by `Config`.
    validate : `bool`, optional
        If `True` required keys will be checked to ensure configuration
        consistency.
    mergeDefaults : `bool`, optional
        If `True` defaults will be read and the supplied config will
        be combined with the defaults, with the supplied values taking
        precedence.
    searchPaths : `list` or `tuple`, optional
        Explicit additional paths to search for defaults. They should
        be supplied in priority order. These paths have higher priority
        than those read from the environment in
        `ConfigSubset.defaultSearchPaths()`.
    """

    component: ClassVar[Optional[str]] = None
    """Component to use from supplied config. Can be None. If specified the
    key is not required. Can be a full dot-separated path to a component.
    """

    requiredKeys: ClassVar[Sequence[str]] = ()
    """Keys that are required to be specified in the configuration.
    """

    defaultConfigFile: ClassVar[Optional[str]] = None
    """Name of the file containing defaults for this config class.
    """

    def __init__(self, other=None, validate=True, mergeDefaults=True, searchPaths=None):

        # Create a blank object to receive the defaults
        # Once we have the defaults we then update with the external values
        super().__init__()

        # Create a standard Config rather than subset
        externalConfig = Config(other)

        # Select the part we need from it
        # To simplify the use of !include we also check for the existence of
        # component.component (since the included files can themselves
        # include the component name)
        if self.component is not None:
            doubled = (self.component, self.component)
            # Must check for double depth first
            if doubled in externalConfig:
                externalConfig = externalConfig[doubled]
            elif self.component in externalConfig:
                externalConfig._data = externalConfig._data[self.component]

        # Default files read to create this configuration
        self.filesRead = []

        # Assume we are not looking up child configurations
        containerKey = None

        # Sometimes we do not want to merge with defaults.
        if mergeDefaults:

            # Supplied search paths have highest priority
            fullSearchPath = []
            if searchPaths:
                fullSearchPath.extend(searchPaths)

            # Read default paths from environment
            fullSearchPath.extend(self.defaultSearchPaths())

            # There are two places to find defaults for this particular config
            # - The "defaultConfigFile" defined in the subclass
            # - The class specified in the "cls" element in the config.
            # Read cls after merging in case it changes.
            if self.defaultConfigFile is not None:
                self._updateWithConfigsFromPath(fullSearchPath, self.defaultConfigFile)

            # Can have a class specification in the external config (priority)
            # or from the defaults.
            pytype = None
            if "cls" in externalConfig:
                pytype = externalConfig["cls"]
            elif "cls" in self:
                pytype = self["cls"]

            if pytype is not None:
                try:
                    cls = doImport(pytype)
                except ImportError as e:
                    raise RuntimeError(f"Failed to import cls '{pytype}' for config {type(self)}") from e
                defaultsFile = cls.defaultConfigFile
                if defaultsFile is not None:
                    self._updateWithConfigsFromPath(fullSearchPath, defaultsFile)

                # Get the container key in case we need it
                try:
                    containerKey = cls.containerKey
                except AttributeError:
                    pass

        # Now update this object with the external values so that the external
        # values always override the defaults
        self.update(externalConfig)

        # If this configuration has child configurations of the same
        # config class, we need to expand those defaults as well.

        if mergeDefaults and containerKey is not None and containerKey in self:
            for idx, subConfig in enumerate(self[containerKey]):
                self[containerKey, idx] = type(self)(other=subConfig, validate=validate,
                                                     mergeDefaults=mergeDefaults,
                                                     searchPaths=searchPaths)

        if validate:
            self.validate()

    @classmethod
    def defaultSearchPaths(cls):
        """Read the environment to determine search paths to use for global
        defaults.

        Global defaults, at lowest priority, are found in the ``config``
        directory of the butler source tree. Additional defaults can be
        defined using the environment variable ``$DAF_BUTLER_CONFIG_PATH``
        which is a PATH-like variable where paths at the front of the list
        have priority over those later.

        Returns
        -------
        paths : `list`
            Returns a list of paths to search. The returned order is in
            priority with the highest priority paths first. The butler config
            directory will always be at the end of the list.
        """
        # We can pick up defaults from multiple search paths
        # We fill defaults by using the butler config path and then
        # the config path environment variable in reverse order.
        defaultsPaths = []

        if CONFIG_PATH in os.environ:
            externalPaths = os.environ[CONFIG_PATH].split(os.pathsep)
            defaultsPaths.extend(externalPaths)

        # Find the butler configs
        defaultsPaths.append(os.path.join(lsst.utils.getPackageDir("daf_butler"), "config"))

        return defaultsPaths
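
    # For example (illustrative paths, POSIX path separator assumed), with
    #
    #     export DAF_BUTLER_CONFIG_PATH=/my/overrides:/site/defaults
    #
    # the search order is /my/overrides, /site/defaults and finally the
    # ``config`` directory of the installed daf_butler package.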

    def _updateWithConfigsFromPath(self, searchPaths, configFile):
        """Search the supplied paths, merging the configuration values.

        The values read will override values currently stored in the object.
        Every file found in the path will be read, such that the earlier
        path entries have higher priority.

        Parameters
        ----------
        searchPaths : `list`
            Paths to search for the supplied configFile. These paths
            are given in priority order, such that files read from the
            first path entry will be selected over those read from
            a later path.
        configFile : `str`
            File to locate in path. If it is an absolute path it will be read
            directly and the search path will not be used.
        """
        if os.path.isabs(configFile):
            if os.path.exists(configFile):
                self.filesRead.append(configFile)
                self._updateWithOtherConfigFile(configFile)
        else:
            # Reverse order so that high priority entries
            # update the object last.
            for pathDir in reversed(searchPaths):
                file = os.path.join(pathDir, configFile)
                if os.path.exists(file):
                    self.filesRead.append(file)
                    self._updateWithOtherConfigFile(file)

    def _updateWithOtherConfigFile(self, file):
        """Read in some defaults and update.

        Update the configuration by reading the supplied file as a config
        of this class, and merging such that these values override the
        current values. Contents of the external config are not validated.

        Parameters
        ----------
        file : `Config`, `str`, or `dict`
            Entity that can be converted to a `ConfigSubset`.
        """
        # Use this class to read the defaults so that subsetting can happen
        # correctly.
        externalConfig = type(self)(file, validate=False, mergeDefaults=False)
        self.update(externalConfig)

    def validate(self):
        """Check that mandatory keys are present in this configuration.

        Ignored if ``requiredKeys`` is empty."""
        # Validation
        missing = [k for k in self.requiredKeys if k not in self._data]
        if missing:
            raise KeyError(f"Mandatory keys ({missing}) missing from supplied configuration for {type(self)}")