Coverage for python/lsst/daf/butler/core/config.py: 44%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

472 statements  

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24"""Configuration control.""" 

25 

26__all__ = ("Config", "ConfigSubset") 

27 

28import collections 

29import copy 

30import io 

31import json 

32import logging 

33import os 

34import pprint 

35import sys 

36from pathlib import Path 

37from typing import IO, Any, ClassVar, Dict, List, Optional, Sequence, Tuple, Union 

38 

39import yaml 

40from lsst.utils import doImport 

41from yaml.representer import Representer 

42 

43from ._butlerUri import ButlerURI 

44 

45yaml.add_representer(collections.defaultdict, Representer.represent_dict) 

46 

47 

48# Config module logger 

49log = logging.getLogger(__name__) 

50 

51# PATH-like environment variable to use for defaults. 

52CONFIG_PATH = "DAF_BUTLER_CONFIG_PATH" 

53 

54try: 

55 yamlLoader = yaml.CSafeLoader 

56except AttributeError: 

57 # Not all installations have the C library 

58 # (but assume for mypy's sake that they're the same) 

59 yamlLoader = yaml.SafeLoader # type: ignore 

60 

61 

62def _doUpdate(d, u): 

63 if not isinstance(u, collections.abc.Mapping) or not isinstance(d, collections.abc.MutableMapping): 63 ↛ 64line 63 didn't jump to line 64, because the condition on line 63 was never true

64 raise RuntimeError("Only call update with Mapping, not {}".format(type(d))) 

65 for k, v in u.items(): 

66 if isinstance(v, collections.abc.Mapping): 

67 d[k] = _doUpdate(d.get(k, {}), v) 

68 else: 

69 d[k] = v 

70 return d 

71 

72 

73def _checkNextItem(k, d, create, must_be_dict): 

74 """See if k is in d and if it is return the new child.""" 

75 nextVal = None 

76 isThere = False 

77 if d is None: 77 ↛ 79line 77 didn't jump to line 79, because the condition on line 77 was never true

78 # We have gone past the end of the hierarchy 

79 pass 

80 elif not must_be_dict and isinstance(d, collections.abc.Sequence): 80 ↛ 85line 80 didn't jump to line 85, because the condition on line 80 was never true

81 # Check for Sequence first because for lists 

82 # __contains__ checks whether value is found in list 

83 # not whether the index exists in list. When we traverse 

84 # the hierarchy we are interested in the index. 

85 try: 

86 nextVal = d[int(k)] 

87 isThere = True 

88 except IndexError: 

89 pass 

90 except ValueError: 

91 isThere = k in d 

92 elif k in d: 

93 nextVal = d[k] 

94 isThere = True 

95 elif create: 95 ↛ 96line 95 didn't jump to line 96, because the condition on line 95 was never true

96 d[k] = {} 

97 nextVal = d[k] 

98 isThere = True 

99 

100 return nextVal, isThere 

101 

102 

class Loader(yamlLoader):
    """YAML Loader that supports file include directives.

    Uses ``!include`` directive in a YAML file to point to another
    YAML file to be included. The path in the include directive is relative
    to the file containing that directive.

        storageClasses: !include storageClasses.yaml

    Parameters
    ----------
    stream : `IO`, `str` or `bytes`
        Content handed to the base YAML loader. If it has a ``name``
        attribute that name is used as the root against which relative
        includes are resolved.

    Examples
    --------
    >>> with open("document.yaml", "r") as f:
    ...     data = yaml.load(f, Loader=Loader)

    Notes
    -----
    See https://davidchall.github.io/yaml-includes.html
    """

    def __init__(self, stream):
        super().__init__(stream)
        # if this is a string and not a stream we may well lack a name
        try:
            self._root = ButlerURI(stream.name)
        except AttributeError:
            # No choice but to assume a local filesystem
            self._root = ButlerURI("no-file.yaml")
        # Registered on the class every time an instance is created.
        Loader.add_constructor("!include", Loader.include)

    def include(self, node):
        """Construct the value for an ``!include`` tagged node.

        Parameters
        ----------
        node : `yaml.Node`
            Scalar (single file), sequence (list of files) or mapping
            (key to file) node carrying the ``!include`` tag.

        Returns
        -------
        result : `object`, `list` or `dict`
            Loaded content of the file for a scalar node, a list of loaded
            contents for a sequence node, or a dict of loaded contents for
            a mapping node.

        Raises
        ------
        yaml.constructor.ConstructorError
            Raised if the node is not a scalar, sequence or mapping node.
        """
        if isinstance(node, yaml.ScalarNode):
            return self.extractFile(self.construct_scalar(node))

        if isinstance(node, yaml.SequenceNode):
            return [self.extractFile(filename) for filename in self.construct_sequence(node)]

        if isinstance(node, yaml.MappingNode):
            return {k: self.extractFile(v) for k, v in self.construct_mapping(node).items()}

        # Carry the explanation in the exception itself rather than
        # printing it to stderr and raising an empty error.
        raise yaml.constructor.ConstructorError(
            None,
            None,
            f"unrecognised node type in !include statement: {type(node).__name__}",
            getattr(node, "start_mark", None),
        )

    def extractFile(self, filename):
        """Load the YAML file referenced by an include directive.

        Parameters
        ----------
        filename : `str`
            Value of the include directive. Used directly if it has a URI
            scheme, otherwise resolved relative to the root of this loader.

        Returns
        -------
        data : `object`
            Content of the referenced file as loaded by `yaml.load` with
            this `Loader`.
        """
        # It is possible for the !include to point to an explicit URI
        # instead of a relative URI, therefore we first see if it is
        # scheme-less or not. If it has a scheme we use it directly
        # if it is scheme-less we use it relative to the file root.
        requesteduri = ButlerURI(filename, forceAbsolute=False)

        if requesteduri.scheme:
            fileuri = requesteduri
        else:
            fileuri = self._root.updatedFile(filename)

        log.debug("Opening YAML file via !include: %s", fileuri)

        # Read all the data from the resource
        data = fileuri.read()

        # Store the bytes into a BytesIO so we can attach a .name, which
        # nested loaders use as their own root.
        stream = io.BytesIO(data)
        stream.name = fileuri.geturl()
        return yaml.load(stream, Loader)

174 

175 

176class Config(collections.abc.MutableMapping): 

177 r"""Implements a datatype that is used by `Butler` for configuration. 

178 

179 It is essentially a `dict` with key/value pairs, including nested dicts 

180 (as values). In fact, it can be initialized with a `dict`. 

181 This is explained next: 

182 

183 Config extends the `dict` api so that hierarchical values may be accessed 

184 with delimited notation or as a tuple. If a string is given the delimiter 

185 is picked up from the first character in that string. For example, 

186 ``foo.getValue(".a.b.c")``, ``foo["a"]["b"]["c"]``, ``foo["a", "b", "c"]``, 

187 ``foo[".a.b.c"]``, and ``foo["/a/b/c"]`` all achieve the same outcome. 

188 If the first character is alphanumeric, no delimiter will be used. 

189 ``foo["a.b.c"]`` will be a single key ``a.b.c`` as will ``foo[":a.b.c"]``. 

190 Unicode characters can be used as the delimiter for distinctiveness if 

191 required. 

192 

193 If a key in the hierarchy starts with a non-alphanumeric character care 

194 should be used to ensure that either the tuple interface is used or 

195 a distinct delimiter is always given in string form. 

196 

197 Finally, the delimiter can be escaped if it is part of a key and also 

198 has to be used as a delimiter. For example, ``foo[r".a.b\.c"]`` results in 

199 a two element hierarchy of ``a`` and ``b.c``. For hard-coded strings it is 

200 always better to use a different delimiter in these cases. 

201 

202 Note that adding a multi-level key implicitly creates any nesting levels 

203 that do not exist, but removing multi-level keys does not automatically 

204 remove empty nesting levels. As a result: 

205 

206 >>> c = Config() 

207 >>> c[".a.b"] = 1 

208 >>> del c[".a.b"] 

209 >>> c["a"] 

210 Config({'a': {}}) 

211 

212 Storage formats supported: 

213 

214 - yaml: read and write is supported. 

215 - json: read and write is supported but no ``!include`` directive. 

216 

217 Parameters 

218 ---------- 

219 other : `str` or `Config` or `dict` or `ButlerURI` or `pathlib.Path` 

220 Other source of configuration, can be: 

221 

222 - (`str` or `ButlerURI`) Treated as a URI to a config file. Must end 

223 with ".yaml". 

224 - (`Config`) Copies the other Config's values into this one. 

225 - (`dict`) Copies the values from the dict into this Config. 

226 

227 If `None` is provided an empty `Config` will be created. 

228 """ 

229 

230 _D: str = "→" 

231 """Default internal delimiter to use for components in the hierarchy when 

232 constructing keys for external use (see `Config.names()`).""" 

233 

234 includeKey: ClassVar[str] = "includeConfigs" 

235 """Key used to indicate that another config should be included at this 

236 part of the hierarchy.""" 

237 

238 resourcesPackage: str = "lsst.daf.butler" 

239 """Package to search for default configuration data. The resources 

240 themselves will be within a ``configs`` resource hierarchy.""" 

241 

def __init__(self, other=None):
    # Start empty with no associated file; each branch below fills these
    # in from the kind of source that was supplied.
    self._data: Dict[str, Any] = {}
    self.configFile = None

    if other is None:
        return

    if isinstance(other, Config):
        # Deep copy so the two configs never share nested containers.
        self._data = copy.deepcopy(other._data)
        self.configFile = other.configFile
        return

    # In most cases we have a dict, and it's more efficient
    # to check for a dict instance before checking the generic mapping.
    if isinstance(other, (dict, collections.abc.Mapping)):
        self.update(other)
        return

    if isinstance(other, (str, ButlerURI, Path)):
        # Strings and paths are assumed to be a file path/URI.
        self.__initFromUri(other)
        self._processExplicitIncludes()
        return

    # The source specified by other could not be recognized.
    raise RuntimeError(f"A Config could not be loaded from other: {other}")

264 

265 def ppprint(self): 

266 """Return config as formatted readable string. 

267 

268 Examples 

269 -------- 

270 use: ``pdb> print(myConfigObject.ppprint())`` 

271 

272 Returns 

273 ------- 

274 s : `str` 

275 A prettyprint formatted string representing the config 

276 """ 

277 return pprint.pformat(self._data, indent=2, width=1) 

278 

279 def __repr__(self): 

280 return f"{type(self).__name__}({self._data!r})" 

281 

282 def __str__(self): 

283 return self.ppprint() 

284 

285 def __len__(self): 

286 return len(self._data) 

287 

288 def __iter__(self): 

289 return iter(self._data) 

290 

def copy(self):
    """Return a new instance of the same class built from this one."""
    cls = type(self)
    return cls(self)

293 

@classmethod
def fromString(cls, string: str, format: str = "yaml") -> Config:
    """Build a new Config from serialized text.

    Parameters
    ----------
    string : `str`
        Serialized content in the given format.
    format : `str`, optional
        Format of ``string``; either ``yaml`` or ``json``.

    Returns
    -------
    c : `Config`
        Newly-constructed Config with explicit includes processed.

    Raises
    ------
    ValueError
        Raised if ``format`` is neither ``yaml`` nor ``json``.
    """
    if format not in ("yaml", "json"):
        raise ValueError(f"Unexpected format of string: {format}")

    blank = cls()
    if format == "yaml":
        new_config = blank.__initFromYaml(string)
    else:
        new_config = blank.__initFromJson(string)

    new_config._processExplicitIncludes()
    return new_config

318 

@classmethod
def fromYaml(cls, string: str) -> Config:
    """Build a new Config from YAML text.

    Parameters
    ----------
    string : `str`
        Content in YAML format.

    Returns
    -------
    c : `Config`
        Newly-constructed Config.
    """
    # Thin convenience wrapper around the general string constructor.
    parsed = cls.fromString(string, format="yaml")
    return parsed

334 

def __initFromUri(self, path: Union[str, ButlerURI, Path]) -> None:
    """Populate this config from a file at a path or URI.

    Parameters
    ----------
    path : `str`, `ButlerURI` or `pathlib.Path`
        Location of a persisted config file. The extension selects the
        parser: ``.yaml`` or ``.json``.

    Raises
    ------
    FileNotFoundError
        Raised if the extension is not supported and the location does
        not exist.
    RuntimeError
        Raised if the location exists but its extension is not supported.
    """
    uri = ButlerURI(path)
    ext = uri.getExtension()

    if ext not in (".yaml", ".json"):
        # This URI does not have a valid extension. It might be because
        # we ended up with a directory and not a file. Before we complain
        # about an extension, do an existence check. The supported paths
        # below skip the (possibly expensive) check because reading will
        # reveal a missing file soon enough.
        if not uri.exists():
            raise FileNotFoundError(f"Config location {uri} does not exist.")
        raise RuntimeError(f"The Config URI does not have a supported extension: {uri}")

    if ext == ".yaml":
        log.debug("Opening YAML config file: %s", uri.geturl())
        content = uri.read()
        # Use a stream so we can name it
        stream = io.BytesIO(content)
        stream.name = uri.geturl()
        self.__initFromYaml(stream)
    else:
        log.debug("Opening JSON config file: %s", uri.geturl())
        content = uri.read()
        self.__initFromJson(content)

    self.configFile = uri

367 

def __initFromYaml(self, stream):
    """Replace the content of this config with parsed YAML.

    Parameters
    ----------
    stream : `IO` or `str`
        Anything that `yaml.load` accepts, including a plain string as
        well as an IO stream.

    Returns
    -------
    self : `Config`
        This instance, to allow chaining.

    Raises
    ------
    yaml.YAMLError
        If there is an error loading the file.
    """
    parsed = yaml.load(stream, Loader=Loader)
    # An empty document parses to None; store an empty dict instead.
    self._data = {} if parsed is None else parsed
    return self

388 

389 def __initFromJson(self, stream): 

390 """Load a JSON config from any readable stream that contains one. 

391 

392 Parameters 

393 ---------- 

394 stream: `IO` or `str` 

395 Stream to pass to the JSON loader. This can include a string as 

396 well as an IO stream. 

397 

398 Raises 

399 ------ 

400 TypeError: 

401 Raised if there is an error loading the content. 

402 """ 

403 if isinstance(stream, (bytes, str)): 

404 content = json.loads(stream) 

405 else: 

406 content = json.load(stream) 

407 if content is None: 

408 content = {} 

409 self._data = content 

410 return self 

411 

def _processExplicitIncludes(self):
    """Scan through the configuration searching for the special includes.

    Looks for ``includeConfigs`` directive and processes the includes.
    Each referenced file is read as a config of the same type as this
    one. The included configs are merged in the order listed and then
    the values already present at that node are applied on top, so
    explicit values take precedence.

    Raises
    ------
    RuntimeError
        Raised if ``configFile`` is set but is not a `ButlerURI`, or if
        a referenced include file can not be found.
    """
    # Search paths for config files: the current directory and, when
    # known, the directory of the file this config was read from.
    searchPaths = [ButlerURI(os.path.curdir, forceDirectory=True)]
    if self.configFile is not None:
        if not isinstance(self.configFile, ButlerURI):
            raise RuntimeError(f"Unexpected type for config file: {self.configFile}")
        searchPaths.append(self.configFile.dirname())

    for path in self.nameTuples():
        if path[-1] != self.includeKey:
            continue

        # Path components can be integers (indices into sequences), so
        # convert them before joining into a display string.
        log.debug(
            "Processing file include directive at %s",
            self._D + self._D.join(str(p) for p in path),
        )
        basePath = path[:-1]

        # Extract the includes and then delete them from the config
        includes = self[path]
        del self[path]

        # Be consistent and convert to a list
        if not isinstance(includes, list):
            includes = [includes]

        # Read each file assuming it is a reference to a file
        # The file can be relative to config file or cwd
        # ConfigSubset search paths are not used
        subConfigs = []
        for fileName in includes:
            # Expand any shell variables -- this could be URI
            fileName = ButlerURI(os.path.expandvars(fileName), forceAbsolute=False)
            found = None
            if fileName.isabs():
                found = fileName
            else:
                # No early exit: a match in a later search path replaces
                # one found in an earlier search path.
                for searchDir in searchPaths:
                    if isinstance(searchDir, ButlerURI):
                        specific = searchDir.join(fileName.path)
                        # Remote resource check might be expensive
                        if specific.exists():
                            found = specific
                    else:
                        log.warning(
                            "Do not understand search path entry '%s' of type %s",
                            searchDir,
                            type(searchDir).__name__,
                        )
            if not found:
                raise RuntimeError(f"Unable to find referenced include file: {fileName}")

            # Read the referenced Config as a Config
            subConfigs.append(type(self)(found))

        if not subConfigs:
            # The directive listed no files: it has been removed above
            # and there is nothing to merge.
            continue

        # Now we need to merge these sub configs with the current
        # information that was present in this node in the config
        # tree with precedence given to the explicit values
        newConfig = subConfigs.pop(0)
        for sc in subConfigs:
            newConfig.update(sc)

        # Explicit values take precedence
        if not basePath:
            # This is an include at the root config
            newConfig.update(self)
            # Replace the current config
            self._data = newConfig._data
        else:
            newConfig.update(self[basePath])
            # And reattach to the base config
            self[basePath] = newConfig

488 

489 @staticmethod 

490 def _splitIntoKeys(key): 

491 r"""Split the argument for get/set/in into a hierarchical list. 

492 

493 Parameters 

494 ---------- 

495 key : `str` or iterable 

496 Argument given to get/set/in. If an iterable is provided it will 

497 be converted to a list. If the first character of the string 

498 is not an alphanumeric character then it will be used as the 

499 delimiter for the purposes of splitting the remainder of the 

500 string. If the delimiter is also in one of the keys then it 

501 can be escaped using ``\``. There is no default delimiter. 

502 

503 Returns 

504 ------- 

505 keys : `list` 

506 Hierarchical keys as a `list`. 

507 """ 

508 if isinstance(key, str): 

509 if not key[0].isalnum(): 509 ↛ 510line 509 didn't jump to line 510, because the condition on line 509 was never true

510 d = key[0] 

511 key = key[1:] 

512 else: 

513 return [ 

514 key, 

515 ] 

516 escaped = f"\\{d}" 

517 temp = None 

518 if escaped in key: 

519 # Complain at the attempt to escape the escape 

520 doubled = fr"\{escaped}" 

521 if doubled in key: 

522 raise ValueError( 

523 f"Escaping an escaped delimiter ({doubled} in {key}) is not yet supported." 

524 ) 

525 # Replace with a character that won't be in the string 

526 temp = "\r" 

527 if temp in key or d == temp: 

528 raise ValueError( 

529 f"Can not use character {temp!r} in hierarchical key or as" 

530 " delimiter if escaping the delimiter" 

531 ) 

532 key = key.replace(escaped, temp) 

533 hierarchy = key.split(d) 

534 if temp: 

535 hierarchy = [h.replace(temp, d) for h in hierarchy] 

536 return hierarchy 

537 elif isinstance(key, collections.abc.Iterable): 537 ↛ 541line 537 didn't jump to line 541, because the condition on line 537 was never false

538 return list(key) 

539 else: 

540 # Not sure what this is so try it anyway 

541 return [ 

542 key, 

543 ] 

544 

545 def _getKeyHierarchy(self, name): 

546 """Retrieve the key hierarchy for accessing the Config. 

547 

548 Parameters 

549 ---------- 

550 name : `str` or `tuple` 

551 Delimited string or `tuple` of hierarchical keys. 

552 

553 Returns 

554 ------- 

555 hierarchy : `list` of `str` 

556 Hierarchy to use as a `list`. If the name is available directly 

557 as a key in the Config it will be used regardless of the presence 

558 of any nominal delimiter. 

559 """ 

560 if name in self._data: 

561 keys = [ 

562 name, 

563 ] 

564 else: 

565 keys = self._splitIntoKeys(name) 

566 return keys 

567 

def _findInHierarchy(self, keys, create=False):
    """Walk down the config following a sequence of keys.

    Parameters
    ----------
    keys : `list` or `tuple`
        Keys to search in hierarchy.
    create : `bool`, optional
        If `True`, if a part of the hierarchy does not exist, insert an
        empty `dict` into the hierarchy.

    Returns
    -------
    hierarchy : `list`
        The value found for each key, in order. The walk stops at the
        first missing key so only keys that exist have a value.
    complete : `bool`
        `True` if every key was found, in which case the final element
        of ``hierarchy`` is the value for the full key.
    """
    node = self._data
    found = []
    for depth, key in enumerate(keys):
        # The top level is always a dict so the sequence check is
        # skipped for the first key only; deeper levels may be sequences.
        node, present = _checkNextItem(key, node, create, depth == 0)
        if not present:
            return found, False
        found.append(node)
    return found, True

608 

def __getitem__(self, name):
    # An exact match on a top-level key bypasses all further cleverness.
    # This allows `Config.items()` to work via a simple __iter__
    # implementation that returns top level keys of self._data.
    try:
        value = self._data[name]
    except KeyError:
        direct = False
    else:
        direct = True

    if not direct:
        keys = self._getKeyHierarchy(name)
        hierarchy, complete = self._findInHierarchy(keys)
        if not complete:
            raise KeyError(f"{name} not found")
        value = hierarchy[-1]

    # In most cases we have a dict, and it's more efficient
    # to check for a dict instance before checking the generic mapping.
    if not isinstance(value, (dict, collections.abc.Mapping)):
        return value

    # Mappings are handed back wrapped in a Config.
    child = Config(value)
    # Ensure that child configs inherit the parent internal delimiter
    if self._D != Config._D:
        child._D = self._D
    return child

640 

def __setitem__(self, name, value):
    parents = self._getKeyHierarchy(name)
    leaf = parents.pop()

    # Store plain data, never a Config, and never share it with the
    # Config that was supplied.
    if isinstance(value, Config):
        value = copy.deepcopy(value._data)

    # Create any missing intermediate levels on the way down.
    hierarchy, complete = self._findInHierarchy(parents, create=True)
    container = hierarchy[-1] if hierarchy else self._data

    try:
        container[leaf] = value
    except TypeError:
        # Retry with the key converted to an integer index.
        container[int(leaf)] = value

657 

658 def __contains__(self, key): 

659 keys = self._getKeyHierarchy(key) 

660 hierarchy, complete = self._findInHierarchy(keys) 

661 return complete 

662 

663 def __delitem__(self, key): 

664 keys = self._getKeyHierarchy(key) 

665 last = keys.pop() 

666 hierarchy, complete = self._findInHierarchy(keys) 

667 if complete: 667 ↛ 674line 667 didn't jump to line 674, because the condition on line 667 was never false

668 if hierarchy: 668 ↛ 669line 668 didn't jump to line 669, because the condition on line 668 was never true

669 data = hierarchy[-1] 

670 else: 

671 data = self._data 

672 del data[last] 

673 else: 

674 raise KeyError(f"{key} not found in Config") 

675 

def update(self, other):
    """Update config from other `Config` or `dict`.

    Unlike `dict.update()`, nested mappings are merged key by key
    rather than being replaced as a whole: updating ``{"a": {"b": 1}}``
    with ``{"a": {"c": 2}}`` leaves both ``b`` and ``c`` under ``a``,
    whereas a plain `dict` would end up with only ``c``.

    Parameters
    ----------
    other : `dict` or `Config`
        Source of configuration.
    """
    target = self._data
    _doUpdate(target, other)

700 

def merge(self, other):
    """Merge another Config into this one.

    Like `Config.update()`, but only keys & values from ``other`` that
    DO NOT EXIST in self are added. Keys and values that already exist
    in self will NOT be overwritten.

    Parameters
    ----------
    other : `dict` or `Config`
        Source of configuration.

    Raises
    ------
    TypeError
        Raised if ``other`` is not a mapping.
    """
    if not isinstance(other, collections.abc.Mapping):
        raise TypeError(f"Can only merge a Mapping into a Config, not {type(other)}")

    # Start from a Config built from the supplied mapping and lay the
    # current content on top so that existing values win, then adopt
    # the combined data.
    combined = Config(other)
    combined.update(self)
    self._data = combined._data

722 

def nameTuples(self, topLevelOnly=False):
    """Get tuples representing the name hierarchies of all keys.

    The tuples returned from this method are guaranteed to be usable
    to access items in the configuration object.

    Parameters
    ----------
    topLevelOnly : `bool`, optional
        If False, the default, a full hierarchy of names is returned.
        If True, only the top level are returned.

    Returns
    -------
    names : `list` of `tuple`
        One tuple per key in the `Config`, parent before children. Each
        tuple holds the keys (or integer indices for sequences) leading
        to that entry.
    """
    if topLevelOnly:
        return [(k,) for k in self]

    def walk(node, prefix):
        # Sequences are addressed by position, mappings by key.
        if isinstance(node, collections.abc.Sequence):
            children = range(len(node))
        else:
            children = node.keys()
        for key in children:
            child = node[key]
            here = prefix + (key,)
            yield here
            # Strings are sequences too but are leaves, not containers.
            is_container = isinstance(child, (collections.abc.Mapping, collections.abc.Sequence))
            if is_container and not isinstance(child, str):
                yield from walk(child, here)

    return list(walk(self._data, ()))

761 

def names(self, topLevelOnly=False, delimiter=None):
    """Get a delimited name of all the keys in the hierarchy.

    The values returned from this method are guaranteed to be usable
    to access items in the configuration object.

    Parameters
    ----------
    topLevelOnly : `bool`, optional
        If False, the default, a full hierarchy of names is returned.
        If True, only the top level are returned.
    delimiter : `str`, optional
        Delimiter to use when forming the keys. If the delimiter is
        present in any of the keys, it will be escaped in the returned
        names. If `None` given a delimiter will be automatically provided.
        The delimiter can not be alphanumeric.

    Returns
    -------
    names : `list` of `str`
        List of all names present in the `Config`.

    Raises
    ------
    ValueError
        Raised if the supplied delimiter is alphanumeric, or if no
        usable delimiter could be chosen automatically.

    Notes
    -----
    This is different than the built-in method `dict.keys`, which will
    return only the first level keys.
    """
    if topLevelOnly:
        return list(self.keys())

    # Get all the tuples of hierarchical keys
    hierarchies = self.nameTuples()

    if delimiter is not None:
        if delimiter.isalnum():
            raise ValueError(f"Supplied delimiter ({delimiter!r}) must not be alphanumeric.")
    else:
        # Start with the internal default and make sure it does not need
        # to be escaped (names are much easier to read unescaped).
        delimiter = self._D

        # One big string of every key component for a quick clash check.
        haystack = "".join(str(part) for parts in hierarchies for part in parts)

        # Keep stepping to the next candidate until one is unused.
        attempts = 0
        while delimiter in haystack:
            log.debug("Delimiter '%s' could not be used. Trying another.", delimiter)
            attempts += 1

            if attempts > 100:
                raise ValueError(f"Unable to determine a delimiter for Config {self}")

            # Next code point that is not alphanumeric.
            candidate = chr(ord(delimiter) + 1)
            while candidate.isalnum():
                candidate = chr(ord(candidate) + 1)
            delimiter = candidate

    log.debug("Using delimiter %r", delimiter)

    # Form the keys, escaping the delimiter if necessary
    escaped = f"\\{delimiter}"
    result = []
    for parts in hierarchies:
        pieces = [str(part).replace(delimiter, escaped) for part in parts]
        result.append(delimiter + delimiter.join(pieces))
    return result

835 

def asArray(self, name):
    """Get a value as an array.

    Parameters
    ----------
    name : `str`
        Key to use to retrieve value.

    Returns
    -------
    array : `collections.abc.Sequence`
        If the value retrieved with ``get`` is a
        `~collections.abc.Sequence` (and not a `str`) the value itself
        is returned; otherwise a one-element list holding the value.
    """
    value = self.get(name)
    # Strings are sequences but count as a single value here.
    list_like = isinstance(value, collections.abc.Sequence) and not isinstance(value, str)
    return value if list_like else [value]

860 

def __eq__(self, other):
    # Compare underlying data so a Config can equal a plain mapping.
    rhs = other._data if isinstance(other, Config) else other
    return self._data == rhs

865 

def __ne__(self, other):
    # Compare underlying data so a Config can be compared to a plain
    # mapping.
    rhs = other._data if isinstance(other, Config) else other
    return self._data != rhs

870 

871 ####### 

872 # i/o # 

873 

def dump(self, output: Optional[IO] = None, format: str = "yaml") -> Optional[str]:
    """Serialize the config, either to a stream or to a string.

    Parameters
    ----------
    output : `IO`, optional
        The stream to use for output. If `None` the serialized content
        will be returned.
    format : `str`, optional
        The format to use for the output. Can be "yaml" or "json".

    Returns
    -------
    serialized : `str` or `None`
        For JSON, `None` when a stream was given and the serialized
        string otherwise. For YAML, whatever `yaml.safe_dump` returns
        for the given ``output``.

    Raises
    ------
    ValueError
        Raised if ``format`` is not a supported format.
    """
    if format == "json":
        if output is None:
            return json.dumps(self._data, ensure_ascii=False)
        json.dump(self._data, output, ensure_ascii=False)
        return None

    if format == "yaml":
        return yaml.safe_dump(self._data, output, default_flow_style=False)

    raise ValueError(f"Unsupported format for Config serialization: {format}")

901 

def dumpToUri(
    self,
    uri: Union[ButlerURI, str],
    updateFile: bool = True,
    defaultFileName: str = "butler.yaml",
    overwrite: bool = True,
) -> None:
    """Write the config to location pointed to by given URI.

    Currently supports 's3' and 'file' URI schemes.

    Parameters
    ----------
    uri : `str` or `ButlerURI`
        URI of location where the Config will be written.
    updateFile : `bool`, optional
        If True and uri does not end on a filename with extension, will
        append `defaultFileName` to the target uri. True by default.
    defaultFileName : `str`, optional
        The file name that will be appended to target uri if updateFile is
        True and uri does not end on a file with an extension.
    overwrite : `bool`, optional
        Passed through to the URI ``write`` call; if True the
        configuration will be written even if it already exists at that
        location.
    """
    # Make local copy of URI or create new one
    target = ButlerURI(uri)

    if updateFile and not target.getExtension():
        target = target.updatedFile(defaultFileName)

    # The serialization format is the extension without its leading
    # dot, lower-cased.
    serialized = self.dump(format=target.getExtension()[1:].lower())
    assert serialized is not None, "Config.dump guarantees not-None return when output arg is None"

    target.write(serialized.encode(), overwrite=overwrite)
    # Remember where this config now lives.
    self.configFile = target

941 

@staticmethod
def updateParameters(configType, config, full, toUpdate=None, toCopy=None, overwrite=True, toMerge=None):
    """Set, copy or merge selected parameters into a config.

    Named parameters can be assigned new values in bulk, and other
    values can be copied or merged from a reference config.

    The supplied config is assumed to be compatible with ``configType``.
    Updated values are attached to it via the related component key.
    ``config`` and ``full`` are assumed to come from the same part of
    the configuration hierarchy.

    Parameters
    ----------
    configType : `ConfigSubset`
        Config type used to extract the relevant items from ``config``.
    config : `Config`
        The `Config` to update. Only the subset understood by the
        supplied `ConfigSubset` is modified. Defaults are not inserted
        and the content is not validated, since mandatory keys are
        allowed to be missing until populated later by merging.
    full : `Config`
        A complete config, with all defaults expanded, that can be
        converted to a ``configType``. Values are read from here when
        ``toCopy`` or ``toMerge`` is given.

        Repository-specific options that should not be obtained
        from defaults when Butler instances are constructed
        should be copied from ``full`` to ``config``.
    toUpdate : `dict`, optional
        Mapping of keys to the new values to assign. Keys and values
        can be anything supported by `Config` assignment.
    toCopy : `tuple`, optional
        Keys whose values should be copied from ``full`` into
        ``config``.
    overwrite : `bool`, optional
        If `False`, keys already present in ``config`` are left alone
        by ``toUpdate`` and ``toCopy`` (a debug message is logged).
        Default is to overwrite.
    toMerge : `tuple`, optional
        Keys whose content is merged from ``full`` into ``config``
        without overwriting pre-existing values. Only works if the key
        refers to a hierarchy. The ``overwrite`` flag is ignored.

    Raises
    ------
    ValueError
        Raised if none of ``toUpdate``, ``toCopy`` and ``toMerge`` were
        defined.
    """
    if all(arg is None for arg in (toUpdate, toCopy, toMerge)):
        raise ValueError("At least one of toUpdate, toCopy, or toMerge parameters must be set.")

    # For a parent configuration the component key must exist in the
    # supplied config so that the ConfigSubset constructor selects the
    # right part. ``full`` is the reference for whether the component
    # is expected at all.
    component = configType.component
    if component in full and component not in config:
        config[component] = {}

    # The part of the supplied config that is to be modified.
    target = configType(config, mergeDefaults=False, validate=False)

    if toUpdate:
        for key, value in toUpdate.items():
            if key in target and not overwrite:
                log.debug(
                    "Not overriding key '%s' with value '%s' in config %s",
                    key,
                    value,
                    target.__class__.__name__,
                )
                continue
            target[key] = value

    # The reference subset is only constructed when it will be read.
    if toCopy or toMerge:
        reference = configType(full, mergeDefaults=False)

    if toCopy:
        for key in toCopy:
            if key in target and not overwrite:
                log.debug(
                    "Not overriding key '%s' from defaults in config %s",
                    key,
                    target.__class__.__name__,
                )
                continue
            target[key] = reference[key]

    if toMerge:
        for key in toMerge:
            if key not in target:
                target[key] = reference[key]
                continue
            # Merge into the extracted node and then put the node back
            # into the config.
            node = target[key]
            node.merge(reference[key])
            target[key] = node

    # Reattach to the parent if this is a child config.
    if component in config:
        config[component] = target
    else:
        config.update(target)

1049 

def toDict(self):
    """Return the config content as a standalone `dict`.

    Returns
    -------
    d : `dict`
        A deep copy of the internal data. Any top-level value that is
        itself a `Config` is replaced by the result of its own
        ``toDict()``.

    Notes
    -----
    This can be useful when passing a Config to some code that
    expects native Python types.
    """
    snapshot = copy.deepcopy(self._data)
    for key, value in snapshot.items():
        # Only existing keys are reassigned, so the size of the mapping
        # does not change while it is being iterated.
        if isinstance(value, Config):
            snapshot[key] = value.toDict()
    return snapshot

1070 

1071 

class ConfigSubset(Config):
    """Config representing a subset of a more general configuration.

    Subclasses define their own component and when given a configuration
    that includes that component, the resulting configuration only includes
    the subset. For example, your config might contain ``dimensions`` if it's
    part of a global config and that subset will be stored. If ``dimensions``
    can not be found it is assumed that the entire contents of the
    configuration should be used.

    Default values are read from the environment or supplied search paths
    using the default configuration file name specified in the subclass.
    This allows a configuration class to be instantiated without any
    additional arguments.

    Additional validation can be specified to check for keys that are mandatory
    in the configuration.

    Parameters
    ----------
    other : `Config` or `str` or `dict`
        Argument specifying the configuration information as understood
        by `Config`.
    validate : `bool`, optional
        If `True` required keys will be checked to ensure configuration
        consistency.
    mergeDefaults : `bool`, optional
        If `True` defaults will be read and the supplied config will
        be combined with the defaults, with the supplied values taking
        precedence.
    searchPaths : `list` or `tuple`, optional
        Explicit additional paths to search for defaults. They should
        be supplied in priority order. These paths have higher priority
        than those returned by `ConfigSubset.defaultSearchPaths()`.
        Paths can be `str` referring to the local file system or URIs,
        `ButlerURI`.

    Raises
    ------
    RuntimeError
        Raised if defaults are being merged and the class named by the
        ``cls`` key of the configuration can not be imported.
    KeyError
        Raised if ``validate`` is `True` and a required key is missing.
    """

    component: ClassVar[Optional[str]] = None
    """Component to use from supplied config. Can be None. If specified the
    key is not required. Can be a full dot-separated path to a component.
    """

    requiredKeys: ClassVar[Sequence[str]] = ()
    """Keys that are required to be specified in the configuration.
    """

    defaultConfigFile: ClassVar[Optional[str]] = None
    """Name of the file containing defaults for this config class.
    """

    def __init__(self, other=None, validate=True, mergeDefaults=True, searchPaths=None):

        # Create a blank object to receive the defaults.
        # Once we have the defaults we then update with the external values.
        super().__init__()

        # Create a standard Config rather than subset
        externalConfig = Config(other)

        # Select the part we need from it.
        # To simplify the use of !include we also check for the existence of
        # component.component (since the included files can themselves
        # include the component name).
        if self.component is not None:
            doubled = (self.component, self.component)
            # Must check for double depth first
            if doubled in externalConfig:
                externalConfig = externalConfig[doubled]
            elif self.component in externalConfig:
                externalConfig._data = externalConfig._data[self.component]

        # Default files read to create this configuration
        self.filesRead = []

        # Assume we are not looking up child configurations
        containerKey = None

        # Sometimes we do not want to merge with defaults.
        if mergeDefaults:

            # Supplied search paths have highest priority
            fullSearchPath = []
            if searchPaths:
                fullSearchPath.extend(searchPaths)

            # Then the default paths (environment followed by package
            # resources).
            fullSearchPath.extend(self.defaultSearchPaths())

            # There are two places to find defaults for this particular config
            # - The "defaultConfigFile" defined in the subclass
            # - The class specified in the "cls" element in the config.
            # Read cls after merging in case it changes.
            if self.defaultConfigFile is not None:
                self._updateWithConfigsFromPath(fullSearchPath, self.defaultConfigFile)

            # Can have a class specification in the external config (priority)
            # or from the defaults.
            pytype = None
            if "cls" in externalConfig:
                pytype = externalConfig["cls"]
            elif "cls" in self:
                pytype = self["cls"]

            if pytype is not None:
                try:
                    cls = doImport(pytype)
                except ImportError as e:
                    raise RuntimeError(f"Failed to import cls '{pytype}' for config {type(self)}") from e
                defaultsFile = cls.defaultConfigFile
                if defaultsFile is not None:
                    self._updateWithConfigsFromPath(fullSearchPath, defaultsFile)

                # Get the container key in case we need it. Not every
                # class defines one, in which case it stays as None.
                containerKey = getattr(cls, "containerKey", None)

        # Now update this object with the external values so that the external
        # values always override the defaults
        self.update(externalConfig)

        # If this configuration has child configurations of the same
        # config class, we need to expand those defaults as well.
        if mergeDefaults and containerKey is not None and containerKey in self:
            for idx, subConfig in enumerate(self[containerKey]):
                self[containerKey, idx] = type(self)(
                    other=subConfig, validate=validate, mergeDefaults=mergeDefaults, searchPaths=searchPaths
                )

        if validate:
            self.validate()

    @classmethod
    def defaultSearchPaths(cls):
        """Read environment to determine search paths to use.

        Global defaults, at lowest priority, are found in the ``configs``
        resource directory of the package named by ``cls.resourcesPackage``.
        Additional defaults can be defined using the environment variable
        ``$DAF_BUTLER_CONFIG_PATH`` (the module-level ``CONFIG_PATH``
        constant) which is a PATH-like variable where paths at the front
        of the list have priority over those later.

        Returns
        -------
        paths : `list`
            Returns a list of paths to search. The returned order is in
            priority with the highest priority paths first. Entries from
            the environment variable come first as `str`; the package
            configuration resources are always the final entry, as a
            `ButlerURI`, and are therefore searched last.

        Notes
        -----
        The environment variable is split on the platform path separator
        (``os.pathsep``, which is ``:`` on POSIX systems).
        This currently makes it incompatible with usage of URIs.
        """
        # Defaults can come from multiple search paths. The list is built
        # in priority order: environment variable entries first, then the
        # package resources.
        defaultsPaths: List[Union[str, ButlerURI]] = []

        if CONFIG_PATH in os.environ:
            externalPaths = os.environ[CONFIG_PATH].split(os.pathsep)
            defaultsPaths.extend(externalPaths)

        # Add the package defaults as a resource
        defaultsPaths.append(ButlerURI(f"resource://{cls.resourcesPackage}/configs", forceDirectory=True))
        return defaultsPaths

    def _updateWithConfigsFromPath(self, searchPaths, configFile):
        """Search the supplied paths, merging the configuration values.

        The values read will override values currently stored in the object.
        Every file found in the path will be read, such that the earlier
        path entries have higher priority.

        Parameters
        ----------
        searchPaths : `list` of `ButlerURI`, `str`
            Paths to search for the supplied configFile. This path
            is the priority order, such that files read from the
            first path entry will be selected over those read from
            a later path. Can contain `str` referring to the local file
            system or a URI string.
        configFile : `ButlerURI` or `str`
            File to locate in path. If it is absolute and exists it will
            be read directly and the search path will not be used.
            Otherwise it is joined to each search path entry in turn and
            read from every location where it exists.

        Raises
        ------
        ValueError
            Raised if an entry in ``searchPaths`` is neither a `str` nor
            a `ButlerURI`.

        Notes
        -----
        Every file that is read is also recorded in ``self.filesRead``.
        """
        uri = ButlerURI(configFile)
        if uri.isabs() and uri.exists():
            # An explicit, existing resource: read it directly and ignore
            # the search path.
            self._updateWithOtherConfigFile(configFile)
            self.filesRead.append(configFile)
        else:
            # Reverse order so that high priority entries
            # update the object last.
            for pathDir in reversed(searchPaths):
                if isinstance(pathDir, (str, ButlerURI)):
                    pathDir = ButlerURI(pathDir, forceDirectory=True)
                    file = pathDir.join(configFile)
                    if file.exists():
                        self.filesRead.append(file)
                        self._updateWithOtherConfigFile(file)
                else:
                    raise ValueError(f"Unexpected search path type encountered: {pathDir!r}")

    def _updateWithOtherConfigFile(self, file):
        """Read in some defaults and update.

        Update the configuration by reading the supplied file as a config
        of this class, and merging such that these values override the
        current values. Contents of the external config are not validated.

        Parameters
        ----------
        file : `Config`, `str`, `ButlerURI`, or `dict`
            Entity that can be converted to a `ConfigSubset`.
        """
        # Use this class to read the defaults so that subsetting can happen
        # correctly. Defaults are not merged again here and validation is
        # skipped.
        externalConfig = type(self)(file, validate=False, mergeDefaults=False)
        self.update(externalConfig)

    def validate(self):
        """Check that mandatory keys are present in this configuration.

        Ignored if ``requiredKeys`` is empty.

        Raises
        ------
        KeyError
            Raised if any key listed in ``requiredKeys`` is not a
            top-level key of the stored data.
        """
        missing = [k for k in self.requiredKeys if k not in self._data]
        if missing:
            raise KeyError(f"Mandatory keys ({missing}) missing from supplied configuration for {type(self)}")