Coverage for python/lsst/daf/butler/core/config.py: 42%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

472 statements  

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24"""Configuration control.""" 

25 

26__all__ = ("Config", "ConfigSubset") 

27 

28import collections 

29import copy 

30import json 

31import logging 

32import pprint 

33import os 

34import yaml 

35import sys 

36from pathlib import Path 

37from yaml.representer import Representer 

38import io 

39from typing import Any, Dict, List, Sequence, Optional, ClassVar, IO, Tuple, Union 

40 

41from lsst.utils import doImport 

42from ._butlerUri import ButlerURI 

43 

44yaml.add_representer(collections.defaultdict, Representer.represent_dict) 

45 

46 

47# Config module logger 

48log = logging.getLogger(__name__) 

49 

50# PATH-like environment variable to use for defaults. 

51CONFIG_PATH = "DAF_BUTLER_CONFIG_PATH" 

52 

# Prefer the libyaml-backed safe loader. Not all installations have the
# C library, in which case the pure-Python safe loader is used instead
# (for mypy's sake the two are assumed to be interchangeable).
yamlLoader = getattr(yaml, "CSafeLoader", None)
if yamlLoader is None:
    yamlLoader = yaml.SafeLoader  # type: ignore

59 

60 

61def _doUpdate(d, u): 

62 if not isinstance(u, collections.abc.Mapping) or \ 62 ↛ 64line 62 didn't jump to line 64, because the condition on line 62 was never true

63 not isinstance(d, collections.abc.MutableMapping): 

64 raise RuntimeError("Only call update with Mapping, not {}".format(type(d))) 

65 for k, v in u.items(): 

66 if isinstance(v, collections.abc.Mapping): 

67 d[k] = _doUpdate(d.get(k, {}), v) 

68 else: 

69 d[k] = v 

70 return d 

71 

72 

73def _checkNextItem(k, d, create, must_be_dict): 

74 """See if k is in d and if it is return the new child.""" 

75 nextVal = None 

76 isThere = False 

77 if d is None: 77 ↛ 79line 77 didn't jump to line 79, because the condition on line 77 was never true

78 # We have gone past the end of the hierarchy 

79 pass 

80 elif not must_be_dict and isinstance(d, collections.abc.Sequence): 80 ↛ 85line 80 didn't jump to line 85, because the condition on line 80 was never true

81 # Check for Sequence first because for lists 

82 # __contains__ checks whether value is found in list 

83 # not whether the index exists in list. When we traverse 

84 # the hierarchy we are interested in the index. 

85 try: 

86 nextVal = d[int(k)] 

87 isThere = True 

88 except IndexError: 

89 pass 

90 except ValueError: 

91 isThere = k in d 

92 elif k in d: 

93 nextVal = d[k] 

94 isThere = True 

95 elif create: 95 ↛ 96line 95 didn't jump to line 96, because the condition on line 95 was never true

96 d[k] = {} 

97 nextVal = d[k] 

98 isThere = True 

99 

100 return nextVal, isThere 

101 

102 

class Loader(yamlLoader):
    """YAML Loader that supports file include directives.

    Uses ``!include`` directive in a YAML file to point to another
    YAML file to be included. The path in the include directive is relative
    to the file containing that directive.

    .. code-block:: yaml

        storageClasses: !include storageClasses.yaml

    Examples
    --------
    >>> with open("document.yaml", "r") as f:
    ...     data = yaml.load(f, Loader=Loader)

    Notes
    -----
    See https://davidchall.github.io/yaml-includes.html
    """

    def __init__(self, stream):
        super().__init__(stream)
        # If this is a string and not a stream we may well lack a name.
        # Test for the attribute explicitly so that an AttributeError raised
        # while constructing the URI itself is not silently swallowed.
        if hasattr(stream, "name"):
            self._root = ButlerURI(stream.name)
        else:
            # No choice but to assume a local filesystem
            self._root = ButlerURI("no-file.yaml")
        Loader.add_constructor("!include", Loader.include)

    def include(self, node):
        """Construct the content referenced by an ``!include`` node.

        Parameters
        ----------
        node : `yaml.Node`
            Scalar (single file), sequence (list of files) or mapping
            (key to file) node carrying the ``!include`` tag.

        Returns
        -------
        result : `object`, `list` or `dict`
            Loaded content of the file for a scalar node, a list of loaded
            contents for a sequence node, or a dict of loaded contents for
            a mapping node.

        Raises
        ------
        yaml.constructor.ConstructorError
            Raised if the node is of an unrecognised type.
        """
        result: Union[List[Any], Dict[str, Any]]
        if isinstance(node, yaml.ScalarNode):
            return self.extractFile(self.construct_scalar(node))

        if isinstance(node, yaml.SequenceNode):
            result = [self.extractFile(filename) for filename in self.construct_sequence(node)]
            return result

        if isinstance(node, yaml.MappingNode):
            result = {k: self.extractFile(v) for k, v in self.construct_mapping(node).items()}
            return result

        # Carry the diagnostic in the exception rather than printing it to
        # stderr, so that callers catching the error can see what went wrong.
        raise yaml.constructor.ConstructorError(
            None,
            None,
            f"unrecognised node type in !include statement: {type(node).__name__}",
            getattr(node, "start_mark", None),
        )

    def extractFile(self, filename):
        """Read and parse the YAML file referred to by an include.

        Parameters
        ----------
        filename : `str`
            URI or path given in the ``!include`` directive.

        Returns
        -------
        content : `object`
            Result of loading the referenced file with this `Loader`.
        """
        # It is possible for the !include to point to an explicit URI
        # instead of a relative URI, therefore we first see if it is
        # scheme-less or not. If it has a scheme we use it directly
        # if it is scheme-less we use it relative to the file root.
        requesteduri = ButlerURI(filename, forceAbsolute=False)

        if requesteduri.scheme:
            fileuri = requesteduri
        else:
            fileuri = self._root.updatedFile(filename)

        log.debug("Opening YAML file via !include: %s", fileuri)

        # Read all the data from the resource
        data = fileuri.read()

        # Store the bytes into a BytesIO so we can attach a .name; nested
        # loaders use that name to resolve their own relative includes.
        stream = io.BytesIO(data)
        stream.name = fileuri.geturl()
        return yaml.load(stream, Loader)

174 

175 

176class Config(collections.abc.MutableMapping): 

177 r"""Implements a datatype that is used by `Butler` for configuration. 

178 

179 It is essentially a `dict` with key/value pairs, including nested dicts 

180 (as values). In fact, it can be initialized with a `dict`. 

181 This is explained next: 

182 

183 Config extends the `dict` api so that hierarchical values may be accessed 

184 with delimited notation or as a tuple. If a string is given the delimiter 

185 is picked up from the first character in that string. For example, 

186 ``foo.getValue(".a.b.c")``, ``foo["a"]["b"]["c"]``, ``foo["a", "b", "c"]``, 

187 ``foo[".a.b.c"]``, and ``foo["/a/b/c"]`` all achieve the same outcome. 

188 If the first character is alphanumeric, no delimiter will be used. 

189 ``foo["a.b.c"]`` will be a single key ``a.b.c`` as will ``foo[":a.b.c"]``. 

190 Unicode characters can be used as the delimiter for distinctiveness if 

191 required. 

192 

193 If a key in the hierarchy starts with a non-alphanumeric character care 

194 should be used to ensure that either the tuple interface is used or 

195 a distinct delimiter is always given in string form. 

196 

197 Finally, the delimiter can be escaped if it is part of a key and also 

198 has to be used as a delimiter. For example, ``foo[r".a.b\.c"]`` results in 

199 a two element hierarchy of ``a`` and ``b.c``. For hard-coded strings it is 

200 always better to use a different delimiter in these cases. 

201 

202 Note that adding a multi-level key implicitly creates any nesting levels 

203 that do not exist, but removing multi-level keys does not automatically 

204 remove empty nesting levels. As a result: 

205 

206 >>> c = Config() 

207 >>> c[".a.b"] = 1 

208 >>> del c[".a.b"] 

209 >>> c["a"] 

210 Config({'a': {}}) 

211 

212 Storage formats supported: 

213 

214 - yaml: read and write is supported. 

215 - json: read and write is supported but no ``!include`` directive. 

216 

217 Parameters 

218 ---------- 

219 other : `str` or `Config` or `dict` or `ButlerURI` or `pathlib.Path` 

220 Other source of configuration, can be: 

221 

222 - (`str`, `ButlerURI` or `pathlib.Path`) Treated as a URI to a config 

223 file. Must end with ".yaml" or ".json". 

224 - (`Config`) Copies the other Config's values into this one. 

225 - (`dict`) Copies the values from the dict into this Config. 

226 

227 If `None` is provided an empty `Config` will be created. 

228 """ 

229 

230 _D: str = "→" 

231 """Default internal delimiter to use for components in the hierarchy when 

232 constructing keys for external use (see `Config.names()`).""" 

233 

234 includeKey: ClassVar[str] = "includeConfigs" 

235 """Key used to indicate that another config should be included at this 

236 part of the hierarchy.""" 

237 

238 resourcesPackage: str = "lsst.daf.butler" 

239 """Package to search for default configuration data. The resources 

240 themselves will be within a ``configs`` resource hierarchy.""" 

241 

242 def __init__(self, other=None): 

243 self._data: Dict[str, Any] = {} 

244 self.configFile = None 

245 

246 if other is None: 

247 return 

248 

249 if isinstance(other, Config): 

250 self._data = copy.deepcopy(other._data) 

251 self.configFile = other.configFile 

252 elif isinstance(other, (dict, collections.abc.Mapping)): 

253 # In most cases we have a dict, and it's more efficient 

254 # to check for a dict instance before checking the generic mapping. 

255 self.update(other) 

256 elif isinstance(other, (str, ButlerURI, Path)): 256 ↛ 263line 256 didn't jump to line 263, because the condition on line 256 was never false

257 # if other is a string, assume it is a file path/URI 

258 self.__initFromUri(other) 

259 self._processExplicitIncludes() 

260 else: 

261 # if the config specified by other could not be recognized raise 

262 # a runtime error. 

263 raise RuntimeError(f"A Config could not be loaded from other: {other}") 

264 

265 def ppprint(self): 

266 """Return config as formatted readable string. 

267 

268 Examples 

269 -------- 

270 use: ``pdb> print(myConfigObject.ppprint())`` 

271 

272 Returns 

273 ------- 

274 s : `str` 

275 A prettyprint formatted string representing the config 

276 """ 

277 return pprint.pformat(self._data, indent=2, width=1) 

278 

279 def __repr__(self): 

280 return f"{type(self).__name__}({self._data!r})" 

281 

282 def __str__(self): 

283 return self.ppprint() 

284 

285 def __len__(self): 

286 return len(self._data) 

287 

288 def __iter__(self): 

289 return iter(self._data) 

290 

291 def copy(self): 

292 return type(self)(self) 

293 

294 @classmethod 

295 def fromString(cls, string: str, format: str = "yaml") -> Config: 

296 """Create a new Config instance from a serialized string. 

297 

298 Parameters 

299 ---------- 

300 string : `str` 

301 String containing content in specified format 

302 format : `str`, optional 

303 Format of the supplied string. Can be ``json`` or ``yaml``. 

304 

305 Returns 

306 ------- 

307 c : `Config` 

308 Newly-constructed Config. 

309 """ 

310 if format == "yaml": 

311 new_config = cls().__initFromYaml(string) 

312 elif format == "json": 

313 new_config = cls().__initFromJson(string) 

314 else: 

315 raise ValueError(f"Unexpected format of string: {format}") 

316 new_config._processExplicitIncludes() 

317 return new_config 

318 

@classmethod
def fromYaml(cls, string: str) -> Config:
    """Build a new Config from YAML text.

    Thin convenience wrapper around `fromString`.

    Parameters
    ----------
    string : `str`
        String containing content in YAML format

    Returns
    -------
    c : `Config`
        Newly-constructed Config.
    """
    parsed = cls.fromString(string, format="yaml")
    return parsed

334 

def __initFromUri(self, path: Union[str, ButlerURI, Path]) -> None:
    """Populate this Config from a file given by a path or URI.

    Parameters
    ----------
    path : `str`, `ButlerURI` or `pathlib.Path`
        Path or a URI to a persisted config file. The extension selects
        the parser: ``.yaml`` or ``.json``.

    Raises
    ------
    FileNotFoundError
        Raised if the extension is unsupported and the location does not
        exist.
    RuntimeError
        Raised if the location exists but has an unsupported extension.
    """
    uri = ButlerURI(path)
    ext = uri.getExtension()

    if ext != ".yaml" and ext != ".json":
        # This URI does not have a valid extension. It might be because
        # we ended up with a directory and not a file. Before we complain
        # about an extension, do an existence check. The supported
        # formats skip this (possibly expensive) check because reading
        # the file reports a missing resource soon enough.
        if not uri.exists():
            raise FileNotFoundError(f"Config location {uri} does not exist.")
        raise RuntimeError(f"The Config URI does not have a supported extension: {uri}")

    if ext == ".yaml":
        log.debug("Opening YAML config file: %s", uri.geturl())
        # Wrap the bytes in a named stream so that the YAML loader can
        # resolve relative ``!include`` directives.
        stream = io.BytesIO(uri.read())
        stream.name = uri.geturl()
        self.__initFromYaml(stream)
    else:
        log.debug("Opening JSON config file: %s", uri.geturl())
        self.__initFromJson(uri.read())

    self.configFile = uri

367 

def __initFromYaml(self, stream):
    """Replace the content of this Config with parsed YAML.

    Parameters
    ----------
    stream: `IO` or `str`
        Stream to pass to the YAML loader. Accepts anything that
        `yaml.load` accepts. This can include a string as well as an
        IO stream.

    Returns
    -------
    self : `Config`
        This object, to allow call chaining.

    Raises
    ------
    yaml.YAMLError
        If there is an error loading the file.
    """
    loaded = yaml.load(stream, Loader=Loader)
    # An empty document parses to None; store an empty dict instead.
    self._data = {} if loaded is None else loaded
    return self

388 

389 def __initFromJson(self, stream): 

390 """Load a JSON config from any readable stream that contains one. 

391 

392 Parameters 

393 ---------- 

394 stream: `IO` or `str` 

395 Stream to pass to the JSON loader. This can include a string as 

396 well as an IO stream. 

397 

398 Raises 

399 ------ 

400 TypeError: 

401 Raised if there is an error loading the content. 

402 """ 

403 if isinstance(stream, (bytes, str)): 

404 content = json.loads(stream) 

405 else: 

406 content = json.load(stream) 

407 if content is None: 

408 content = {} 

409 self._data = content 

410 return self 

411 

def _processExplicitIncludes(self):
    """Resolve every ``includeConfigs`` directive found in this Config.

    Each directive names one or more config files. They are looked up
    relative to the current working directory and to the directory of
    this Config's own file (if it has one), read as Configs of the same
    class, merged in order, and then overlaid with the values already
    present at that level, so explicit values take precedence.

    Raises
    ------
    RuntimeError
        Raised if ``configFile`` is set but is not a `ButlerURI`, or if a
        referenced include file can not be found.
    """
    # Search paths for config files
    searchPaths = [ButlerURI(os.path.curdir, forceDirectory=True)]
    if self.configFile is not None:
        if not isinstance(self.configFile, ButlerURI):
            raise RuntimeError(f"Unexpected type for config file: {self.configFile}")
        searchPaths.append(self.configFile.dirname())

    # The key list is a snapshot taken before any include is expanded.
    for path in self.nameTuples():
        if path[-1] != self.includeKey:
            continue

        log.debug("Processing file include directive at %s", self._D + self._D.join(path))
        basePath = path[:-1]

        # Extract the includes and then delete them from the config
        includes = self[path]
        del self[path]

        # Be consistent and convert to a list
        if not isinstance(includes, list):
            includes = [includes]

        # Read each entry assuming it is a reference to a file.
        # The file can be relative to config file or cwd.
        # ConfigSubset search paths are not used.
        subConfigs = []
        for entry in includes:
            # Expand any shell variables -- this could be URI
            target = ButlerURI(os.path.expandvars(entry), forceAbsolute=False)
            if target.isabs():
                found = target
            else:
                found = None
                # Every search path is tried; a later match replaces an
                # earlier one.
                for searchDir in searchPaths:
                    if not isinstance(searchDir, ButlerURI):
                        log.warning("Do not understand search path entry '%s' of type %s",
                                    searchDir, type(searchDir).__name__)
                        continue
                    candidate = searchDir.join(target.path)
                    # Remote resource check might be expensive
                    if candidate.exists():
                        found = candidate
            if not found:
                raise RuntimeError(f"Unable to find referenced include file: {target}")

            # Read the referenced Config as a Config
            subConfigs.append(type(self)(found))

        # Merge the included configs in order, later files overriding
        # earlier ones.
        newConfig = subConfigs.pop(0)
        for extra in subConfigs:
            newConfig.update(extra)

        # Explicit values take precedence
        if basePath:
            newConfig.update(self[basePath])
            # And reattach to the base config
            self[basePath] = newConfig
        else:
            # This is an include at the root config
            newConfig.update(self)
            # Replace the current config
            self._data = newConfig._data

485 

486 @staticmethod 

487 def _splitIntoKeys(key): 

488 r"""Split the argument for get/set/in into a hierarchical list. 

489 

490 Parameters 

491 ---------- 

492 key : `str` or iterable 

493 Argument given to get/set/in. If an iterable is provided it will 

494 be converted to a list. If the first character of the string 

495 is not an alphanumeric character then it will be used as the 

496 delimiter for the purposes of splitting the remainder of the 

497 string. If the delimiter is also in one of the keys then it 

498 can be escaped using ``\``. There is no default delimiter. 

499 

500 Returns 

501 ------- 

502 keys : `list` 

503 Hierarchical keys as a `list`. 

504 """ 

505 if isinstance(key, str): 

506 if not key[0].isalnum(): 506 ↛ 507line 506 didn't jump to line 507, because the condition on line 506 was never true

507 d = key[0] 

508 key = key[1:] 

509 else: 

510 return [key, ] 

511 escaped = f"\\{d}" 

512 temp = None 

513 if escaped in key: 

514 # Complain at the attempt to escape the escape 

515 doubled = fr"\{escaped}" 

516 if doubled in key: 

517 raise ValueError(f"Escaping an escaped delimiter ({doubled} in {key})" 

518 " is not yet supported.") 

519 # Replace with a character that won't be in the string 

520 temp = "\r" 

521 if temp in key or d == temp: 

522 raise ValueError(f"Can not use character {temp!r} in hierarchical key or as" 

523 " delimiter if escaping the delimiter") 

524 key = key.replace(escaped, temp) 

525 hierarchy = key.split(d) 

526 if temp: 

527 hierarchy = [h.replace(temp, d) for h in hierarchy] 

528 return hierarchy 

529 elif isinstance(key, collections.abc.Iterable): 529 ↛ 533line 529 didn't jump to line 533, because the condition on line 529 was never false

530 return list(key) 

531 else: 

532 # Not sure what this is so try it anyway 

533 return [key, ] 

534 

535 def _getKeyHierarchy(self, name): 

536 """Retrieve the key hierarchy for accessing the Config. 

537 

538 Parameters 

539 ---------- 

540 name : `str` or `tuple` 

541 Delimited string or `tuple` of hierarchical keys. 

542 

543 Returns 

544 ------- 

545 hierarchy : `list` of `str` 

546 Hierarchy to use as a `list`. If the name is available directly 

547 as a key in the Config it will be used regardless of the presence 

548 of any nominal delimiter. 

549 """ 

550 if name in self._data: 

551 keys = [name, ] 

552 else: 

553 keys = self._splitIntoKeys(name) 

554 return keys 

555 

def _findInHierarchy(self, keys, create=False):
    """Walk down the Config following a sequence of keys.

    Parameters
    ----------
    keys : `list` or `tuple`
        Keys to search in hierarchy.
    create : `bool`, optional
        If `True`, if a part of the hierarchy does not exist, insert an
        empty `dict` into the hierarchy.

    Returns
    -------
    hierarchy : `list`
        List of the value corresponding to each key in the supplied
        hierarchy. Only keys that exist in the hierarchy will have
        a value.
    complete : `bool`
        `True` if the full hierarchy exists and the final element
        in ``hierarchy`` is the value of relevant value.
    """
    found = []
    node = self._data
    for depth, key in enumerate(keys):
        # The top level must be a dict so it is a waste of time to check
        # for a sequence there; deeper levels might be sequences.
        node, isThere = _checkNextItem(key, node, create, depth == 0)
        if not isThere:
            return found, False
        found.append(node)
    return found, True

596 

597 def __getitem__(self, name): 

598 # Override the split for the simple case where there is an exact 

599 # match. This allows `Config.items()` to work via a simple 

600 # __iter__ implementation that returns top level keys of 

601 # self._data. 

602 

603 # If the name matches a key in the top-level hierarchy, bypass 

604 # all further cleverness. 

605 found_directly = False 

606 try: 

607 data = self._data[name] 

608 found_directly = True 

609 except KeyError: 

610 pass 

611 

612 if not found_directly: 612 ↛ 613line 612 didn't jump to line 613, because the condition on line 612 was never true

613 keys = self._getKeyHierarchy(name) 

614 

615 hierarchy, complete = self._findInHierarchy(keys) 

616 if not complete: 

617 raise KeyError(f"{name} not found") 

618 data = hierarchy[-1] 

619 

620 # In most cases we have a dict, and it's more efficient 

621 # to check for a dict instance before checking the generic mapping. 

622 if isinstance(data, (dict, collections.abc.Mapping)): 

623 data = Config(data) 

624 # Ensure that child configs inherit the parent internal delimiter 

625 if self._D != Config._D: 625 ↛ 626line 625 didn't jump to line 626, because the condition on line 625 was never true

626 data._D = self._D 

627 return data 

628 

629 def __setitem__(self, name, value): 

630 keys = self._getKeyHierarchy(name) 

631 last = keys.pop() 

632 if isinstance(value, Config): 

633 value = copy.deepcopy(value._data) 

634 

635 hierarchy, complete = self._findInHierarchy(keys, create=True) 

636 if hierarchy: 

637 data = hierarchy[-1] 

638 else: 

639 data = self._data 

640 

641 try: 

642 data[last] = value 

643 except TypeError: 

644 data[int(last)] = value 

645 

def __contains__(self, key):
    """Return `True` if the plain, delimited or tuple key exists."""
    _, complete = self._findInHierarchy(self._getKeyHierarchy(key))
    return complete

650 

651 def __delitem__(self, key): 

652 keys = self._getKeyHierarchy(key) 

653 last = keys.pop() 

654 hierarchy, complete = self._findInHierarchy(keys) 

655 if complete: 655 ↛ 662line 655 didn't jump to line 662, because the condition on line 655 was never false

656 if hierarchy: 656 ↛ 657line 656 didn't jump to line 657, because the condition on line 656 was never true

657 data = hierarchy[-1] 

658 else: 

659 data = self._data 

660 del data[last] 

661 else: 

662 raise KeyError(f"{key} not found in Config") 

663 

def update(self, other):
    """Update config from other `Config` or `dict`.

    Like `dict.update()`, but will add or modify keys in nested dicts,
    instead of overwriting the nested dict entirely.

    Parameters
    ----------
    other : `dict` or `Config`
        Source of configuration:

    Examples
    --------
    >>> c = Config({"a": {"b": 1}})
    >>> c.update({"a": {"c": 2}})
    >>> c
    Config({'a': {'b': 1, 'c': 2}})

    >>> foo = {"a": {"b": 1}}
    >>> foo.update({"a": {"c": 2}})
    >>> print(foo)
    {'a': {'c': 2}}
    """
    # The recursive merge works in place on the underlying data.
    target = self._data
    _doUpdate(target, other)

688 

689 def merge(self, other): 

690 """Merge another Config into this one. 

691 

692 Like `Config.update()`, but will add keys & values from other that 

693 DO NOT EXIST in self. 

694 

695 Keys and values that already exist in self will NOT be overwritten. 

696 

697 Parameters 

698 ---------- 

699 other : `dict` or `Config` 

700 Source of configuration: 

701 """ 

702 if not isinstance(other, collections.abc.Mapping): 

703 raise TypeError(f"Can only merge a Mapping into a Config, not {type(other)}") 

704 

705 # Convert the supplied mapping to a Config for consistency 

706 # This will do a deepcopy if it is already a Config 

707 otherCopy = Config(other) 

708 otherCopy.update(self) 

709 self._data = otherCopy._data 

710 

711 def nameTuples(self, topLevelOnly=False): 

712 """Get tuples representing the name hierarchies of all keys. 

713 

714 The tuples returned from this method are guaranteed to be usable 

715 to access items in the configuration object. 

716 

717 Parameters 

718 ---------- 

719 topLevelOnly : `bool`, optional 

720 If False, the default, a full hierarchy of names is returned. 

721 If True, only the top level are returned. 

722 

723 Returns 

724 ------- 

725 names : `list` of `tuple` of `str` 

726 List of all names present in the `Config` where each element 

727 in the list is a `tuple` of strings representing the hierarchy. 

728 """ 

729 if topLevelOnly: 729 ↛ 730line 729 didn't jump to line 730, because the condition on line 729 was never true

730 return list((k,) for k in self) 

731 

732 def getKeysAsTuples(d, keys, base): 

733 if isinstance(d, collections.abc.Sequence): 

734 theseKeys = range(len(d)) 

735 else: 

736 theseKeys = d.keys() 

737 for key in theseKeys: 

738 val = d[key] 

739 levelKey = base + (key,) if base is not None else (key,) 

740 keys.append(levelKey) 

741 if isinstance(val, (collections.abc.Mapping, collections.abc.Sequence)) \ 

742 and not isinstance(val, str): 

743 getKeysAsTuples(val, keys, levelKey) 

744 keys: List[Tuple[str, ...]] = [] 

745 getKeysAsTuples(self._data, keys, None) 

746 return keys 

747 

748 def names(self, topLevelOnly=False, delimiter=None): 

749 """Get a delimited name of all the keys in the hierarchy. 

750 

751 The values returned from this method are guaranteed to be usable 

752 to access items in the configuration object. 

753 

754 Parameters 

755 ---------- 

756 topLevelOnly : `bool`, optional 

757 If False, the default, a full hierarchy of names is returned. 

758 If True, only the top level are returned. 

759 delimiter : `str`, optional 

760 Delimiter to use when forming the keys. If the delimiter is 

761 present in any of the keys, it will be escaped in the returned 

762 names. If `None` given a delimiter will be automatically provided. 

763 The delimiter can not be alphanumeric. 

764 

765 Returns 

766 ------- 

767 names : `list` of `str` 

768 List of all names present in the `Config`. 

769 

770 Notes 

771 ----- 

772 This is different than the built-in method `dict.keys`, which will 

773 return only the first level keys. 

774 

775 Raises 

776 ------ 

777 ValueError: 

778 The supplied delimiter is alphanumeric. 

779 """ 

780 if topLevelOnly: 

781 return list(self.keys()) 

782 

783 # Get all the tuples of hierarchical keys 

784 nameTuples = self.nameTuples() 

785 

786 if delimiter is not None and delimiter.isalnum(): 

787 raise ValueError(f"Supplied delimiter ({delimiter!r}) must not be alphanumeric.") 

788 

789 if delimiter is None: 

790 # Start with something, and ensure it does not need to be 

791 # escaped (it is much easier to understand if not escaped) 

792 delimiter = self._D 

793 

794 # Form big string for easy check of delimiter clash 

795 combined = "".join("".join(str(s) for s in k) for k in nameTuples) 

796 

797 # Try a delimiter and keep trying until we get something that 

798 # works. 

799 ntries = 0 

800 while delimiter in combined: 

801 log.debug("Delimiter '%s' could not be used. Trying another.", delimiter) 

802 ntries += 1 

803 

804 if ntries > 100: 

805 raise ValueError(f"Unable to determine a delimiter for Config {self}") 

806 

807 # try another one 

808 while True: 

809 delimiter = chr(ord(delimiter)+1) 

810 if not delimiter.isalnum(): 

811 break 

812 

813 log.debug("Using delimiter %r", delimiter) 

814 

815 # Form the keys, escaping the delimiter if necessary 

816 strings = [delimiter + delimiter.join(str(s).replace(delimiter, f"\\{delimiter}") for s in k) 

817 for k in nameTuples] 

818 return strings 

819 

820 def asArray(self, name): 

821 """Get a value as an array. 

822 

823 May contain one or more elements. 

824 

825 Parameters 

826 ---------- 

827 name : `str` 

828 Key to use to retrieve value. 

829 

830 Returns 

831 ------- 

832 array : `collections.abc.Sequence` 

833 The value corresponding to name, but guaranteed to be returned 

834 as a list with at least one element. If the value is a 

835 `~collections.abc.Sequence` (and not a `str`) the value itself 

836 will be returned, else the value will be the first element. 

837 """ 

838 val = self.get(name) 

839 if isinstance(val, str): 

840 val = [val] 

841 elif not isinstance(val, collections.abc.Sequence): 

842 val = [val] 

843 return val 

844 

845 def __eq__(self, other): 

846 if isinstance(other, Config): 

847 other = other._data 

848 return self._data == other 

849 

850 def __ne__(self, other): 

851 if isinstance(other, Config): 

852 other = other._data 

853 return self._data != other 

854 

855 ####### 

856 # i/o # 

857 

858 def dump(self, output: Optional[IO] = None, format: str = "yaml") -> Optional[str]: 

859 """Write the config to an output stream. 

860 

861 Parameters 

862 ---------- 

863 output : `IO`, optional 

864 The stream to use for output. If `None` the serialized content 

865 will be returned. 

866 format : `str`, optional 

867 The format to use for the output. Can be "yaml" or "json". 

868 

869 Returns 

870 ------- 

871 serialized : `str` or `None` 

872 If a stream was given the stream will be used and the return 

873 value will be `None`. If the stream was `None` the 

874 serialization will be returned as a string. 

875 """ 

876 if format == "yaml": 

877 return yaml.safe_dump(self._data, output, default_flow_style=False) 

878 elif format == "json": 

879 if output is not None: 

880 json.dump(self._data, output, ensure_ascii=False) 

881 return None 

882 else: 

883 return json.dumps(self._data, ensure_ascii=False) 

884 raise ValueError(f"Unsupported format for Config serialization: {format}") 

885 

def dumpToUri(self, uri: Union[ButlerURI, str], updateFile: bool = True,
              defaultFileName: str = "butler.yaml",
              overwrite: bool = True) -> None:
    """Write the config to location pointed to by given URI.

    Currently supports 's3' and 'file' URI schemes.

    Parameters
    ----------
    uri: `str` or `ButlerURI`
        URI of location where the Config will be written.
    updateFile : bool, optional
        If True and uri does not end on a filename with extension, will
        append `defaultFileName` to the target uri. True by default.
    defaultFileName : str, optional
        The file name that will be appended to target uri if updateFile is
        True and uri does not end on a file with an extension.
    overwrite : bool, optional
        If True the configuration will be written even if it already
        exists at that location.
    """
    # Make local copy of URI or create new one
    target = ButlerURI(uri)

    if updateFile and not target.getExtension():
        target = target.updatedFile(defaultFileName)

    # The serialization format is the lower-cased extension without
    # its leading dot.
    fmt = target.getExtension()[1:].lower()

    serialized = self.dump(format=fmt)
    assert serialized is not None, "Config.dump guarantees not-None return when output arg is None"
    target.write(serialized.encode(), overwrite=overwrite)

    # Remember where this config now lives.
    self.configFile = target

921 

@staticmethod
def updateParameters(configType, config, full, toUpdate=None, toCopy=None, overwrite=True, toMerge=None):
    """Set, copy or merge selected parameters of a config in bulk.

    Named parameters can be assigned new values, and other values can be
    copied or merged in from a reference config.

    The supplied config is assumed to be compatible with ``configType``;
    the updated values are attached back to it under the component key
    of ``configType`` when that key is present. ``config`` and ``full``
    are assumed to come from the same part of the configuration
    hierarchy.

    Parameters
    ----------
    configType : `ConfigSubset`
        Config type used to extract the relevant items from ``config``.
    config : `Config`
        The `Config` to update. Only the subset understood by
        ``configType`` is modified. It is constructed without merging
        defaults and without validation, since mandatory keys may still
        be missing until a later merge.
    full : `Config`
        A complete config, with defaults expanded, that can be converted
        to ``configType``. Values are read from here for ``toCopy`` and
        ``toMerge``; it is not assigned to by this method.
    toUpdate : `dict`, optional
        Mapping of keys to the new values to assign. Keys and values can
        be anything supported by `Config` assignment.
    toCopy : `tuple`, optional
        Keys whose values are copied from ``full`` into ``config``.
    overwrite : `bool`, optional
        If `False`, a key that already exists in ``config`` is left
        untouched by ``toUpdate`` and ``toCopy``. Default is to overwrite.
    toMerge : `tuple`, optional
        Keys whose content is merged from ``full`` into ``config`` using
        the ``merge`` method of the existing node; keys absent from
        ``config`` are copied. The ``overwrite`` flag is ignored here.

    Raises
    ------
    ValueError
        Raised if ``toUpdate``, ``toCopy`` and ``toMerge`` are all `None`.
    """
    if all(arg is None for arg in (toUpdate, toCopy, toMerge)):
        raise ValueError("At least one of toUpdate, toCopy, or toMerge parameters must be set.")

    component = configType.component

    # For a parent configuration the component key must exist before the
    # ConfigSubset constructor is called, so add a stub entry. ``full``
    # is consulted because it is expected to be complete.
    if component in full and component not in config:
        config[component] = {}

    # The piece of the config that will actually be edited.
    localConfig = configType(config, mergeDefaults=False, validate=False)

    if toUpdate:
        for key, value in toUpdate.items():
            if key in localConfig and not overwrite:
                log.debug("Not overriding key '%s' with value '%s' in config %s",
                          key, value, localConfig.__class__.__name__)
                continue
            localConfig[key] = value

    if toCopy or toMerge:
        localFullConfig = configType(full, mergeDefaults=False)

        for key in toCopy or ():
            if key in localConfig and not overwrite:
                log.debug("Not overriding key '%s' from defaults in config %s",
                          key, localConfig.__class__.__name__)
                continue
            localConfig[key] = localFullConfig[key]

        for key in toMerge or ():
            if key not in localConfig:
                localConfig[key] = localFullConfig[key]
                continue
            # Pull the node out, merge into it, then reattach it so the
            # merged content is stored back in the config.
            subset = localConfig[key]
            subset.merge(localFullConfig[key])
            localConfig[key] = subset

    # A child config goes back under its component key; otherwise the
    # edited content is folded directly into the supplied config.
    if component in config:
        config[component] = localConfig
    else:
        config.update(localConfig)

1022 

def toDict(self):
    """Return a standalone `dict` copy of this `Config`.

    Returns
    -------
    d : `dict`
        A deep copy of the internal data in which every top-level value
        that is itself a `Config` has been replaced by the result of its
        own ``toDict()``.

    Notes
    -----
    Useful when handing a `Config` to code that expects native Python
    types. Modifying the returned object does not affect this config.
    """
    result = copy.deepcopy(self._data)
    # Collect the replacements first, then apply them; the set of keys is
    # unchanged so only the Config-valued entries are rewritten.
    converted = {key: value.toDict() for key, value in result.items() if isinstance(value, Config)}
    result.update(converted)
    return result

1043 

1044 

1045class ConfigSubset(Config): 

1046 """Config representing a subset of a more general configuration. 

1047 

1048 Subclasses define their own component and when given a configuration 

1049 that includes that component, the resulting configuration only includes 

1050 the subset. For example, your config might contain ``dimensions`` if it's 

1051 part of a global config and that subset will be stored. If ``dimensions`` 

1052 can not be found it is assumed that the entire contents of the 

1053 configuration should be used. 

1054 

1055 Default values are read from the environment or supplied search paths 

1056 using the default configuration file name specified in the subclass. 

1057 This allows a configuration class to be instantiated without any 

1058 additional arguments. 

1059 

1060 Additional validation can be specified to check for keys that are mandatory 

1061 in the configuration. 

1062 

1063 Parameters 

1064 ---------- 

1065 other : `Config` or `str` or `dict` 

1066 Argument specifying the configuration information as understood 

1067 by `Config` 

1068 validate : `bool`, optional 

1069 If `True` required keys will be checked to ensure configuration 

1070 consistency. 

1071 mergeDefaults : `bool`, optional 

1072 If `True` defaults will be read and the supplied config will 

1073 be combined with the defaults, with the supplied values taking 

1074 precedence. 

1075 searchPaths : `list` or `tuple`, optional 

1076 Explicit additional paths to search for defaults. They should 

1077 be supplied in priority order. These paths have higher priority 

1078 than those read from the environment in 

1079 `ConfigSubset.defaultSearchPaths()`. Paths can be `str` referring to 

1080 the local file system or URIs, `ButlerURI`. 

1081 """ 

1082 

1083 component: ClassVar[Optional[str]] = None 

1084 """Component to use from supplied config. Can be None. If specified the 

1085 key is not required. Can be a full dot-separated path to a component. 

1086 """ 

1087 

1088 requiredKeys: ClassVar[Sequence[str]] = () 

1089 """Keys that are required to be specified in the configuration. 

1090 """ 

1091 

1092 defaultConfigFile: ClassVar[Optional[str]] = None 

1093 """Name of the file containing defaults for this config class. 

1094 """ 

1095 

def __init__(self, other=None, validate=True, mergeDefaults=True, searchPaths=None):
    """Build the subset from defaults overlaid with the supplied values."""
    # Begin with an empty object: defaults are loaded into it first and
    # the externally supplied values are layered on top afterwards.
    super().__init__()

    # Parse the input as a plain Config so that no subsetting happens yet.
    externalConfig = Config(other)

    # Narrow the external config to this class's component. A doubled
    # key (component.component) is checked first because files pulled in
    # through !include may themselves contain the component name.
    component = self.component
    if component is not None:
        doubled = (component, component)
        if doubled in externalConfig:
            externalConfig = externalConfig[doubled]
        elif component in externalConfig:
            externalConfig._data = externalConfig._data[component]

    # Record of the default files read while building this configuration.
    self.filesRead = []

    # Stays None unless an imported "cls" declares child configurations.
    containerKey = None

    if mergeDefaults:
        # Explicit search paths outrank those derived from the environment.
        fullSearchPath = list(searchPaths) if searchPaths else []
        fullSearchPath.extend(self.defaultSearchPaths())

        # Defaults come from two places: the "defaultConfigFile" of this
        # class, and the defaults file of the class named by "cls".
        # "cls" is looked up after the first merge in case it changed.
        if self.defaultConfigFile is not None:
            self._updateWithConfigsFromPath(fullSearchPath, self.defaultConfigFile)

        # A class named in the external config wins over one from defaults.
        if "cls" in externalConfig:
            pytype = externalConfig["cls"]
        elif "cls" in self:
            pytype = self["cls"]
        else:
            pytype = None

        if pytype is not None:
            try:
                cls = doImport(pytype)
            except ImportError as e:
                raise RuntimeError(f"Failed to import cls '{pytype}' for config {type(self)}") from e
            defaultsFile = cls.defaultConfigFile
            if defaultsFile is not None:
                self._updateWithConfigsFromPath(fullSearchPath, defaultsFile)

            # Optional attribute; absent means no child configurations.
            containerKey = getattr(cls, "containerKey", None)

    # External values are applied last so they always override defaults.
    self.update(externalConfig)

    # Child configurations of this same config class need their own
    # defaults expanded as well.
    if mergeDefaults and containerKey is not None and containerKey in self:
        for idx, subConfig in enumerate(self[containerKey]):
            self[containerKey, idx] = type(self)(other=subConfig, validate=validate,
                                                 mergeDefaults=mergeDefaults,
                                                 searchPaths=searchPaths)

    if validate:
        self.validate()

1179 

@classmethod
def defaultSearchPaths(cls):
    """Determine the search paths to use for default configurations.

    Additional defaults can be supplied through the PATH-like
    environment variable ``$DAF_BUTLER_CONFIG_PATH``, where entries at
    the front of the list have priority over later ones. The global
    defaults, at lowest priority, are the ``configs`` resources of the
    package named by ``cls.resourcesPackage``.

    Returns
    -------
    paths : `list`
        Paths to search, highest priority first. Entries taken from the
        environment variable are `str`; the final entry is always a
        `ButlerURI` for the package ``configs`` resource directory.

    Notes
    -----
    The environment variable is split on `os.pathsep`, which currently
    makes it incompatible with URIs containing that separator.
    """
    defaultsPaths: List[Union[str, ButlerURI]] = []

    # Environment-supplied paths come first so they outrank the package
    # defaults appended below.
    fromEnvironment = os.environ.get(CONFIG_PATH)
    if fromEnvironment is not None:
        defaultsPaths.extend(fromEnvironment.split(os.pathsep))

    # The package defaults are addressed as a resource URI.
    packageDefaults = ButlerURI(f"resource://{cls.resourcesPackage}/configs",
                                forceDirectory=True)
    defaultsPaths.append(packageDefaults)
    return defaultsPaths

1216 

def _updateWithConfigsFromPath(self, searchPaths, configFile):
    """Merge in every copy of a config file found on the search path.

    Values read override those currently stored in this object. Every
    matching file on the path is read, in an order that gives earlier
    path entries the higher priority.

    Parameters
    ----------
    searchPaths : `list` of `ButlerURI`, `str`
        Directories to search for ``configFile``, in priority order:
        values from a file under an earlier entry win over those from
        a later entry. Entries may be `str` or `ButlerURI`.
    configFile : `ButlerURI` or `str`
        File to locate. If it is an absolute location that exists it is
        read directly and the search path is not consulted.

    Raises
    ------
    ValueError
        Raised if an entry of ``searchPaths`` is neither a `str` nor a
        `ButlerURI`.
    """
    uri = ButlerURI(configFile)
    if uri.isabs() and uri.exists():
        # An existing absolute location is used as-is.
        self._updateWithOtherConfigFile(configFile)
        self.filesRead.append(configFile)
        return

    # Walk the path back to front so the highest-priority entries are
    # applied last and therefore win.
    for pathDir in reversed(searchPaths):
        if not isinstance(pathDir, (str, ButlerURI)):
            raise ValueError(f"Unexpected search path type encountered: {pathDir!r}")
        directory = ButlerURI(pathDir, forceDirectory=True)
        candidate = directory.join(configFile)
        if candidate.exists():
            self.filesRead.append(candidate)
            self._updateWithOtherConfigFile(candidate)

1255 

def _updateWithOtherConfigFile(self, file):
    """Read a config with this class and overlay it on the current values.

    The supplied entity is loaded as an instance of this object's own
    class, without merging defaults and without validation, and its
    values then override the values currently held.

    Parameters
    ----------
    file : `Config`, `str`, `ButlerURI`, or `dict`
        Entity that can be converted to a `ConfigSubset`.
    """
    # Loading through our own class makes the component subsetting apply
    # to the incoming content before it is merged.
    loader = type(self)
    self.update(loader(file, validate=False, mergeDefaults=False))

1272 

def validate(self):
    """Check that every mandatory key is present in this configuration.

    Each entry of ``requiredKeys`` is looked up directly in the
    top level of the stored data. Nothing is checked when
    ``requiredKeys`` is empty.

    Raises
    ------
    KeyError
        Raised if one or more required keys are absent; the message
        lists the missing keys.
    """
    stored = self._data
    missing = []
    for required in self.requiredKeys:
        if required not in stored:
            missing.append(required)

    if not missing:
        return
    raise KeyError(f"Mandatory keys ({missing}) missing from supplied configuration for {type(self)}")