Coverage for python/lsst/daf/butler/core/config.py: 44%

482 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-02-05 02:04 -0800

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24"""Configuration control.""" 

25 

26__all__ = ("Config", "ConfigSubset") 

27 

28import collections 

29import copy 

30import io 

31import json 

32import logging 

33import os 

34import pprint 

35import sys 

36from pathlib import Path 

37from typing import IO, Any, ClassVar, Dict, List, Optional, Sequence, Tuple, Union 

38 

39import yaml 

40from lsst.resources import ResourcePath, ResourcePathExpression 

41from lsst.utils import doImport 

42from yaml.representer import Representer 

43 

# Register a representer so that ``collections.defaultdict`` instances are
# dumped by the default YAML dumper using the dict representation.
yaml.add_representer(collections.defaultdict, Representer.represent_dict)

# Config module logger
log = logging.getLogger(__name__)

# PATH-like environment variable to use for defaults.
CONFIG_PATH = "DAF_BUTLER_CONFIG_PATH"

# Use the C implementation of the safe loader when PyYAML provides it.
# Not all installations have the C library
# (but assume for mypy's sake that they're the same)
if hasattr(yaml, "CSafeLoader"):
    yamlLoader = yaml.CSafeLoader
else:
    yamlLoader = yaml.SafeLoader  # type: ignore

59 

60 

61def _doUpdate(d, u): 

62 if not isinstance(u, collections.abc.Mapping) or not isinstance(d, collections.abc.MutableMapping): 62 ↛ 63line 62 didn't jump to line 63, because the condition on line 62 was never true

63 raise RuntimeError("Only call update with Mapping, not {}".format(type(d))) 

64 for k, v in u.items(): 

65 if isinstance(v, collections.abc.Mapping): 

66 d[k] = _doUpdate(d.get(k, {}), v) 

67 else: 

68 d[k] = v 

69 return d 

70 

71 

72def _checkNextItem(k, d, create, must_be_dict): 

73 """See if k is in d and if it is return the new child.""" 

74 nextVal = None 

75 isThere = False 

76 if d is None: 76 ↛ 78line 76 didn't jump to line 78, because the condition on line 76 was never true

77 # We have gone past the end of the hierarchy 

78 pass 

79 elif not must_be_dict and isinstance(d, collections.abc.Sequence): 79 ↛ 84line 79 didn't jump to line 84, because the condition on line 79 was never true

80 # Check for Sequence first because for lists 

81 # __contains__ checks whether value is found in list 

82 # not whether the index exists in list. When we traverse 

83 # the hierarchy we are interested in the index. 

84 try: 

85 nextVal = d[int(k)] 

86 isThere = True 

87 except IndexError: 

88 pass 

89 except ValueError: 

90 isThere = k in d 

91 elif k in d: 

92 nextVal = d[k] 

93 isThere = True 

94 elif create: 94 ↛ 95line 94 didn't jump to line 95, because the condition on line 94 was never true

95 d[k] = {} 

96 nextVal = d[k] 

97 isThere = True 

98 

99 return nextVal, isThere 

100 

101 

class Loader(yamlLoader):
    """YAML Loader that supports file include directives.

    Uses ``!include`` directive in a YAML file to point to another
    YAML file to be included. The path in the include directive is relative
    to the file containing that directive.

        storageClasses: !include storageClasses.yaml

    Parameters
    ----------
    stream : `str` or `IO`
        Content to parse. If it has a ``name`` attribute that name is used
        as the root against which relative includes are resolved.

    Examples
    --------
    >>> with open("document.yaml", "r") as f:
    ...     data = yaml.load(f, Loader=Loader)

    Notes
    -----
    See https://davidchall.github.io/yaml-includes.html
    """

    def __init__(self, stream):
        super().__init__(stream)
        # if this is a string and not a stream we may well lack a name.
        # Test for the attribute explicitly so that an AttributeError
        # raised while building the ResourcePath is not silently hidden.
        if hasattr(stream, "name"):
            self._root = ResourcePath(stream.name)
        else:
            # No choice but to assume a local filesystem
            self._root = ResourcePath("no-file.yaml")

    def include(self, node):
        """Construct the content referenced by an ``!include`` node.

        Parameters
        ----------
        node : `yaml.Node`
            Scalar (single file), sequence (list of files) or mapping
            (key to file) node carrying the ``!include`` tag.

        Returns
        -------
        result : `object`, `list` or `dict`
            Parsed content of the file for a scalar node, a list of parsed
            contents for a sequence node, or a dict of parsed contents for
            a mapping node.

        Raises
        ------
        yaml.constructor.ConstructorError
            Raised if the node is not a scalar, sequence or mapping node.
        """
        if isinstance(node, yaml.ScalarNode):
            return self.extractFile(self.construct_scalar(node))

        if isinstance(node, yaml.SequenceNode):
            return [self.extractFile(filename) for filename in self.construct_sequence(node)]

        if isinstance(node, yaml.MappingNode):
            return {k: self.extractFile(v) for k, v in self.construct_mapping(node).items()}

        # Carry the diagnostic in the exception rather than printing it.
        raise yaml.constructor.ConstructorError(
            None,
            None,
            f"unrecognised node type ({type(node).__name__}) in !include statement",
            getattr(node, "start_mark", None),
        )

    def extractFile(self, filename):
        """Read and parse a YAML file referenced by an include directive.

        Parameters
        ----------
        filename : `str`
            Location of the file to include. It can be an explicit URI or
            a scheme-less path relative to the including file.

        Returns
        -------
        data : `object`
            Result of parsing the referenced file with this loader class.
        """
        # It is possible for the !include to point to an explicit URI
        # instead of a relative URI, therefore we first see if it is
        # scheme-less or not. If it has a scheme we use it directly
        # if it is scheme-less we use it relative to the file root.
        requesteduri = ResourcePath(filename, forceAbsolute=False)

        if requesteduri.scheme:
            fileuri = requesteduri
        else:
            fileuri = self._root.updatedFile(filename)

        log.debug("Opening YAML file via !include: %s", fileuri)

        # Read all the data from the resource
        data = fileuri.read()

        # Store the bytes into a BytesIO so we can attach a .name
        stream = io.BytesIO(data)
        stream.name = fileuri.geturl()
        return yaml.load(stream, Loader)


# Register the ``!include`` handler once, when the class is defined, rather
# than mutating class-level state every time a loader is instantiated.
Loader.add_constructor("!include", Loader.include)

173 

174 

175class Config(collections.abc.MutableMapping): 

176 r"""Implements a datatype that is used by `Butler` for configuration. 

177 

178 It is essentially a `dict` with key/value pairs, including nested dicts 

179 (as values). In fact, it can be initialized with a `dict`. 

180 This is explained next: 

181 

182 Config extends the `dict` api so that hierarchical values may be accessed 

183 with delimited notation or as a tuple. If a string is given the delimiter 

184 is picked up from the first character in that string. For example, 

185 ``foo.getValue(".a.b.c")``, ``foo["a"]["b"]["c"]``, ``foo["a", "b", "c"]``, 

186 ``foo[".a.b.c"]``, and ``foo["/a/b/c"]`` all achieve the same outcome. 

187 If the first character is alphanumeric, no delimiter will be used. 

188 ``foo["a.b.c"]`` will be a single key ``a.b.c`` as will ``foo[":a.b.c"]``. 

189 Unicode characters can be used as the delimiter for distinctiveness if 

190 required. 

191 

192 If a key in the hierarchy starts with a non-alphanumeric character care 

193 should be used to ensure that either the tuple interface is used or 

194 a distinct delimiter is always given in string form. 

195 

196 Finally, the delimiter can be escaped if it is part of a key and also 

197 has to be used as a delimiter. For example, ``foo[r".a.b\.c"]`` results in 

198 a two element hierarchy of ``a`` and ``b.c``. For hard-coded strings it is 

199 always better to use a different delimiter in these cases. 

200 

201 Note that adding a multi-level key implicitly creates any nesting levels 

202 that do not exist, but removing multi-level keys does not automatically 

203 remove empty nesting levels. As a result: 

204 

205 >>> c = Config() 

206 >>> c[".a.b"] = 1 

207 >>> del c[".a.b"] 

208 >>> c 

209 Config({'a': {}}) 

210 

211 Storage formats supported: 

212 

213 - yaml: read and write is supported. 

214 - json: read and write is supported but no ``!include`` directive. 

215 

216 Parameters 

217 ---------- 

218 other : `lsst.resources.ResourcePath` or `Config` or `dict` 

219 Other source of configuration, can be: 

220 

221 - (`lsst.resources.ResourcePathExpression`) 

222 Treated as a URI to a config file. Must end with ".yaml" or ".json". 

223 - (`Config`) Copies the other Config's values into this one. 

224 - (`dict`) Copies the values from the dict into this Config. 

225 

226 If `None` is provided an empty `Config` will be created. 

227 """ 

228 

229 _D: str = "→" 

230 """Default internal delimiter to use for components in the hierarchy when 

231 constructing keys for external use (see `Config.names()`).""" 

232 

233 includeKey: ClassVar[str] = "includeConfigs" 

234 """Key used to indicate that another config should be included at this 

235 part of the hierarchy.""" 

236 

237 resourcesPackage: str = "lsst.daf.butler" 

238 """Package to search for default configuration data. The resources 

239 themselves will be within a ``configs`` resource hierarchy.""" 

240 

241 def __init__(self, other=None): 

242 self._data: Dict[str, Any] = {} 

243 self.configFile = None 

244 

245 if other is None: 

246 return 

247 

248 if isinstance(other, Config): 

249 # Deep copy might be more efficient but if someone has overridden 

250 # a config entry to store a complex object then deep copy may 

251 # fail. Safer to use update(). 

252 self.update(other._data) 

253 self.configFile = other.configFile 

254 elif isinstance(other, (dict, collections.abc.Mapping)): 

255 # In most cases we have a dict, and it's more efficient 

256 # to check for a dict instance before checking the generic mapping. 

257 self.update(other) 

258 elif isinstance(other, (str, ResourcePath, Path)): 258 ↛ 265line 258 didn't jump to line 265, because the condition on line 258 was never false

259 # if other is a string, assume it is a file path/URI 

260 self.__initFromUri(other) 

261 self._processExplicitIncludes() 

262 else: 

263 # if the config specified by other could not be recognized raise 

264 # a runtime error. 

265 raise RuntimeError(f"A Config could not be loaded from other: {other}") 

266 

def ppprint(self):
    """Format the configuration content as a readable string.

    Examples
    --------
    use: ``pdb> print(myConfigObject.ppprint())``

    Returns
    -------
    s : `str`
        Pretty-printed representation of the underlying data, produced
        with an indent of 2 and a requested width of 1.
    """
    formatted = pprint.pformat(self._data, indent=2, width=1)
    return formatted

280 

281 def __repr__(self): 

282 return f"{type(self).__name__}({self._data!r})" 

283 

284 def __str__(self): 

285 return self.ppprint() 

286 

287 def __len__(self): 

288 return len(self._data) 

289 

290 def __iter__(self): 

291 return iter(self._data) 

292 

def copy(self):
    """Return a new instance of the same class built from this one.

    Returns
    -------
    new : same type as ``self``
        Result of calling the class of ``self`` with ``self`` as its only
        argument.
    """
    cls = type(self)
    return cls(self)

295 

@classmethod
def fromString(cls, string: str, format: str = "yaml") -> Config:
    """Build a new instance from serialized text.

    Parameters
    ----------
    string : `str`
        Serialized configuration in the given format.
    format : `str`, optional
        Format of the supplied string. Can be ``json`` or ``yaml``.

    Returns
    -------
    c : `Config`
        Newly-constructed Config, with explicit includes processed.

    Raises
    ------
    ValueError
        Raised if ``format`` is neither ``yaml`` nor ``json``.
    """
    # Reject unknown formats before constructing anything.
    if format not in ("yaml", "json"):
        raise ValueError(f"Unexpected format of string: {format}")

    new_config = cls()
    if format == "yaml":
        new_config = new_config.__initFromYaml(string)
    else:
        new_config = new_config.__initFromJson(string)
    new_config._processExplicitIncludes()
    return new_config

320 

@classmethod
def fromYaml(cls, string: str) -> Config:
    """Build a new instance from YAML text.

    Convenience wrapper around `fromString` with ``format="yaml"``.

    Parameters
    ----------
    string : `str`
        String containing content in YAML format

    Returns
    -------
    c : `Config`
        Newly-constructed Config.
    """
    config = cls.fromString(string, format="yaml")
    return config

336 

def __initFromUri(self, path: ResourcePathExpression) -> None:
    """Populate this config from a file given by path or URI.

    The file extension selects the parser: ``.yaml`` or ``.json``.
    On success ``configFile`` is set to the URI that was read.

    Parameters
    ----------
    path : `lsst.resources.ResourcePathExpression`
        Path or a URI to a persisted config file.

    Raises
    ------
    FileNotFoundError
        Raised if the extension is unsupported and the location does not
        exist.
    RuntimeError
        Raised if the location exists but its extension is unsupported.
    """
    uri = ResourcePath(path)
    ext = uri.getExtension()

    if ext not in (".yaml", ".json"):
        # This URI does not have a valid extension. It might be because
        # we ended up with a directory and not a file. Before we complain
        # about an extension, do an existence check. No need to do
        # the (possibly expensive) existence check in the supported
        # cases because we will find out soon enough that the file
        # is not there.
        if not uri.exists():
            raise FileNotFoundError(f"Config location {uri} does not exist.")
        raise RuntimeError(f"The Config URI does not have a supported extension: {uri}")

    if ext == ".yaml":
        log.debug("Opening YAML config file: %s", uri.geturl())
        content = uri.read()
        # Use a stream so we can name it
        stream = io.BytesIO(content)
        stream.name = uri.geturl()
        self.__initFromYaml(stream)
    else:
        log.debug("Opening JSON config file: %s", uri.geturl())
        content = uri.read()
        self.__initFromJson(content)

    self.configFile = uri

369 

def __initFromYaml(self, stream):
    """Replace the content with the result of parsing a YAML source.

    Parameters
    ----------
    stream: `IO` or `str`
        Stream to pass to the YAML loader. Accepts anything that
        `yaml.load` accepts. This can include a string as well as an
        IO stream.

    Returns
    -------
    self : `Config`
        This instance, to allow chaining.

    Raises
    ------
    yaml.YAMLError
        If there is an error loading the file.
    """
    parsed = yaml.load(stream, Loader=Loader)
    # An empty document parses to None; store an empty dict instead.
    self._data = {} if parsed is None else parsed
    return self

390 

391 def __initFromJson(self, stream): 

392 """Load a JSON config from any readable stream that contains one. 

393 

394 Parameters 

395 ---------- 

396 stream: `IO` or `str` 

397 Stream to pass to the JSON loader. This can include a string as 

398 well as an IO stream. 

399 

400 Raises 

401 ------ 

402 TypeError: 

403 Raised if there is an error loading the content. 

404 """ 

405 if isinstance(stream, (bytes, str)): 

406 content = json.loads(stream) 

407 else: 

408 content = json.load(stream) 

409 if content is None: 

410 content = {} 

411 self._data = content 

412 return self 

413 

def _processExplicitIncludes(self):
    """Expand every ``includeConfigs`` directive found in the hierarchy.

    For each key whose final component equals ``includeKey`` the listed
    files are read as configs of the same type as ``self`` and merged in
    order. The values already present at that level are then applied on
    top, so explicit values take precedence over included ones.

    Raises
    ------
    RuntimeError
        Raised if ``configFile`` is set but is not a
        `lsst.resources.ResourcePath`, or if a referenced include file
        can not be found.
    """
    # Search paths for config files: the current directory and, if known,
    # the directory of the file this config was read from.
    searchPaths = [ResourcePath(os.path.curdir, forceDirectory=True)]
    if self.configFile is not None:
        if not isinstance(self.configFile, ResourcePath):
            raise RuntimeError(f"Unexpected type for config file: {self.configFile}")
        searchPaths.append(self.configFile.dirname())

    # Work from a snapshot of the key hierarchy taken before any changes.
    for path in self.nameTuples():
        if path[-1] != self.includeKey:
            continue

        log.debug("Processing file include directive at %s", self._D + self._D.join(path))
        basePath = path[:-1]

        # Extract the includes and then delete them from the config
        includes = self[path]
        del self[path]

        # Be consistent and convert to a list
        if not isinstance(includes, list):
            includes = [includes]

        # Read each file assuming it is a reference to a file
        # The file can be relative to config file or cwd
        # ConfigSubset search paths are not used
        subConfigs = []
        for fileName in includes:
            # Expand any shell variables -- this could be URI
            fileName = ResourcePath(os.path.expandvars(fileName), forceAbsolute=False)
            found = None
            if fileName.isabs():
                found = fileName
            else:
                # No early exit: the last search path holding the file wins.
                for searchDir in searchPaths:
                    if not isinstance(searchDir, ResourcePath):
                        log.warning(
                            "Do not understand search path entry '%s' of type %s",
                            searchDir,
                            type(searchDir).__name__,
                        )
                        continue
                    specific = searchDir.join(fileName.path)
                    # Remote resource check might be expensive
                    if specific.exists():
                        found = specific
            if not found:
                raise RuntimeError(f"Unable to find referenced include file: {fileName}")

            # Read the referenced Config as a Config
            subConfigs.append(type(self)(found))

        # Now we need to merge these sub configs with the current
        # information that was present in this node in the config
        # tree with precedence given to the explicit values
        newConfig = subConfigs[0]
        for extra in subConfigs[1:]:
            newConfig.update(extra)

        # Explicit values take precedence
        if basePath:
            newConfig.update(self[basePath])
            # And reattach to the base config
            self[basePath] = newConfig
        else:
            # This is an include at the root config
            newConfig.update(self)
            # Replace the current config
            self._data = newConfig._data

489 

490 @staticmethod 

491 def _splitIntoKeys(key): 

492 r"""Split the argument for get/set/in into a hierarchical list. 

493 

494 Parameters 

495 ---------- 

496 key : `str` or iterable 

497 Argument given to get/set/in. If an iterable is provided it will 

498 be converted to a list. If the first character of the string 

499 is not an alphanumeric character then it will be used as the 

500 delimiter for the purposes of splitting the remainder of the 

501 string. If the delimiter is also in one of the keys then it 

502 can be escaped using ``\``. There is no default delimiter. 

503 

504 Returns 

505 ------- 

506 keys : `list` 

507 Hierarchical keys as a `list`. 

508 """ 

509 if isinstance(key, str): 

510 if not key[0].isalnum(): 510 ↛ 511line 510 didn't jump to line 511, because the condition on line 510 was never true

511 d = key[0] 

512 key = key[1:] 

513 else: 

514 return [ 

515 key, 

516 ] 

517 escaped = f"\\{d}" 

518 temp = None 

519 if escaped in key: 

520 # Complain at the attempt to escape the escape 

521 doubled = rf"\{escaped}" 

522 if doubled in key: 

523 raise ValueError( 

524 f"Escaping an escaped delimiter ({doubled} in {key}) is not yet supported." 

525 ) 

526 # Replace with a character that won't be in the string 

527 temp = "\r" 

528 if temp in key or d == temp: 

529 raise ValueError( 

530 f"Can not use character {temp!r} in hierarchical key or as" 

531 " delimiter if escaping the delimiter" 

532 ) 

533 key = key.replace(escaped, temp) 

534 hierarchy = key.split(d) 

535 if temp: 

536 hierarchy = [h.replace(temp, d) for h in hierarchy] 

537 return hierarchy 

538 elif isinstance(key, collections.abc.Iterable): 538 ↛ 542line 538 didn't jump to line 542, because the condition on line 538 was never false

539 return list(key) 

540 else: 

541 # Not sure what this is so try it anyway 

542 return [ 

543 key, 

544 ] 

545 

546 def _getKeyHierarchy(self, name): 

547 """Retrieve the key hierarchy for accessing the Config. 

548 

549 Parameters 

550 ---------- 

551 name : `str` or `tuple` 

552 Delimited string or `tuple` of hierarchical keys. 

553 

554 Returns 

555 ------- 

556 hierarchy : `list` of `str` 

557 Hierarchy to use as a `list`. If the name is available directly 

558 as a key in the Config it will be used regardless of the presence 

559 of any nominal delimiter. 

560 """ 

561 if name in self._data: 

562 keys = [ 

563 name, 

564 ] 

565 else: 

566 keys = self._splitIntoKeys(name) 

567 return keys 

568 

def _findInHierarchy(self, keys, create=False):
    """Walk down the hierarchy following the supplied keys.

    Parameters
    ----------
    keys : `list` or `tuple`
        Keys to search in hierarchy.
    create : `bool`, optional
        If `True`, if a part of the hierarchy does not exist, insert an
        empty `dict` into the hierarchy.

    Returns
    -------
    hierarchy : `list`
        The value found for each key, in order. The walk stops at the
        first key that is missing, so only keys that exist contribute.
    complete : `bool`
        `True` if every key was found, in which case the final element
        of ``hierarchy`` is the value for the full key.
    """
    current = self._data
    found = []
    for depth, k in enumerate(keys):
        # The top level is always the internal dict, so the sequence
        # check is only worthwhile from the second key onwards.
        current, isThere = _checkNextItem(k, current, create, depth == 0)
        if not isThere:
            return found, False
        found.append(current)
    return found, True

609 

610 def __getitem__(self, name): 

611 # Override the split for the simple case where there is an exact 

612 # match. This allows `Config.items()` to work via a simple 

613 # __iter__ implementation that returns top level keys of 

614 # self._data. 

615 

616 # If the name matches a key in the top-level hierarchy, bypass 

617 # all further cleverness. 

618 found_directly = False 

619 try: 

620 data = self._data[name] 

621 found_directly = True 

622 except KeyError: 

623 pass 

624 

625 if not found_directly: 625 ↛ 626line 625 didn't jump to line 626, because the condition on line 625 was never true

626 keys = self._getKeyHierarchy(name) 

627 

628 hierarchy, complete = self._findInHierarchy(keys) 

629 if not complete: 

630 raise KeyError(f"{name} not found") 

631 data = hierarchy[-1] 

632 

633 # In most cases we have a dict, and it's more efficient 

634 # to check for a dict instance before checking the generic mapping. 

635 if isinstance(data, (dict, collections.abc.Mapping)): 

636 data = Config(data) 

637 # Ensure that child configs inherit the parent internal delimiter 

638 if self._D != Config._D: 638 ↛ 639line 638 didn't jump to line 639, because the condition on line 638 was never true

639 data._D = self._D 

640 return data 

641 

def __setitem__(self, name, value):
    """Store a value under a top-level or hierarchical key.

    Missing intermediate levels are created as empty dicts. A `Config`
    value is stored as a deep copy of its underlying data.

    Parameters
    ----------
    name : `str` or `tuple`
        Top-level key, delimited string, or tuple of keys.
    value : `object`
        Value to store.
    """
    keys = self._getKeyHierarchy(name)
    last = keys.pop()
    if isinstance(value, Config):
        value = copy.deepcopy(value._data)

    hierarchy, _ = self._findInHierarchy(keys, create=True)
    # With no parent keys the value goes into the top-level dict.
    target = hierarchy[-1] if hierarchy else self._data

    try:
        target[last] = value
    except TypeError:
        # Retry with an integer index (e.g. the target is a list).
        target[int(last)] = value

658 

659 def __contains__(self, key): 

660 keys = self._getKeyHierarchy(key) 

661 hierarchy, complete = self._findInHierarchy(keys) 

662 return complete 

663 

664 def __delitem__(self, key): 

665 keys = self._getKeyHierarchy(key) 

666 last = keys.pop() 

667 hierarchy, complete = self._findInHierarchy(keys) 

668 if complete: 668 ↛ 675line 668 didn't jump to line 675, because the condition on line 668 was never false

669 if hierarchy: 669 ↛ 670line 669 didn't jump to line 670, because the condition on line 669 was never true

670 data = hierarchy[-1] 

671 else: 

672 data = self._data 

673 del data[last] 

674 else: 

675 raise KeyError(f"{key} not found in Config") 

676 

def update(self, other):
    """Merge the content of another `Config` or `dict` into this one.

    Unlike `dict.update()`, nested mappings are merged key by key rather
    than being replaced as a whole.

    Parameters
    ----------
    other : `dict` or `Config`
        Source of configuration whose values take precedence.

    Examples
    --------
    >>> c = Config({"a": {"b": 1}})
    >>> c.update({"a": {"c": 2}})
    >>> c["a"]
    Config({'b': 1, 'c': 2})

    A plain `dict` replaces the nested value instead:

    >>> foo = {"a": {"b": 1}}
    >>> foo.update({"a": {"c": 2}})
    >>> print(foo)
    {'a': {'c': 2}}
    """
    target = self._data
    _doUpdate(target, other)

701 

def merge(self, other):
    """Add keys and values from another mapping that are missing here.

    Like `Config.update()`, but only keys & values that DO NOT EXIST in
    self are taken from ``other``. Keys and values that already exist in
    self will NOT be overwritten.

    Parameters
    ----------
    other : `dict` or `Config`
        Source of configuration providing the fallback values.

    Raises
    ------
    TypeError
        Raised if ``other`` is not a mapping.
    """
    if not isinstance(other, collections.abc.Mapping):
        raise TypeError(f"Can only merge a Mapping into a Config, not {type(other)}")

    # Start from a separate Config built from the supplied mapping so that
    # ``other`` itself is left alone, then let our own values win.
    merged = Config(other)
    merged.update(self)
    self._data = merged._data

723 

def nameTuples(self, topLevelOnly=False):
    """List every key in the hierarchy as a tuple of components.

    The tuples returned from this method are guaranteed to be usable
    to access items in the configuration object.

    Parameters
    ----------
    topLevelOnly : `bool`, optional
        If False, the default, a full hierarchy of names is returned.
        If True, only the top level are returned.

    Returns
    -------
    names : `list` of `tuple` of `str`
        One tuple per key. Each parent key appears before its children,
        and elements of sequences are addressed by integer index.
    """
    if topLevelOnly:
        return [(k,) for k in self]

    keys: List[Tuple[str, ...]] = []

    def collect(node, prefix):
        # Sequences are addressed by position, mappings by key.
        if isinstance(node, collections.abc.Sequence):
            children = enumerate(node)
        else:
            children = node.items()
        for key, val in children:
            levelKey = prefix + (key,)
            keys.append(levelKey)
            # Descend into containers, but never into strings.
            is_container = isinstance(val, (collections.abc.Mapping, collections.abc.Sequence))
            if is_container and not isinstance(val, str):
                collect(val, levelKey)

    collect(self._data, ())
    return keys

762 

def names(self, topLevelOnly=False, delimiter=None):
    """List every key in the hierarchy as a delimited string.

    The values returned from this method are guaranteed to be usable
    to access items in the configuration object.

    Parameters
    ----------
    topLevelOnly : `bool`, optional
        If False, the default, a full hierarchy of names is returned.
        If True, only the top level are returned.
    delimiter : `str`, optional
        Delimiter to use when forming the keys. If the delimiter is
        present in any of the keys, it will be escaped in the returned
        names. If `None` given a delimiter will be automatically provided.
        The delimiter can not be alphanumeric.

    Returns
    -------
    names : `list` of `str`
        List of all names present in the `Config`.

    Notes
    -----
    This is different than the built-in method `dict.keys`, which will
    return only the first level keys.

    Raises
    ------
    ValueError:
        The supplied delimiter is alphanumeric, or no usable delimiter
        could be determined automatically.
    """
    if topLevelOnly:
        return list(self.keys())

    # Get all the tuples of hierarchical keys
    nameTuples = self.nameTuples()

    if delimiter is not None and delimiter.isalnum():
        raise ValueError(f"Supplied delimiter ({delimiter!r}) must not be alphanumeric.")

    if delimiter is None:
        # Start with the default, and ensure the choice does not need to
        # be escaped (it is much easier to understand if not escaped)
        delimiter = self._D

        # Form big string for easy check of delimiter clash
        combined = "".join("".join(str(s) for s in k) for k in nameTuples)

        # Keep trying successive characters until one does not clash.
        ntries = 0
        while delimiter in combined:
            log.debug("Delimiter '%s' could not be used. Trying another.", delimiter)
            ntries += 1

            if ntries > 100:
                raise ValueError(f"Unable to determine a delimiter for Config {self}")

            # Advance to the next non-alphanumeric code point.
            candidate = chr(ord(delimiter) + 1)
            while candidate.isalnum():
                candidate = chr(ord(candidate) + 1)
            delimiter = candidate

    log.debug("Using delimiter %r", delimiter)

    # Form the keys, escaping the delimiter if necessary
    escaped = f"\\{delimiter}"
    return [
        delimiter + delimiter.join(str(part).replace(delimiter, escaped) for part in k)
        for k in nameTuples
    ]

836 

def asArray(self, name):
    """Get a value wrapped so that it can always be treated as an array.

    Parameters
    ----------
    name : `str`
        Key to use to retrieve value.

    Returns
    -------
    array : `collections.abc.Sequence`
        If the value is a `~collections.abc.Sequence` (and not a `str`)
        the value itself is returned. Anything else, including a `str`,
        is returned as the single element of a new `list`.
    """
    val = self.get(name)
    # Strings are sequences too, but are treated as single values here.
    if isinstance(val, str) or not isinstance(val, collections.abc.Sequence):
        return [val]
    return val

861 

def __eq__(self, other):
    """Compare the underlying data with another `Config` or object."""
    # Compare against the raw data of another Config, or the object itself.
    rhs = other._data if isinstance(other, Config) else other
    return self._data == rhs

866 

def __ne__(self, other):
    """Report whether the underlying data differs from another object."""
    # Compare against the raw data of another Config, or the object itself.
    rhs = other._data if isinstance(other, Config) else other
    return self._data != rhs

871 

872 ####### 

873 # i/o # 

874 

def dump(self, output: Optional[IO] = None, format: str = "yaml") -> Optional[str]:
    """Serialize the config, either to a stream or to a returned string.

    Parameters
    ----------
    output : `IO`, optional
        The stream to use for output. If `None` the serialized content
        will be returned.
    format : `str`, optional
        The format to use for the output. Can be "yaml" or "json".

    Returns
    -------
    serialized : `str` or `None`
        If a stream was given the stream will be used and the return
        value will be `None`. If the stream was `None` the
        serialization will be returned as a string.

    Raises
    ------
    ValueError
        Raised if ``format`` is not a supported format.
    """
    if format == "yaml":
        return yaml.safe_dump(self._data, output, default_flow_style=False)

    if format != "json":
        raise ValueError(f"Unsupported format for Config serialization: {format}")

    if output is None:
        return json.dumps(self._data, ensure_ascii=False)

    json.dump(self._data, output, ensure_ascii=False)
    return None

902 

def dumpToUri(
    self,
    uri: ResourcePathExpression,
    updateFile: bool = True,
    defaultFileName: str = "butler.yaml",
    overwrite: bool = True,
) -> None:
    """Serialize the config and write it to the given location.

    The output format is taken from the file extension of the final URI.
    On success ``configFile`` is updated to that URI.

    Parameters
    ----------
    uri: `lsst.resources.ResourcePathExpression`
        URI of location where the Config will be written.
    updateFile : `bool`, optional
        If True and uri does not end on a filename with extension, will
        append `defaultFileName` to the target uri. True by default.
    defaultFileName : `str`, optional
        The file name that will be appended to target uri if updateFile is
        True and uri does not end on a file with an extension.
    overwrite : `bool`, optional
        Passed on to the URI's ``write`` method. If True the configuration
        will be written even if it already exists at that location.
    """
    # Make local copy of URI or create new one
    target = ResourcePath(uri)

    if updateFile and not target.getExtension():
        target = target.updatedFile(defaultFileName)

    # Work out the format from the extension, without the leading dot.
    fmt = target.getExtension()[1:].lower()

    output = self.dump(format=fmt)
    assert output is not None, "Config.dump guarantees not-None return when output arg is None"
    target.write(output.encode(), overwrite=overwrite)
    self.configFile = target

942 

@staticmethod
def updateParameters(configType, config, full, toUpdate=None, toCopy=None, overwrite=True, toMerge=None):
    """Set, copy or merge selected parameters in a config.

    Named parameters can be assigned new values in bulk, and other
    values can be copied or merged from a reference config.

    The supplied config is assumed to be compatible with ``configType``.
    The updated values are attached to it under the related component
    key. ``config`` and ``full`` are assumed to come from the same part
    of the configuration hierarchy.

    Parameters
    ----------
    configType : `ConfigSubset`
        Config type to use to extract relevant items from ``config``.
    config : `Config`
        A `Config` to update. Only the subset understood by
        the supplied `ConfigSubset` will be modified. Default values
        will not be inserted and the content will not be validated
        since mandatory keys are allowed to be missing until
        populated later by merging.
    full : `Config`
        A complete config with all defaults expanded that can be
        converted to a ``configType``. Read-only and will not be
        modified by this method. Values are read from here if
        ``toCopy`` is defined.

        Repository-specific options that should not be obtained
        from defaults when Butler instances are constructed
        should be copied from ``full`` to ``config``.
    toUpdate : `dict`, optional
        A `dict` defining the keys to update and the new value to use.
        The keys and values can be any supported by `Config`
        assignment.
    toCopy : `tuple`, optional
        `tuple` of keys whose values should be copied from ``full``
        into ``config``.
    overwrite : `bool`, optional
        If `False`, do not modify a value in ``config`` if the key
        already exists. Default is always to overwrite.
    toMerge : `tuple`, optional
        Keys to merge content from full to config without overwriting
        pre-existing values. Only works if the key refers to a hierarchy.
        The ``overwrite`` flag is ignored.

    Raises
    ------
    ValueError
        Neither ``toUpdate``, ``toCopy`` nor ``toMerge`` were defined.
    """
    if all(arg is None for arg in (toUpdate, toCopy, toMerge)):
        raise ValueError("At least one of toUpdate, toCopy, or toMerge parameters must be set.")

    component = configType.component

    # ``full`` is the complete reference: if it has the component key but
    # ``config`` does not, add a stub entry so that the ConfigSubset
    # constructor extracts the right part of ``config``.
    if component in full and component not in config:
        config[component] = {}

    # The part of the config being modified; no defaults, no validation.
    target = configType(config, mergeDefaults=False, validate=False)

    for key, value in (toUpdate or {}).items():
        if key in target and not overwrite:
            log.debug(
                "Not overriding key '%s' with value '%s' in config %s",
                key,
                value,
                target.__class__.__name__,
            )
            continue
        target[key] = value

    # The reference subset is only constructed when something reads it.
    reference = configType(full, mergeDefaults=False) if (toCopy or toMerge) else None

    for key in toCopy or ():
        if key in target and not overwrite:
            log.debug(
                "Not overriding key '%s' from defaults in config %s",
                key,
                target.__class__.__name__,
            )
            continue
        target[key] = reference[key]

    for key in toMerge or ():
        if key not in target:
            target[key] = reference[key]
            continue
        # Fetch the node, merge into it, then reattach it to the config.
        node = target[key]
        node.merge(reference[key])
        target[key] = node

    # Reattach to the parent if this is a child config.
    if component in config:
        config[component] = target
    else:
        config.update(target)

1050 

def toDict(self):
    """Return the content as a standalone hierarchical `dict`.

    Returns
    -------
    d : `dict`
        A deep copy of the underlying data in which every top-level
        value that is a `Config` has been replaced by the result of its
        own ``toDict()``.

    Notes
    -----
    This can be useful when passing a Config to some code that
    expects native Python types.
    """
    result = copy.deepcopy(self._data)
    # Work out which entries need converting before touching the copy.
    configKeys = [key for key, value in result.items() if isinstance(value, Config)]
    for key in configKeys:
        result[key] = result[key].toDict()
    return result

1071 

1072 

class ConfigSubset(Config):
    """Config representing a subset of a more general configuration.

    Subclasses define their own component and when given a configuration
    that includes that component, the resulting configuration only includes
    the subset. For example, your config might contain ``dimensions`` if it's
    part of a global config and that subset will be stored. If ``dimensions``
    can not be found it is assumed that the entire contents of the
    configuration should be used.

    Default values are read from the environment or supplied search paths
    using the default configuration file name specified in the subclass.
    This allows a configuration class to be instantiated without any
    additional arguments.

    Additional validation can be specified to check for keys that are mandatory
    in the configuration.

    Parameters
    ----------
    other : `Config` or `str` or `dict`
        Argument specifying the configuration information as understood
        by `Config`
    validate : `bool`, optional
        If `True` required keys will be checked to ensure configuration
        consistency.
    mergeDefaults : `bool`, optional
        If `True` defaults will be read and the supplied config will
        be combined with the defaults, with the supplied values taking
        precedence.
    searchPaths : `list` or `tuple`, optional
        Explicit additional paths to search for defaults. They should
        be supplied in priority order. These paths have higher priority
        than those read from the environment in
        `ConfigSubset.defaultSearchPaths()`. Paths can be `str` referring to
        the local file system or URIs, `lsst.resources.ResourcePath`.
    """

    component: ClassVar[Optional[str]] = None
    """Component to use from supplied config. Can be None. If specified the
    key is not required. Can be a full dot-separated path to a component.
    """

    requiredKeys: ClassVar[Sequence[str]] = ()
    """Keys that are required to be specified in the configuration.
    """

    defaultConfigFile: ClassVar[Optional[str]] = None
    """Name of the file containing defaults for this config class.
    """

    def __init__(self, other=None, validate: bool = True, mergeDefaults: bool = True, searchPaths=None) -> None:
        # Create a blank object to receive the defaults
        # Once we have the defaults we then update with the external values
        super().__init__()

        # Create a standard Config rather than subset
        externalConfig = Config(other)

        # Select the part we need from it
        # To simplify the use of !include we also check for the existence of
        # component.component (since the included files can themselves
        # include the component name)
        if self.component is not None:
            doubled = (self.component, self.component)
            # Must check for double depth first
            if doubled in externalConfig:
                externalConfig = externalConfig[doubled]
            elif self.component in externalConfig:
                externalConfig._data = externalConfig._data[self.component]

        # Default files read to create this configuration
        self.filesRead = []

        # Assume we are not looking up child configurations
        containerKey = None

        # Sometimes we do not want to merge with defaults.
        if mergeDefaults:
            # Supplied search paths have highest priority
            fullSearchPath = []
            if searchPaths:
                fullSearchPath.extend(searchPaths)

            # Read default paths from environment
            fullSearchPath.extend(self.defaultSearchPaths())

            # There are two places to find defaults for this particular config
            # - The "defaultConfigFile" defined in the subclass
            # - The class specified in the "cls" element in the config.
            #   Read cls after merging in case it changes.
            if self.defaultConfigFile is not None:
                self._updateWithConfigsFromPath(fullSearchPath, self.defaultConfigFile)

            # Can have a class specification in the external config (priority)
            # or from the defaults.
            pytype = None
            if "cls" in externalConfig:
                pytype = externalConfig["cls"]
            elif "cls" in self:
                pytype = self["cls"]

            if pytype is not None:
                try:
                    cls = doImport(pytype)
                except ImportError as e:
                    raise RuntimeError(f"Failed to import cls '{pytype}' for config {type(self)}") from e
                defaultsFile = cls.defaultConfigFile
                if defaultsFile is not None:
                    self._updateWithConfigsFromPath(fullSearchPath, defaultsFile)

                # Get the container key in case we need it; the imported
                # class is not required to define one.
                containerKey = getattr(cls, "containerKey", None)

        # Now update this object with the external values so that the external
        # values always override the defaults
        self.update(externalConfig)
        if not self.configFile:
            self.configFile = externalConfig.configFile

        # If this configuration has child configurations of the same
        # config class, we need to expand those defaults as well.
        if mergeDefaults and containerKey is not None and containerKey in self:
            for idx, subConfig in enumerate(self[containerKey]):
                self[containerKey, idx] = type(self)(
                    other=subConfig, validate=validate, mergeDefaults=mergeDefaults, searchPaths=searchPaths
                )

        if validate:
            self.validate()

    @classmethod
    def defaultSearchPaths(cls) -> List[Union[str, ResourcePath]]:
        """Read environment to determine search paths to use.

        Global defaults, at lowest priority, are found in the ``configs``
        resource directory of the package named by ``cls.resourcesPackage``.
        Additional defaults can be defined using the environment variable
        ``$DAF_BUTLER_CONFIG_PATH`` which is a PATH-like variable where
        paths at the front of the list have priority over those later.

        Returns
        -------
        paths : `list`
            Returns a list of paths to search. The returned order is in
            priority with the highest priority paths first. Entries taken
            from the environment variable are `str`; the package
            configuration resource directory is always the final entry
            and is a `lsst.resources.ResourcePath`.

        Notes
        -----
        The environment variable is split on `os.pathsep` (``:`` on POSIX
        systems). This currently makes it incompatible with usage of URIs.
        Empty entries, such as those produced by an empty variable or by
        doubled or trailing separators, are ignored.
        """
        # We can pick up defaults from multiple search paths: first those
        # named in the environment variable, then the package defaults.
        defaultsPaths: List[Union[str, ResourcePath]] = []

        # Drop empty entries so that an empty or sloppily written variable
        # does not add "" as a search directory.
        externalPaths = os.environ.get(CONFIG_PATH, "")
        defaultsPaths.extend(path for path in externalPaths.split(os.pathsep) if path)

        # Add the package defaults as a resource
        defaultsPaths.append(ResourcePath(f"resource://{cls.resourcesPackage}/configs", forceDirectory=True))
        return defaultsPaths

    def _updateWithConfigsFromPath(self, searchPaths, configFile):
        """Search the supplied paths, merging the configuration values.

        The values read will override values currently stored in the object.
        Every file found in the path will be read, such that the earlier
        path entries have higher priority.

        Parameters
        ----------
        searchPaths : `list` of `lsst.resources.ResourcePath`, `str`
            Paths to search for the supplied configFile. This path
            is the priority order, such that files read from the
            first path entry will be selected over those read from
            a later path. Can contain `str` referring to the local file
            system or a URI string.
        configFile : `lsst.resources.ResourcePath`
            File to locate in path. If it is an absolute path or URI that
            exists it will be read directly and the search path will not
            be used. Otherwise it is looked up relative to each entry of
            ``searchPaths``.

        Raises
        ------
        ValueError
            Raised if an entry in ``searchPaths`` is neither a `str` nor a
            `lsst.resources.ResourcePath`.
        """
        uri = ResourcePath(configFile)
        if uri.isabs() and uri.exists():
            # An existing absolute resource is read as-is; the search path
            # is not consulted.
            self._updateWithOtherConfigFile(configFile)
            self.filesRead.append(configFile)
        else:
            # Reverse order so that high priority entries
            # update the object last.
            for pathDir in reversed(searchPaths):
                if isinstance(pathDir, (str, ResourcePath)):
                    pathDir = ResourcePath(pathDir, forceDirectory=True)
                    file = pathDir.join(configFile)
                    if file.exists():
                        self.filesRead.append(file)
                        self._updateWithOtherConfigFile(file)
                else:
                    raise ValueError(f"Unexpected search path type encountered: {pathDir!r}")

    def _updateWithOtherConfigFile(self, file):
        """Read in some defaults and update.

        Update the configuration by reading the supplied file as a config
        of this class, and merging such that these values override the
        current values. Contents of the external config are not validated.

        Parameters
        ----------
        file : `Config`, `str`, `lsst.resources.ResourcePath`, or `dict`
            Entity that can be converted to a `ConfigSubset`.
        """
        # Use this class to read the defaults so that subsetting can happen
        # correctly.
        externalConfig = type(self)(file, validate=False, mergeDefaults=False)
        self.update(externalConfig)

    def validate(self) -> None:
        """Check that mandatory keys are present in this configuration.

        Ignored if ``requiredKeys`` is empty.

        Raises
        ------
        KeyError
            Raised if any key listed in ``requiredKeys`` is missing from
            the top level of the stored data.
        """
        missing = [k for k in self.requiredKeys if k not in self._data]
        if missing:
            raise KeyError(f"Mandatory keys ({missing}) missing from supplied configuration for {type(self)}")
1308 raise KeyError(f"Mandatory keys ({missing}) missing from supplied configuration for {type(self)}")