Coverage for python/lsst/daf/butler/core/config.py: 44%

482 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-01-26 02:04 -0800

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

"""Configuration control."""

# The docstring has to precede the ``__future__`` import: a string literal
# placed after it is an ordinary expression statement and leaves the
# module's ``__doc__`` unset.
from __future__ import annotations

__all__ = ("Config", "ConfigSubset")

27 

28import collections 

29import copy 

30import io 

31import json 

32import logging 

33import os 

34import pprint 

35import sys 

36from pathlib import Path 

37from typing import IO, Any, ClassVar, Dict, List, Optional, Sequence, Tuple, Union 

38 

39import yaml 

40from lsst.resources import ResourcePath, ResourcePathExpression 

41from lsst.utils import doImport 

42from yaml.representer import Representer 

43 

# Dump a defaultdict with the same representer used for a plain dict.
yaml.add_representer(collections.defaultdict, Representer.represent_dict)

# Config module logger
log = logging.getLogger(__name__)

# PATH-like environment variable to use for defaults.
CONFIG_PATH = "DAF_BUTLER_CONFIG_PATH"

# Not all installations have the C library, so fall back to the pure-Python
# safe loader when the C-accelerated one is not available.
yamlLoader = getattr(yaml, "CSafeLoader", yaml.SafeLoader)

59 

60 

61def _doUpdate(d, u): 

62 if not isinstance(u, collections.abc.Mapping) or not isinstance(d, collections.abc.MutableMapping): 62 ↛ 63line 62 didn't jump to line 63, because the condition on line 62 was never true

63 raise RuntimeError("Only call update with Mapping, not {}".format(type(d))) 

64 for k, v in u.items(): 

65 if isinstance(v, collections.abc.Mapping): 

66 d[k] = _doUpdate(d.get(k, {}), v) 

67 else: 

68 d[k] = v 

69 return d 

70 

71 

72def _checkNextItem(k, d, create, must_be_dict): 

73 """See if k is in d and if it is return the new child.""" 

74 nextVal = None 

75 isThere = False 

76 if d is None: 76 ↛ 78line 76 didn't jump to line 78, because the condition on line 76 was never true

77 # We have gone past the end of the hierarchy 

78 pass 

79 elif not must_be_dict and isinstance(d, collections.abc.Sequence): 79 ↛ 84line 79 didn't jump to line 84, because the condition on line 79 was never true

80 # Check for Sequence first because for lists 

81 # __contains__ checks whether value is found in list 

82 # not whether the index exists in list. When we traverse 

83 # the hierarchy we are interested in the index. 

84 try: 

85 nextVal = d[int(k)] 

86 isThere = True 

87 except IndexError: 

88 pass 

89 except ValueError: 

90 isThere = k in d 

91 elif k in d: 

92 nextVal = d[k] 

93 isThere = True 

94 elif create: 94 ↛ 95line 94 didn't jump to line 95, because the condition on line 94 was never true

95 d[k] = {} 

96 nextVal = d[k] 

97 isThere = True 

98 

99 return nextVal, isThere 

100 

101 

class Loader(yamlLoader):
    """YAML Loader that supports file include directives.

    Uses ``!include`` directive in a YAML file to point to another
    YAML file to be included. The path in the include directive is relative
    to the file containing that directive.

        storageClasses: !include storageClasses.yaml

    Examples
    --------
    >>> with open("document.yaml", "r") as f:
    ...     data = yaml.load(f, Loader=Loader)

    Notes
    -----
    See https://davidchall.github.io/yaml-includes.html
    """

    def __init__(self, stream):
        super().__init__(stream)
        # if this is a string and not a stream we may well lack a name
        try:
            self._root = ResourcePath(stream.name)
        except AttributeError:
            # No choice but to assume a local filesystem
            self._root = ResourcePath("no-file.yaml")

    def include(self, node):
        """Construct the content referenced by an ``!include`` node.

        Parameters
        ----------
        node : `yaml.Node`
            Scalar (single file), sequence (list of files) or mapping
            (key to file) node.

        Returns
        -------
        result : `object`, `list` or `dict`
            The loaded content of the file for a scalar node, or a `list`
            or `dict` of loaded content for sequence and mapping nodes.

        Raises
        ------
        yaml.constructor.ConstructorError
            Raised if the node is not a scalar, sequence or mapping node.
        """
        if isinstance(node, yaml.ScalarNode):
            return self.extractFile(self.construct_scalar(node))

        if isinstance(node, yaml.SequenceNode):
            return [self.extractFile(filename) for filename in self.construct_sequence(node)]

        if isinstance(node, yaml.MappingNode):
            return {k: self.extractFile(v) for k, v in self.construct_mapping(node).items()}

        # Carry the diagnostic in the exception rather than printing it.
        raise yaml.constructor.ConstructorError(
            None, None, "unrecognised node type in !include statement", node.start_mark
        )

    def extractFile(self, filename):
        """Load the YAML file named by an ``!include`` directive.

        Parameters
        ----------
        filename : `str`
            URI or path of the file to include. A value without a scheme
            is resolved relative to the file containing the directive.

        Returns
        -------
        content : `object`
            Whatever `yaml.load` returns for the referenced file.
        """
        # It is possible for the !include to point to an explicit URI
        # instead of a relative URI, therefore we first see if it is
        # scheme-less or not. If it has a scheme we use it directly
        # if it is scheme-less we use it relative to the file root.
        requesteduri = ResourcePath(filename, forceAbsolute=False)

        if requesteduri.scheme:
            fileuri = requesteduri
        else:
            fileuri = self._root.updatedFile(filename)

        log.debug("Opening YAML file via !include: %s", fileuri)

        # Read all the data from the resource
        data = fileuri.read()

        # Store the bytes into a BytesIO so we can attach a .name
        stream = io.BytesIO(data)
        stream.name = fileuri.geturl()
        return yaml.load(stream, Loader)


# Register the directive once at class-definition time instead of
# repeating the registration every time a Loader is instantiated.
Loader.add_constructor("!include", Loader.include)

173 

174 

175class Config(collections.abc.MutableMapping): 

176 r"""Implements a datatype that is used by `Butler` for configuration. 

177 

178 It is essentially a `dict` with key/value pairs, including nested dicts 

179 (as values). In fact, it can be initialized with a `dict`. 

180 This is explained next: 

181 

182 Config extends the `dict` api so that hierarchical values may be accessed 

183 with delimited notation or as a tuple. If a string is given the delimiter 

184 is picked up from the first character in that string. For example, 

185 ``foo.getValue(".a.b.c")``, ``foo["a"]["b"]["c"]``, ``foo["a", "b", "c"]``, 

186 ``foo[".a.b.c"]``, and ``foo["/a/b/c"]`` all achieve the same outcome. 

187 If the first character is alphanumeric, no delimiter will be used. 

188 ``foo["a.b.c"]`` will be a single key ``a.b.c`` as will ``foo[":a.b.c"]``. 

189 Unicode characters can be used as the delimiter for distinctiveness if 

190 required. 

191 

192 If a key in the hierarchy starts with a non-alphanumeric character care 

193 should be used to ensure that either the tuple interface is used or 

194 a distinct delimiter is always given in string form. 

195 

196 Finally, the delimiter can be escaped if it is part of a key and also 

197 has to be used as a delimiter. For example, ``foo[r".a.b\.c"]`` results in 

198 a two element hierarchy of ``a`` and ``b.c``. For hard-coded strings it is 

199 always better to use a different delimiter in these cases. 

200 

201 Note that adding a multi-level key implicitly creates any nesting levels 

202 that do not exist, but removing multi-level keys does not automatically 

203 remove empty nesting levels. As a result: 

204 

205 >>> c = Config() 

206 >>> c[".a.b"] = 1 

207 >>> del c[".a.b"] 

208 >>> c

209 Config({'a': {}}) 

210 

211 Storage formats supported: 

212 

213 - yaml: read and write is supported. 

214 - json: read and write is supported but no ``!include`` directive. 

215 

216 Parameters 

217 ---------- 

218 other : `lsst.resources.ResourcePath` or `Config` or `dict` 

219 Other source of configuration, can be: 

220 

221 - (`lsst.resources.ResourcePathExpression`) 

222 Treated as a URI to a config file. Must end with ".yaml" or ".json".

223 - (`Config`) Copies the other Config's values into this one. 

224 - (`dict`) Copies the values from the dict into this Config. 

225 

226 If `None` is provided an empty `Config` will be created. 

227 """ 

228 

229 _D: str = "→" 

230 """Default internal delimiter to use for components in the hierarchy when 

231 constructing keys for external use (see `Config.names()`).""" 

232 

233 includeKey: ClassVar[str] = "includeConfigs" 

234 """Key used to indicate that another config should be included at this 

235 part of the hierarchy.""" 

236 

237 resourcesPackage: str = "lsst.daf.butler" 

238 """Package to search for default configuration data. The resources 

239 themselves will be within a ``configs`` resource hierarchy.""" 

240 

def __init__(self, other=None):
    """Build a config from nothing, another config, a mapping or a URI.

    Parameters
    ----------
    other : `Config`, mapping, `str`, `ResourcePath` or `Path`, optional
        Source of the configuration. `None` gives an empty config.

    Raises
    ------
    RuntimeError
        Raised if ``other`` is of an unsupported type.
    """
    self._data: Dict[str, Any] = {}
    self.configFile = None

    if other is None:
        return

    if isinstance(other, Config):
        # update() is used in preference to a deep copy because a config
        # entry may hold a complex object that can not be deep copied.
        self.update(other._data)
        self.configFile = other.configFile
        return

    # dict is listed first because it is the common case and is cheaper
    # to test for than the generic mapping.
    if isinstance(other, (dict, collections.abc.Mapping)):
        self.update(other)
        return

    if not isinstance(other, (str, ResourcePath, Path)):
        # Nothing recognizable was supplied.
        raise RuntimeError(f"A Config could not be loaded from other: {other}")

    # A string is assumed to be a file path/URI.
    self.__initFromUri(other)
    self._processExplicitIncludes()

266 

def ppprint(self):
    """Return config as formatted readable string.

    Examples
    --------
    use: ``pdb> print(myConfigObject.ppprint())``

    Returns
    -------
    s : `str`
        A prettyprint formatted string representing the config
    """
    # A width of 1 forces the printer to break at every opportunity.
    printer = pprint.PrettyPrinter(indent=2, width=1)
    return printer.pformat(self._data)

280 

281 def __repr__(self): 

282 return f"{type(self).__name__}({self._data!r})" 

283 

284 def __str__(self): 

285 return self.ppprint() 

286 

287 def __len__(self): 

288 return len(self._data) 

289 

290 def __iter__(self): 

291 return iter(self._data) 

292 

def copy(self):
    """Return a new instance of the same class constructed from this one."""
    constructor = type(self)
    return constructor(self)

295 

@classmethod
def fromString(cls, string: str, format: str = "yaml") -> Config:
    """Create a new Config instance from a serialized string.

    Parameters
    ----------
    string : `str`
        String containing content in specified format
    format : `str`, optional
        Format of the supplied string. Can be ``json`` or ``yaml``.

    Returns
    -------
    c : `Config`
        Newly-constructed Config.

    Raises
    ------
    ValueError
        Raised if ``format`` is neither ``yaml`` nor ``json``.
    """
    # Reject an unknown format before constructing anything.
    if format != "yaml" and format != "json":
        raise ValueError(f"Unexpected format of string: {format}")

    if format == "yaml":
        new_config = cls().__initFromYaml(string)
    else:
        new_config = cls().__initFromJson(string)

    new_config._processExplicitIncludes()
    return new_config

320 

@classmethod
def fromYaml(cls, string: str) -> Config:
    """Create a new Config instance from a YAML string.

    Parameters
    ----------
    string : `str`
        String containing content in YAML format

    Returns
    -------
    c : `Config`
        Newly-constructed Config.
    """
    # Thin wrapper that pins the format of `fromString` to YAML.
    new_config = cls.fromString(string, format="yaml")
    return new_config

336 

def __initFromUri(self, path: ResourcePathExpression) -> None:
    """Load a file from a path or an URI.

    Parameters
    ----------
    path : `lsst.resources.ResourcePathExpression`
        Path or a URI to a persisted config file.

    Raises
    ------
    FileNotFoundError
        Raised if the extension is not supported and the location does
        not exist.
    RuntimeError
        Raised if the location exists but has an unsupported extension.
    """
    uri = ResourcePath(path)
    ext = uri.getExtension()

    if ext not in (".yaml", ".json"):
        # This URI does not have a valid extension. It might be because
        # we ended up with a directory and not a file. Before we complain
        # about an extension, do an existence check. The (possibly
        # expensive) check is skipped for supported extensions because
        # reading the file will reveal soon enough that it is missing.
        if not uri.exists():
            raise FileNotFoundError(f"Config location {uri} does not exist.")
        raise RuntimeError(f"The Config URI does not have a supported extension: {uri}")

    location = uri.geturl()
    if ext == ".yaml":
        log.debug("Opening YAML config file: %s", location)
        # Use a stream so we can name it
        stream = io.BytesIO(uri.read())
        stream.name = location
        self.__initFromYaml(stream)
    else:
        log.debug("Opening JSON config file: %s", location)
        self.__initFromJson(uri.read())

    self.configFile = uri

369 

def __initFromYaml(self, stream):
    """Load a YAML config from any readable stream that contains one.

    Parameters
    ----------
    stream: `IO` or `str`
        Stream to pass to the YAML loader. Accepts anything that
        `yaml.load` accepts. This can include a string as well as an
        IO stream.

    Returns
    -------
    self : `Config`
        This instance, with its content replaced by the loaded data.

    Raises
    ------
    yaml.YAMLError
        If there is an error loading the file.
    """
    loaded = yaml.load(stream, Loader=Loader)
    # An empty document loads as None; store an empty dict instead.
    self._data = {} if loaded is None else loaded
    return self

390 

def __initFromJson(self, stream):
    """Load a JSON config from any readable stream that contains one.

    Parameters
    ----------
    stream: `IO` or `str`
        Stream to pass to the JSON loader. This can include a string as
        well as an IO stream.

    Returns
    -------
    self : `Config`
        This instance, with its content replaced by the loaded data.

    Raises
    ------
    TypeError:
        Raised if there is an error loading the content.
    """
    # In-memory text/bytes and file-like objects need different parsers.
    parse = json.loads if isinstance(stream, (bytes, str)) else json.load
    parsed = parse(stream)
    # A JSON ``null`` document parses to None; store an empty dict instead.
    self._data = {} if parsed is None else parsed
    return self

413 

def _processExplicitIncludes(self):
    """Scan through the configuration searching for the special includes.

    Looks for ``includeConfigs`` directive and processes the includes.
    Each directive is removed from the config and replaced by the merged
    content of the referenced files, with values already present at that
    level of this config taking precedence.

    Raises
    ------
    RuntimeError
        Raised if the config file attribute has an unexpected type or if
        a referenced include file can not be found.
    """
    # Search paths for config files
    searchPaths = [ResourcePath(os.path.curdir, forceDirectory=True)]
    if self.configFile is not None:
        if not isinstance(self.configFile, ResourcePath):
            raise RuntimeError(f"Unexpected type for config file: {self.configFile}")
        searchPaths.append(self.configFile.dirname())

    # Take a snapshot of the names up front because the config is
    # modified inside the loop.
    for path in self.nameTuples():
        if path[-1] != self.includeKey:
            continue

        # Path elements can be list indices (int) so stringify them
        # before joining.
        log.debug(
            "Processing file include directive at %s",
            self._D + self._D.join(str(element) for element in path),
        )
        basePath = path[:-1]

        # Extract the includes and then delete them from the config
        includes = self[path]
        del self[path]

        # Be consistent and convert to a list
        if not isinstance(includes, list):
            includes = [includes]

        # Read each file assuming it is a reference to a file
        # The file can be relative to config file or cwd
        # ConfigSubset search paths are not used
        subConfigs = []
        for fileName in includes:
            # Expand any shell variables -- this could be URI
            fileName = ResourcePath(os.path.expandvars(fileName), forceAbsolute=False)
            found = None
            if fileName.isabs():
                found = fileName
            else:
                # There is no early exit: a match in a later search path
                # replaces a match from an earlier one.
                for searchDir in searchPaths:
                    if isinstance(searchDir, ResourcePath):
                        specific = searchDir.join(fileName.path)
                        # Remote resource check might be expensive
                        if specific.exists():
                            found = specific
                    else:
                        log.warning(
                            "Do not understand search path entry '%s' of type %s",
                            searchDir,
                            type(searchDir).__name__,
                        )
            if not found:
                raise RuntimeError(f"Unable to find referenced include file: {fileName}")

            # Read the referenced Config as a Config
            subConfigs.append(type(self)(found))

        if not subConfigs:
            # An empty include list has nothing to merge; the directive
            # itself has already been removed above.
            continue

        # Now we need to merge these sub configs with the current
        # information that was present in this node in the config
        # tree with precedence given to the explicit values
        newConfig = subConfigs.pop(0)
        for sc in subConfigs:
            newConfig.update(sc)

        # Explicit values take precedence
        if not basePath:
            # This is an include at the root config
            newConfig.update(self)
            # Replace the current config
            self._data = newConfig._data
        else:
            newConfig.update(self[basePath])
            # And reattach to the base config
            self[basePath] = newConfig

490 

491 @staticmethod 

492 def _splitIntoKeys(key): 

493 r"""Split the argument for get/set/in into a hierarchical list. 

494 

495 Parameters 

496 ---------- 

497 key : `str` or iterable 

498 Argument given to get/set/in. If an iterable is provided it will 

499 be converted to a list. If the first character of the string 

500 is not an alphanumeric character then it will be used as the 

501 delimiter for the purposes of splitting the remainder of the 

502 string. If the delimiter is also in one of the keys then it 

503 can be escaped using ``\``. There is no default delimiter. 

504 

505 Returns 

506 ------- 

507 keys : `list` 

508 Hierarchical keys as a `list`. 

509 """ 

510 if isinstance(key, str): 

511 if not key[0].isalnum(): 511 ↛ 512line 511 didn't jump to line 512, because the condition on line 511 was never true

512 d = key[0] 

513 key = key[1:] 

514 else: 

515 return [ 

516 key, 

517 ] 

518 escaped = f"\\{d}" 

519 temp = None 

520 if escaped in key: 

521 # Complain at the attempt to escape the escape 

522 doubled = rf"\{escaped}" 

523 if doubled in key: 

524 raise ValueError( 

525 f"Escaping an escaped delimiter ({doubled} in {key}) is not yet supported." 

526 ) 

527 # Replace with a character that won't be in the string 

528 temp = "\r" 

529 if temp in key or d == temp: 

530 raise ValueError( 

531 f"Can not use character {temp!r} in hierarchical key or as" 

532 " delimiter if escaping the delimiter" 

533 ) 

534 key = key.replace(escaped, temp) 

535 hierarchy = key.split(d) 

536 if temp: 

537 hierarchy = [h.replace(temp, d) for h in hierarchy] 

538 return hierarchy 

539 elif isinstance(key, collections.abc.Iterable): 539 ↛ 543line 539 didn't jump to line 543, because the condition on line 539 was never false

540 return list(key) 

541 else: 

542 # Not sure what this is so try it anyway 

543 return [ 

544 key, 

545 ] 

546 

547 def _getKeyHierarchy(self, name): 

548 """Retrieve the key hierarchy for accessing the Config. 

549 

550 Parameters 

551 ---------- 

552 name : `str` or `tuple` 

553 Delimited string or `tuple` of hierarchical keys. 

554 

555 Returns 

556 ------- 

557 hierarchy : `list` of `str` 

558 Hierarchy to use as a `list`. If the name is available directly 

559 as a key in the Config it will be used regardless of the presence 

560 of any nominal delimiter. 

561 """ 

562 if name in self._data: 

563 keys = [ 

564 name, 

565 ] 

566 else: 

567 keys = self._splitIntoKeys(name) 

568 return keys 

569 

def _findInHierarchy(self, keys, create=False):
    """Look for hierarchy of keys in Config.

    Parameters
    ----------
    keys : `list` or `tuple`
        Keys to search in hierarchy.
    create : `bool`, optional
        If `True`, if a part of the hierarchy does not exist, insert an
        empty `dict` into the hierarchy.

    Returns
    -------
    hierarchy : `list`
        List of the value corresponding to each key in the supplied
        hierarchy. Only keys that exist in the hierarchy will have
        a value.
    complete : `bool`
        `True` if the full hierarchy exists and the final element
        in ``hierarchy`` is the value of relevant value.
    """
    node = self._data
    found = []
    for depth, key in enumerate(keys):
        # The root is always a dict so the sequence check is skipped for
        # the first key only; deeper levels might be sequences.
        node, present = _checkNextItem(key, node, create, depth == 0)
        if not present:
            return found, False
        found.append(node)
    return found, True

610 

def __getitem__(self, name):
    """Return the value for a top-level, delimited or tuple key.

    Mapping values are returned wrapped in a new `Config`.

    Raises
    ------
    KeyError
        Raised if the key hierarchy does not exist.
    """
    # If the name matches a key in the top-level hierarchy, bypass all
    # further cleverness. This is what lets `Config.items()` work with
    # an __iter__ that simply returns the top level keys of self._data.
    try:
        data = self._data[name]
    except KeyError:
        needs_search = True
    else:
        needs_search = False

    if needs_search:
        hierarchy, complete = self._findInHierarchy(self._getKeyHierarchy(name))
        if not complete:
            raise KeyError(f"{name} not found")
        data = hierarchy[-1]

    # dict is listed first because it is the common case and is cheaper
    # to test for than the generic mapping.
    if not isinstance(data, (dict, collections.abc.Mapping)):
        return data

    child = Config(data)
    # Ensure that child configs inherit the parent internal delimiter
    if self._D != Config._D:
        child._D = self._D
    return child

642 

def __setitem__(self, name, value):
    """Store a value at a top-level, delimited or tuple key.

    Missing intermediate levels are created. A `Config` value is stored
    as a deep copy of its underlying data.
    """
    path = self._getKeyHierarchy(name)
    last = path[-1]
    parents = path[:-1]

    stored = copy.deepcopy(value._data) if isinstance(value, Config) else value

    hierarchy, _ = self._findInHierarchy(parents, create=True)
    target = hierarchy[-1] if hierarchy else self._data

    try:
        target[last] = stored
    except TypeError:
        # The target did not accept the key as given; retry with an
        # integer index.
        target[int(last)] = stored

659 

660 def __contains__(self, key): 

661 keys = self._getKeyHierarchy(key) 

662 hierarchy, complete = self._findInHierarchy(keys) 

663 return complete 

664 

665 def __delitem__(self, key): 

666 keys = self._getKeyHierarchy(key) 

667 last = keys.pop() 

668 hierarchy, complete = self._findInHierarchy(keys) 

669 if complete: 669 ↛ 676line 669 didn't jump to line 676, because the condition on line 669 was never false

670 if hierarchy: 670 ↛ 671line 670 didn't jump to line 671, because the condition on line 670 was never true

671 data = hierarchy[-1] 

672 else: 

673 data = self._data 

674 del data[last] 

675 else: 

676 raise KeyError(f"{key} not found in Config") 

677 

def update(self, other):
    """Update config from other `Config` or `dict`.

    Like `dict.update()`, but will add or modify keys in nested dicts,
    instead of overwriting the nested dict entirely.

    Parameters
    ----------
    other : `dict` or `Config`
        Source of configuration:

    Examples
    --------
    Nested content is merged:

    >>> c = Config({"a": {"b": 1}})
    >>> c.update({"a": {"c": 2}})
    >>> c
    Config({'a': {'b': 1, 'c': 2}})

    whereas a plain `dict` replaces the nested `dict`:

    >>> foo = {"a": {"b": 1}}
    >>> foo.update({"a": {"c": 2}})
    >>> print(foo)
    {'a': {'c': 2}}
    """
    # The recursive merge modifies the underlying dict in place.
    target = self._data
    _doUpdate(target, other)

702 

def merge(self, other):
    """Merge another Config into this one.

    Like `Config.update()`, but will add keys & values from other that
    DO NOT EXIST in self.

    Keys and values that already exist in self will NOT be overwritten.

    Parameters
    ----------
    other : `dict` or `Config`
        Source of configuration:

    Raises
    ------
    TypeError
        Raised if ``other`` is not a mapping.
    """
    if not isinstance(other, collections.abc.Mapping):
        raise TypeError(f"Can only merge a Mapping into a Config, not {type(other)}")

    # Start from a Config built from the supplied mapping and then apply
    # this config on top of it so that existing values win.
    merged = Config(other)
    merged.update(self)
    self._data = merged._data

724 

def nameTuples(self, topLevelOnly=False):
    """Get tuples representing the name hierarchies of all keys.

    The tuples returned from this method are guaranteed to be usable
    to access items in the configuration object.

    Parameters
    ----------
    topLevelOnly : `bool`, optional
        If False, the default, a full hierarchy of names is returned.
        If True, only the top level are returned.

    Returns
    -------
    names : `list` of `tuple` of `str`
        List of all names present in the `Config` where each element
        in the list is a `tuple` of strings representing the hierarchy.
    """
    if topLevelOnly:
        return [(name,) for name in self]

    collected: List[Tuple[str, ...]] = []

    def walk(node, prefix):
        # Sequences are addressed by position, mappings by key.
        if isinstance(node, collections.abc.Sequence):
            indices = range(len(node))
        else:
            indices = node.keys()
        for index in indices:
            child = node[index]
            path = prefix + (index,)
            collected.append(path)
            # Strings are sequences too but are treated as leaf values.
            if isinstance(child, str):
                continue
            if isinstance(child, (collections.abc.Mapping, collections.abc.Sequence)):
                walk(child, path)

    walk(self._data, ())
    return collected

763 

def names(self, topLevelOnly=False, delimiter=None):
    """Get a delimited name of all the keys in the hierarchy.

    The values returned from this method are guaranteed to be usable
    to access items in the configuration object.

    Parameters
    ----------
    topLevelOnly : `bool`, optional
        If False, the default, a full hierarchy of names is returned.
        If True, only the top level are returned.
    delimiter : `str`, optional
        Delimiter to use when forming the keys. If the delimiter is
        present in any of the keys, it will be escaped in the returned
        names. If `None` given a delimiter will be automatically provided.
        The delimiter can not be alphanumeric.

    Returns
    -------
    names : `list` of `str`
        List of all names present in the `Config`.

    Notes
    -----
    This is different than the built-in method `dict.keys`, which will
    return only the first level keys.

    Raises
    ------
    ValueError:
        The supplied delimiter is alphanumeric.
    """
    if topLevelOnly:
        return list(self.keys())

    # Get all the tuples of hierarchical keys
    nameTuples = self.nameTuples()

    if delimiter is not None and delimiter.isalnum():
        raise ValueError(f"Supplied delimiter ({delimiter!r}) must not be alphanumeric.")

    if delimiter is None:
        # Start with the default and look for a delimiter that does not
        # need to be escaped (much easier to understand if not escaped).
        delimiter = self._D

        # Form big string for easy check of delimiter clash
        combined = "".join("".join(str(s) for s in k) for k in nameTuples)

        attempts = 0
        while delimiter in combined:
            log.debug("Delimiter '%s' could not be used. Trying another.", delimiter)
            attempts += 1

            if attempts > 100:
                raise ValueError(f"Unable to determine a delimiter for Config {self}")

            # Advance to the next code point that is not alphanumeric.
            codepoint = ord(delimiter) + 1
            while chr(codepoint).isalnum():
                codepoint += 1
            delimiter = chr(codepoint)

        log.debug("Using delimiter %r", delimiter)

    # Form the keys, escaping the delimiter if necessary
    escaped = f"\\{delimiter}"
    strings = []
    for hierarchy in nameTuples:
        parts = [str(element).replace(delimiter, escaped) for element in hierarchy]
        strings.append(delimiter + delimiter.join(parts))
    return strings

837 

def asArray(self, name):
    """Get a value as an array.

    May contain one or more elements.

    Parameters
    ----------
    name : `str`
        Key to use to retrieve value.

    Returns
    -------
    array : `collections.abc.Sequence`
        The value corresponding to name, but guaranteed to be returned
        as a list with at least one element. If the value is a
        `~collections.abc.Sequence` (and not a `str`) the value itself
        will be returned, else the value will be the first element.
    """
    value = self.get(name)
    # A string is a sequence but is treated as a single value here.
    if isinstance(value, str) or not isinstance(value, collections.abc.Sequence):
        return [value]
    return value

862 

def __eq__(self, other):
    """Compare the content with another `Config` or any other object."""
    # Another Config is compared through its underlying data.
    rhs = other._data if isinstance(other, Config) else other
    return self._data == rhs

867 

def __ne__(self, other):
    """Return the inverse comparison of the content with ``other``."""
    # Another Config is compared through its underlying data.
    rhs = other._data if isinstance(other, Config) else other
    return self._data != rhs

872 

873 ####### 

874 # i/o # 

875 

def dump(self, output: Optional[IO] = None, format: str = "yaml") -> Optional[str]:
    """Write the config to an output stream.

    Parameters
    ----------
    output : `IO`, optional
        The stream to use for output. If `None` the serialized content
        will be returned.
    format : `str`, optional
        The format to use for the output. Can be "yaml" or "json".

    Returns
    -------
    serialized : `str` or `None`
        If a stream was given the stream will be used and the return
        value will be `None`. If the stream was `None` the
        serialization will be returned as a string.

    Raises
    ------
    ValueError
        Raised if the format is not supported.
    """
    if format == "yaml":
        # The result of safe_dump is returned unchanged.
        return yaml.safe_dump(self._data, output, default_flow_style=False)

    if format != "json":
        raise ValueError(f"Unsupported format for Config serialization: {format}")

    if output is None:
        return json.dumps(self._data, ensure_ascii=False)

    json.dump(self._data, output, ensure_ascii=False)
    return None

903 

def dumpToUri(
    self,
    uri: ResourcePathExpression,
    updateFile: bool = True,
    defaultFileName: str = "butler.yaml",
    overwrite: bool = True,
) -> None:
    """Write the config to location pointed to by given URI.

    Currently supports 's3' and 'file' URI schemes.

    Parameters
    ----------
    uri: `lsst.resources.ResourcePathExpression`
        URI of location where the Config will be written.
    updateFile : bool, optional
        If True and uri does not end on a filename with extension, will
        append `defaultFileName` to the target uri. True by default.
    defaultFileName : str, optional
        The file name that will be appended to target uri if updateFile is
        True and uri does not end on a file with an extension.
    overwrite : bool, optional
        If True the configuration will be written even if it already
        exists at that location.
    """
    # Make local copy of URI or create new one
    target = ResourcePath(uri)

    if updateFile and not target.getExtension():
        target = target.updatedFile(defaultFileName)

    # The serialization format is the lower-cased extension without the
    # leading dot.
    fmt = target.getExtension()[1:].lower()

    serialized = self.dump(format=fmt)
    assert serialized is not None, "Config.dump guarantees not-None return when output arg is None"
    target.write(serialized.encode(), overwrite=overwrite)
    self.configFile = target

943 

@staticmethod
def updateParameters(configType, config, full, toUpdate=None, toCopy=None, overwrite=True, toMerge=None):
    """Set, copy or merge selected parameters of a config in bulk.

    The part of ``config`` understood by ``configType`` is extracted,
    modified, and then attached back to ``config``. Values can be given
    explicitly (``toUpdate``), copied from a reference config
    (``toCopy``), or merged from the reference config (``toMerge``).
    ``config`` and ``full`` are assumed to come from the same part of the
    configuration hierarchy.

    Parameters
    ----------
    configType : `ConfigSubset`
        Config class used to extract the relevant items from ``config``
        and ``full``.
    config : `Config`
        The config to update in place. Only the subset understood by
        ``configType`` is modified. The subset is constructed without
        merging defaults and without validation, since mandatory keys
        are allowed to be missing until populated later by merging.
    full : `Config`
        A complete config, with all defaults expanded, that can be
        converted to ``configType``. Used as the source of values for
        ``toCopy`` and ``toMerge``.

        Repository-specific options that should not be obtained from
        defaults when Butler instances are constructed should be copied
        from ``full`` to ``config``.
    toUpdate : `dict`, optional
        Mapping of key to the new value to assign. Keys and values can
        be anything supported by `Config` assignment.
    toCopy : `tuple`, optional
        Keys whose values should be copied from ``full`` into
        ``config``.
    overwrite : `bool`, optional
        If `False`, a key that already exists in ``config`` is left
        untouched by ``toUpdate`` and ``toCopy`` (a debug message is
        logged instead). Default is to overwrite.
    toMerge : `tuple`, optional
        Keys whose content is merged from ``full`` into ``config``
        without overwriting pre-existing values. Only works if the key
        refers to a hierarchy. The ``overwrite`` flag is ignored.

    Raises
    ------
    ValueError
        Raised if none of ``toUpdate``, ``toCopy`` and ``toMerge`` were
        defined.
    """
    if toUpdate is None and toCopy is None and toMerge is None:
        raise ValueError("At least one of toUpdate, toCopy, or toMerge parameters must be set.")

    component = configType.component

    # ``full`` is the complete config, so it tells us whether this is a
    # parent configuration holding the component key. If so, give
    # ``config`` a stub entry so that the subset constructor selects it.
    if component in full and component not in config:
        config[component] = {}

    # The part of the supplied config that is going to be modified.
    subset = configType(config, mergeDefaults=False, validate=False)

    if toUpdate:
        for key, value in toUpdate.items():
            if key not in subset or overwrite:
                subset[key] = value
            else:
                log.debug(
                    "Not overriding key '%s' with value '%s' in config %s",
                    key,
                    value,
                    subset.__class__.__name__,
                )

    if toCopy or toMerge:
        # Matching view of the reference config to read values from.
        reference = configType(full, mergeDefaults=False)

    if toCopy:
        for key in toCopy:
            if key not in subset or overwrite:
                subset[key] = reference[key]
            else:
                log.debug(
                    "Not overriding key '%s' from defaults in config %s",
                    key,
                    subset.__class__.__name__,
                )

    if toMerge:
        for key in toMerge:
            if key in subset:
                # Merge on the extracted node, then store the node back
                # so the merged content is attached to the subset.
                node = subset[key]
                node.merge(reference[key])
                subset[key] = node
            else:
                subset[key] = reference[key]

    # Attach the modified subset back to the supplied config: under the
    # component key for a parent config, otherwise directly.
    if component in config:
        config[component] = subset
    else:
        config.update(subset)

1051 

def toDict(self):
    """Convert a `Config` to a standalone hierarchical `dict`.

    Returns
    -------
    d : `dict`
        A deep copy of the internal data in which any `Config` instance
        found in the hierarchy (as a value of a nested `dict` or an
        element of a nested `list`) has been converted to a `dict`.

    Notes
    -----
    This can be useful when passing a Config to some code that
    expects native Python types.

    Only `dict` and `list` containers are traversed when looking for
    nested `Config` instances.
    """

    def _convert(node):
        # Replace Config instances with plain dicts, descending into the
        # containers in place (they already belong to our private copy).
        if isinstance(node, Config):
            return node.toDict()
        if isinstance(node, dict):
            # Only existing keys are reassigned, so the dict size does not
            # change while iterating.
            for key, value in node.items():
                node[key] = _convert(value)
        elif isinstance(node, list):
            for index, value in enumerate(node):
                node[index] = _convert(value)
        return node

    return _convert(copy.deepcopy(self._data))

1072 

1073 

class ConfigSubset(Config):
    """Config holding one named section of a more general configuration.

    A subclass names its section through ``component``. When the supplied
    configuration contains that key, only the content below it is kept;
    for example a global config containing ``dimensions`` yields just that
    section. When the key is absent the entire supplied configuration is
    used.

    Defaults are located through the search paths (explicitly supplied
    ones and those from the environment) using the default configuration
    file name declared by the subclass, so a configuration class can be
    instantiated without any arguments.

    Subclasses can also list keys that must be present; these are checked
    by `validate`.

    Parameters
    ----------
    other : `Config` or `str` or `dict`
        Argument specifying the configuration information as understood
        by `Config`.
    validate : `bool`, optional
        If `True` the required keys are checked once the configuration
        has been assembled.
    mergeDefaults : `bool`, optional
        If `True` defaults are read and the supplied config is applied on
        top of them, so that supplied values take precedence.
    searchPaths : `list` or `tuple`, optional
        Explicit additional paths to search for defaults, in priority
        order. These have higher priority than the paths returned by
        `ConfigSubset.defaultSearchPaths()`. Entries can be `str` or
        `lsst.resources.ResourcePath`.
    """

    component: ClassVar[Optional[str]] = None
    """Key of the section to extract from a supplied config. Can be `None`.
    The key does not have to be present in the supplied config. Documented
    as also accepting a full dot-separated path to a component.
    """

    requiredKeys: ClassVar[Sequence[str]] = ()
    """Keys that must be present in the configuration for `validate` to
    succeed.
    """

    defaultConfigFile: ClassVar[Optional[str]] = None
    """Name of the file containing defaults for this config class.
    """

    def __init__(self, other=None, validate=True, mergeDefaults=True, searchPaths=None):

        # Start empty: defaults are loaded first and the external values
        # are applied on top of them afterwards.
        super().__init__()

        # Read the supplied content as a plain Config, not as a subset.
        externalConfig = Config(other)

        # Narrow the external config to our component. Files pulled in via
        # !include may themselves contain the component name, so the
        # doubled form component.component has to be checked first.
        component = self.component
        if component is not None:
            doubled = (component, component)
            if doubled in externalConfig:
                externalConfig = externalConfig[doubled]
            elif component in externalConfig:
                externalConfig._data = externalConfig._data[component]

        # Record of the default files read while building this config.
        self.filesRead = []

        # Stays None unless the imported "cls" declares a container key.
        containerKey = None

        if mergeDefaults:

            # Explicit search paths come first (highest priority),
            # followed by the ones derived from the environment.
            fullSearchPath = []
            if searchPaths:
                fullSearchPath.extend(searchPaths)
            fullSearchPath.extend(self.defaultSearchPaths())

            # Defaults come from two places:
            # - the "defaultConfigFile" of this class, and
            # - the class named by the "cls" entry of the config, which is
            #   looked up only after the first merge in case it changes.
            if self.defaultConfigFile is not None:
                self._updateWithConfigsFromPath(fullSearchPath, self.defaultConfigFile)

            # The external config takes priority over the defaults when
            # both specify a class.
            pytype = None
            for source in (externalConfig, self):
                if "cls" in source:
                    pytype = source["cls"]
                    break

            if pytype is not None:
                try:
                    cls = doImport(pytype)
                except ImportError as e:
                    raise RuntimeError(f"Failed to import cls '{pytype}' for config {type(self)}") from e

                defaultsFile = cls.defaultConfigFile
                if defaultsFile is not None:
                    self._updateWithConfigsFromPath(fullSearchPath, defaultsFile)

                # Remember the container key, if the class has one.
                containerKey = getattr(cls, "containerKey", None)

        # Apply the external values last so they override the defaults.
        self.update(externalConfig)
        if not self.configFile:
            self.configFile = externalConfig.configFile

        # Child configurations stored under the container key are of the
        # same config class and need their own defaults expanded.
        if mergeDefaults and containerKey is not None and containerKey in self:
            for idx, subConfig in enumerate(self[containerKey]):
                self[containerKey, idx] = type(self)(
                    other=subConfig, validate=validate, mergeDefaults=mergeDefaults, searchPaths=searchPaths
                )

        if validate:
            self.validate()

    @classmethod
    def defaultSearchPaths(cls):
        """Determine the search paths to use for default configurations.

        Additional locations can be defined with the environment variable
        ``$DAF_BUTLER_CONFIG_PATH``, a PATH-like variable in which paths at
        the front of the list have priority over later ones. The
        ``configs`` resource directory of the ``resourcesPackage`` of this
        class is always added at lowest priority.

        Returns
        -------
        paths : `list`
            Paths to search, highest priority first: the entries from the
            environment variable (as `str`), followed by a
            `lsst.resources.ResourcePath` for the package configuration
            resources as the final entry.

        Notes
        -----
        The environment variable is split on the standard path separator
        (`os.pathsep`). This currently makes it incompatible with usage of
        URIs.
        """
        defaultsPaths: List[Union[str, ResourcePath]] = []

        envValue = os.environ.get(CONFIG_PATH)
        if envValue is not None:
            defaultsPaths.extend(envValue.split(os.pathsep))

        # The package defaults are a resource and always go last.
        packageDefaults = ResourcePath(f"resource://{cls.resourcesPackage}/configs", forceDirectory=True)
        defaultsPaths.append(packageDefaults)
        return defaultsPaths

    def _updateWithConfigsFromPath(self, searchPaths, configFile):
        """Merge every matching config file found on the search paths.

        Values that are read override the values currently stored in this
        object. Every matching file is read, and the paths are processed
        so that earlier entries of ``searchPaths`` end up with higher
        priority.

        Parameters
        ----------
        searchPaths : `list` of `lsst.resources.ResourcePath`, `str`
            Locations to search for ``configFile``, in priority order:
            values read via the first entry win over those read via later
            entries.
        configFile : `lsst.resources.ResourcePath`
            File to locate. If it is absolute and exists it is read
            directly and the search paths are not used.

        Raises
        ------
        ValueError
            Raised if an entry of ``searchPaths`` is neither a `str` nor a
            `lsst.resources.ResourcePath`.
        """
        uri = ResourcePath(configFile)
        if uri.isabs() and uri.exists():
            # Explicit existing resource: read it and ignore the paths.
            self._updateWithOtherConfigFile(configFile)
            self.filesRead.append(configFile)
            return

        # Walk the paths from lowest to highest priority so that the
        # highest priority entries update this object last.
        for pathDir in reversed(searchPaths):
            if not isinstance(pathDir, (str, ResourcePath)):
                raise ValueError(f"Unexpected search path type encountered: {pathDir!r}")
            candidate = ResourcePath(pathDir, forceDirectory=True).join(configFile)
            if candidate.exists():
                self.filesRead.append(candidate)
                self._updateWithOtherConfigFile(candidate)

    def _updateWithOtherConfigFile(self, file):
        """Read a config of this class and merge it into this object.

        The supplied entity is read using this same class, without
        validation and without merging defaults, and its values then
        override the current values.

        Parameters
        ----------
        file : `Config`, `str`, `lsst.resources.ResourcePath`, or `dict`
            Entity that can be converted to a `ConfigSubset`.
        """
        # Constructing with our own type makes sure the component
        # subsetting is applied to the content being read.
        self.update(type(self)(file, validate=False, mergeDefaults=False))

    def validate(self):
        """Check that mandatory keys are present in this configuration.

        Nothing is checked if ``requiredKeys`` is empty.

        Raises
        ------
        KeyError
            Raised if any key listed in ``requiredKeys`` is missing.
        """
        absent = [key for key in self.requiredKeys if key not in self._data]
        if absent:
            raise KeyError(f"Mandatory keys ({absent}) missing from supplied configuration for {type(self)}")