Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24"""Configuration control.""" 

25 

26__all__ = ("Config", "ConfigSubset") 

27 

28from dataclasses import dataclass 

29import collections 

30import copy 

31import logging 

32import pprint 

33import os 

34import pkg_resources 

35import posixpath 

36import yaml 

37import sys 

38from yaml.representer import Representer 

39import io 

40from typing import Sequence, Optional, ClassVar 

41 

42try: 

43 import boto3 

44except ImportError: 

45 boto3 = None 

46 

47from lsst.utils import doImport 

48from .location import ButlerURI 

49from .s3utils import getS3Client 

50 

# Represent defaultdict as a plain mapping when dumping YAML.
yaml.add_representer(collections.defaultdict, Representer.represent_dict)


# Config module logger
log = logging.getLogger(__name__)

# PATH-like environment variable to use for defaults.
CONFIG_PATH = "DAF_BUTLER_CONFIG_PATH"

# Use the C-accelerated safe loader when PyYAML provides it; not all
# installations have the C library, so fall back to the Python one.
yamlLoader = getattr(yaml, "CSafeLoader", yaml.SafeLoader)

65 

66 

class Loader(yamlLoader):
    """YAML Loader that supports file include directives

    Uses ``!include`` directive in a YAML file to point to another
    YAML file to be included. The path in the include directive is relative
    to the file containing that directive.

        storageClasses: !include storageClasses.yaml

    Parameters
    ----------
    stream : `str` or file-like object
        Content to parse. If the object has a ``name`` attribute it is used
        as the root against which relative includes are resolved.

    Examples
    --------
    >>> with open("document.yaml", "r") as f:
    ...     data = yaml.load(f, Loader=Loader)

    Notes
    -----
    See https://davidchall.github.io/yaml-includes.html
    """

    def __init__(self, stream):
        super().__init__(stream)
        # if this is a string and not a stream we may well lack a name
        try:
            self._root = ButlerURI(stream.name)
        except AttributeError:
            # No choice but to assume a local filesystem
            self._root = ButlerURI("no-file.yaml")
        # The handler is (re-)registered each time a loader is created.
        Loader.add_constructor("!include", Loader.include)

    def include(self, node):
        """Construct the value of an ``!include`` node.

        Parameters
        ----------
        node : `yaml.Node`
            Node carrying the ``!include`` tag. A scalar names one file,
            a sequence names several files and a mapping names one file
            per key.

        Returns
        -------
        content : `object`, `list` or `dict`
            Parsed content of the referenced file(s), with the same shape
            as the node: a single value, a list of values, or a dict of
            values.

        Raises
        ------
        yaml.constructor.ConstructorError
            Raised if the node is not a scalar, sequence or mapping.
        """
        if isinstance(node, yaml.ScalarNode):
            return self.extractFile(self.construct_scalar(node))

        if isinstance(node, yaml.SequenceNode):
            return [self.extractFile(filename)
                    for filename in self.construct_sequence(node)]

        if isinstance(node, yaml.MappingNode):
            return {k: self.extractFile(v)
                    for k, v in self.construct_mapping(node).items()}

        # Carry the explanation in the exception itself rather than
        # printing it separately to stderr.
        raise yaml.constructor.ConstructorError(
            None, None, "unrecognised node type in !include statement", node.start_mark)

    def extractFile(self, filename):
        """Load the YAML file referenced by an ``!include`` directive.

        Parameters
        ----------
        filename : `str`
            Path or URI given in the directive. Scheme-less values are
            resolved relative to the file being parsed.

        Returns
        -------
        content : `object`
            Parsed content of the referenced file.

        Raises
        ------
        FileNotFoundError
            Raised if an S3 bucket or key does not exist.
        ModuleNotFoundError
            Raised if an S3 URI is used but boto3 is not available.
        ValueError
            Raised if the URI scheme is neither local nor ``s3``.
        """
        # It is possible for the !include to point to an explicit URI
        # instead of a relative URI, therefore we first see if it is
        # scheme-less or not. If it has a scheme we use it directly
        # if it is scheme-less we use it relative to the file root.
        requesteduri = ButlerURI(filename, forceAbsolute=False)

        if requesteduri.scheme:
            fileuri = requesteduri
        else:
            fileuri = copy.copy(self._root)
            fileuri.updateFile(filename)

        log.debug("Opening YAML file via !include: %s", fileuri)

        if not fileuri.scheme or fileuri.scheme == "file":
            with open(fileuri.ospath, "r") as f:
                return yaml.load(f, Loader)

        if fileuri.scheme == "s3":
            if boto3 is None:
                raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")
            s3 = getS3Client()
            try:
                response = s3.get_object(Bucket=fileuri.netloc, Key=fileuri.relativeToPathRoot)
            except (s3.exceptions.NoSuchKey, s3.exceptions.NoSuchBucket) as err:
                raise FileNotFoundError(f'No such file or directory: {fileuri}') from err

            # boto3 response is a `StreamingBody`, but not a valid Python
            # IOStream. Loader will raise an error that the stream has no name.
            # The name is used to resolve the "!include" filename location to
            # download. A hackish solution is to name it explicitly.
            body = response["Body"]
            body.name = fileuri.geturl()
            try:
                return yaml.load(body, Loader)
            finally:
                # Release the connection even if parsing fails.
                body.close()

        # Previously any other scheme fell through and the include silently
        # evaluated to None.
        raise ValueError(f"Unsupported URI scheme for !include: {fileuri}")

149 

150 

@dataclass
class Resource:
    """Identifies one resource by its package and its name in that package."""

    package: str
    """Name of the package providing this resource."""

    name: str
    """Complete name of the resource."""

    def dirname(self) -> ResourceDir:
        """Return the parent "directory" that contains this resource.

        Returns
        -------
        dir : `ResourceDir`
            Directory part of the resource name, in the same package.
        """
        # Resource names are always POSIX-style, whatever the platform,
        # hence posixpath rather than os.path.
        parent = posixpath.dirname(self.name)
        return ResourceDir(self.package, parent)

    def exists(self) -> bool:
        """Report whether the resource is present in the package.

        Returns
        -------
        exists : `bool`
            `True` if the resource exists.
        """
        found = pkg_resources.resource_exists(self.package, self.name)
        return found

183 

184 

@dataclass
class ResourceDir:
    """Identifies a "directory" of resources inside a package."""

    package: str
    """Name of the package providing this resource directory."""

    dir: str
    """Directory path of resources within the package. Not a full path."""

    def toResource(self, file):
        """Build the concrete resource for a file in this directory.

        Parameters
        ----------
        file : `str`
            Name of a file located in this resource directory.

        Returns
        -------
        resource : `Resource`
            Resource in the same package whose name joins this directory
            with ``file``.
        """
        # Resource names are always POSIX-style, so never use os.path here.
        fullName = posixpath.join(self.dir, file)
        return Resource(self.package, fullName)

211 

212 

213class Config(collections.abc.MutableMapping): 

214 r"""Implements a datatype that is used by `Butler` for configuration 

215 parameters. 

216 

217 It is essentially a `dict` with key/value pairs, including nested dicts 

218 (as values). In fact, it can be initialized with a `dict`. 

219 This is explained next: 

220 

221 Config extends the `dict` api so that hierarchical values may be accessed 

222 with delimited notation or as a tuple. If a string is given the delimiter 

223 is picked up from the first character in that string. For example, 

224 ``foo.getValue(".a.b.c")``, ``foo["a"]["b"]["c"]``, ``foo["a", "b", "c"]``, 

225 ``foo[".a.b.c"]``, and ``foo["/a/b/c"]`` all achieve the same outcome. 

226 If the first character is alphanumeric, no delimiter will be used. 

227 ``foo["a.b.c"]`` will be a single key ``a.b.c`` as will ``foo[":a.b.c"]``. 

228 Unicode characters can be used as the delimiter for distinctiveness if 

229 required. 

230 

231 If a key in the hierarchy starts with a non-alphanumeric character care 

232 should be used to ensure that either the tuple interface is used or 

233 a distinct delimiter is always given in string form. 

234 

235 Finally, the delimiter can be escaped if it is part of a key and also 

236 has to be used as a delimiter. For example, ``foo[r".a.b\.c"]`` results in 

237 a two element hierarchy of ``a`` and ``b.c``. For hard-coded strings it is 

238 always better to use a different delimiter in these cases. 

239 

240 Note that adding a multi-level key implicitly creates any nesting levels 

241 that do not exist, but removing multi-level keys does not automatically 

242 remove empty nesting levels. As a result: 

243 

244 >>> c = Config() 

245 >>> c[".a.b"] = 1 

246 >>> del c[".a.b"] 

247 >>> c

248 Config({'a': {}}) 

249 

250 Storage formats supported: 

251 

252 - yaml: read and write is supported. 

253 

254 

255 Parameters 

256 ---------- 

257 other : `str` or `Config` or `dict` 

258 Other source of configuration, can be: 

259 

260 - (`str`) Treated as a path to a config file on disk. Must end with 

261 ".yaml". 

262 - (`Config`) Copies the other Config's values into this one. 

263 - (`dict`) Copies the values from the dict into this Config. 

264 

265 If `None` is provided an empty `Config` will be created. 

266 """ 

267 

268 _D: ClassVar[str] = "→" 

269 """Default internal delimiter to use for components in the hierarchy when 

270 constructing keys for external use (see `Config.names()`).""" 

271 

272 includeKey: ClassVar[str] = "includeConfigs" 

273 """Key used to indicate that another config should be included at this 

274 part of the hierarchy.""" 

275 

276 resourcesPackage: str = "lsst.daf.butler" 

277 """Package to search for default configuration data. The resources 

278 themselves will be within a ``configs`` resource hierarchy.""" 

279 

280 def __init__(self, other=None): 

281 self._data = {} 

282 self.configFile = None 

283 

284 if other is None: 

285 return 

286 

287 if isinstance(other, Config): 

288 self._data = copy.deepcopy(other._data) 

289 self.configFile = other.configFile 

290 elif isinstance(other, collections.abc.Mapping): 

291 self.update(other) 

292 elif isinstance(other, str): 

293 # if other is a string, assume it is a file path. 

294 self.__initFromFile(other) 

295 self._processExplicitIncludes() 

296 elif isinstance(other, Resource): 

297 # Assume this is a package resources request 

298 self.__initFromResource(other) 

299 else: 

300 # if the config specified by other could not be recognized raise 

301 # a runtime error. 

302 raise RuntimeError(f"A Config could not be loaded from other: {other}") 

303 

304 def ppprint(self): 

305 """helper function for debugging, prints a config out in a readable 

306 way in the debugger. 

307 

308 use: pdb> print(myConfigObject.ppprint()) 

309 

310 Returns 

311 ------- 

312 s : `str` 

313 A prettyprint formatted string representing the config 

314 """ 

315 return pprint.pformat(self._data, indent=2, width=1) 

316 

317 def __repr__(self): 

318 return f"{type(self).__name__}({self._data!r})" 

319 

    def __str__(self):
        """Return the pretty-printed form produced by `ppprint`."""
        return self.ppprint()

322 

    def __len__(self):
        """Return the number of entries at the top level of the content."""
        return len(self._data)

325 

    def __iter__(self):
        """Iterate over the top level of the content only."""
        return iter(self._data)

328 

    def copy(self):
        """Return a new object of the same type constructed from this one."""
        return type(self)(self)

331 

332 @classmethod 

333 def fromYaml(cls, string: str) -> Config: 

334 """Create a new Config instance from a YAML string. 

335 

336 Parameters 

337 ---------- 

338 string : `str` 

339 String containing content in YAML format 

340 

341 Returns 

342 ------- 

343 c : `Config` 

344 Newly-constructed Config. 

345 """ 

346 return cls().__initFromYaml(string) 

347 

348 def __initFromFile(self, path: str) -> None: 

349 """Load a file from a path or an URI. 

350 

351 Parameters 

352 ---------- 

353 path : `str` 

354 Path or an URI to a persisted config file. 

355 """ 

356 uri = ButlerURI(path) 

357 if uri.path.endswith("yaml"): 

358 if uri.scheme == "s3": 

359 self.__initFromS3YamlFile(uri.geturl()) 

360 else: 

361 self.__initFromYamlFile(uri.ospath) 

362 else: 

363 raise RuntimeError(f"Unhandled config file type: {uri}") 

364 self.configFile = uri 

365 

366 def __initFromResource(self, resource: Resource) -> None: 

367 """Load a config from a package resource. 

368 

369 Parameters 

370 ---------- 

371 resource : `Resource` 

372 The resource package and path. 

373 """ 

374 if not resource.exists(): 

375 raise RuntimeError(f"Package resource {resource} does not exist") 

376 if resource.name.endswith(".yaml"): 

377 self.__initFromYamlResource(resource) 

378 else: 

379 raise RuntimeError(f"Unhandled config resource type: {resource}") 

380 self.configFile = resource 

381 

382 def __initFromYamlResource(self, resource: Resource) -> None: 

383 """Load a config from a YAML package resource. 

384 

385 Parameters 

386 ---------- 

387 resource : `Resource` 

388 The resource package and path. 

389 """ 

390 log.debug("Opening YAML config resource: %s.%s", resource.package, resource.name) 

391 with pkg_resources.resource_stream(resource.package, resource.name) as fh: 

392 self.__initFromYaml(fh) 

393 

394 def __initFromS3YamlFile(self, url): 

395 """Load a file at a given S3 Bucket uri and attempts to load it from 

396 yaml. 

397 

398 Parameters 

399 ---------- 

400 path : `str` 

401 To a persisted config file. 

402 """ 

403 if boto3 is None: 

404 raise ModuleNotFoundError("boto3 not found." 

405 "Are you sure it is installed?") 

406 

407 uri = ButlerURI(url) 

408 s3 = getS3Client() 

409 try: 

410 response = s3.get_object(Bucket=uri.netloc, Key=uri.relativeToPathRoot) 

411 except (s3.exceptions.NoSuchKey, s3.exceptions.NoSuchBucket) as err: 

412 raise FileNotFoundError(f"No such file or directory: {uri}") from err 

413 

414 # boto3 response is a `StreamingBody`, but not a valid Python IOStream. 

415 # Loader will raise an error that the stream has no name. A hackish 

416 # solution is to name it explicitly. 

417 response["Body"].name = url 

418 self.__initFromYaml(response["Body"]) 

419 response["Body"].close() 

420 

421 def __initFromYamlFile(self, path): 

422 """Opens a file at a given path and attempts to load it in from yaml. 

423 

424 Parameters 

425 ---------- 

426 path : `str` 

427 To a persisted config file in YAML format. 

428 """ 

429 log.debug("Opening YAML config file: %s", path) 

430 with open(path, "r") as f: 

431 self.__initFromYaml(f) 

432 

433 def __initFromYaml(self, stream): 

434 """Loads a YAML config from any readable stream that contains one. 

435 

436 Parameters 

437 ---------- 

438 stream: `IO` or `str` 

439 Stream to pass to the YAML loader. Accepts anything that 

440 `yaml.load` accepts. This can include a string as well as an 

441 IO stream. 

442 

443 Raises 

444 ------ 

445 yaml.YAMLError 

446 If there is an error loading the file. 

447 """ 

448 content = yaml.load(stream, Loader=Loader) 

449 if content is None: 

450 content = {} 

451 self._data = content 

452 return self 

453 

    def _processExplicitIncludes(self):
        """Scan through the configuration searching for the special
        includeConfigs directive and process the includes.

        Every hierarchical key whose last component equals ``includeKey``
        is deleted and the files it lists are loaded as configs of the same
        type as this one. Those configs are merged in the order listed and
        the values already present at that level of this config are then
        applied on top, so explicit values win.

        Relative file names are looked up in the current directory first and
        then in the directory of ``configFile`` when one is known.

        Raises
        ------
        RuntimeError
            Raised if ``configFile`` has an unexpected type, if a search
            location uses a URI scheme other than ``file``, or if a
            referenced file can not be found.
        """

        # Search paths for config files
        searchPaths = [os.path.curdir]
        if self.configFile is not None:
            if isinstance(self.configFile, str):
                configDir = os.path.abspath(os.path.dirname(self.configFile))
            elif isinstance(self.configFile, (ButlerURI, Resource)):
                configDir = self.configFile.dirname()
            else:
                raise RuntimeError(f"Unexpected type for config file: {self.configFile}")
            searchPaths.append(configDir)

        # Take the list of hierarchical keys up front; the config is
        # modified inside the loop.
        names = self.nameTuples()
        for path in names:
            if path[-1] == self.includeKey:

                log.debug("Processing file include directive at %s", self._D + self._D.join(path))
                basePath = path[:-1]

                # Extract the includes and then delete them from the config
                includes = self[path]
                del self[path]

                # Be consistent and convert to a list
                if not isinstance(includes, list):
                    includes = [includes]

                # Read each file assuming it is a reference to a file
                # The file can be relative to config file or cwd
                # ConfigSubset search paths are not used
                # At some point these might be URIs which we will have to
                # assume resolve explicitly
                subConfigs = []
                for fileName in includes:
                    # Expand any shell variables
                    fileName = os.path.expandvars(fileName)
                    found = None
                    if os.path.isabs(fileName):
                        # Absolute paths are used as given, without an
                        # existence check here.
                        found = fileName
                    else:
                        for dir in searchPaths:
                            # Convert a string directly to a ButlerURI
                            # to unify the response below
                            if isinstance(dir, str):
                                dir = ButlerURI(dir, forceDirectory=True)

                            if isinstance(dir, ResourceDir):
                                resource = dir.toResource(fileName)
                                if resource.exists():
                                    found = resource
                                    break
                            elif isinstance(dir, ButlerURI):
                                if not dir.scheme:
                                    filePath = os.path.join(dir.path, fileName)
                                    if os.path.exists(filePath):
                                        found = os.path.normpath(os.path.abspath(filePath))
                                        break
                                elif dir.scheme == "file":
                                    # import private helper function
                                    from .location import posix2os
                                    # File URIs always use posix path separator
                                    filePath = posix2os(posixpath.join(dir.path, fileName))
                                    if os.path.exists(filePath):
                                        found = os.path.normpath(os.path.abspath(filePath))
                                        break
                                else:
                                    # For remote resource either we assume
                                    # the resource always exists even though
                                    # it likely does not and we pass it
                                    # directly to the Config constructor here.
                                    # Else we uses s3utils.s3CheckFileExists
                                    # Either way a network call is needed.
                                    # For now no-one is using this
                                    # functionality and there are no S3 tests
                                    # for it so defer implementation.
                                    raise RuntimeError("Can not currently follow includeConfigs to "
                                                       f"{dir}")
                            else:
                                log.warning("Do not understand search path entry '%s' of type %s",
                                            dir, type(dir).__name__)
                    if not found:
                        raise RuntimeError(f"Unable to find referenced include file: {fileName}")

                    # Read the referenced Config as a Config
                    subConfigs.append(type(self)(found))

                # Now we need to merge these sub configs with the current
                # information that was present in this node in the config
                # tree with precedence given to the explicit values
                # NOTE(review): an empty include list leaves subConfigs
                # empty, in which case pop(0) raises IndexError -- confirm
                # whether that input can occur.
                newConfig = subConfigs.pop(0)
                for sc in subConfigs:
                    newConfig.update(sc)

                # Explicit values take precedence
                if not basePath:
                    # This is an include at the root config
                    newConfig.update(self)
                    # Replace the current config
                    self._data = newConfig._data
                else:
                    newConfig.update(self[basePath])
                    # And reattach to the base config
                    self[basePath] = newConfig

561 

562 @staticmethod 

563 def _splitIntoKeys(key): 

564 r"""Split the argument for get/set/in into a hierarchical list. 

565 

566 Parameters 

567 ---------- 

568 key : `str` or iterable 

569 Argument given to get/set/in. If an iterable is provided it will 

570 be converted to a list. If the first character of the string 

571 is not an alphanumeric character then it will be used as the 

572 delimiter for the purposes of splitting the remainder of the 

573 string. If the delimiter is also in one of the keys then it 

574 can be escaped using ``\``. There is no default delimiter. 

575 

576 Returns 

577 ------- 

578 keys : `list` 

579 Hierarchical keys as a `list`. 

580 """ 

581 if isinstance(key, str): 

582 if not key[0].isalnum(): 

583 d = key[0] 

584 key = key[1:] 

585 else: 

586 return [key, ] 

587 escaped = f"\\{d}" 

588 temp = None 

589 if escaped in key: 

590 # Complain at the attempt to escape the escape 

591 doubled = fr"\{escaped}" 

592 if doubled in key: 

593 raise ValueError(f"Escaping an escaped delimiter ({doubled} in {key})" 

594 " is not yet supported.") 

595 # Replace with a character that won't be in the string 

596 temp = "\r" 

597 if temp in key or d == temp: 

598 raise ValueError(f"Can not use character {temp!r} in hierarchical key or as" 

599 " delimiter if escaping the delimiter") 

600 key = key.replace(escaped, temp) 

601 hierarchy = key.split(d) 

602 if temp: 

603 hierarchy = [h.replace(temp, d) for h in hierarchy] 

604 return hierarchy 

605 elif isinstance(key, collections.abc.Iterable): 

606 return list(key) 

607 else: 

608 # Not sure what this is so try it anyway 

609 return [key, ] 

610 

611 def _getKeyHierarchy(self, name): 

612 """Retrieve the key hierarchy for accessing the Config 

613 

614 Parameters 

615 ---------- 

616 name : `str` or `tuple` 

617 Delimited string or `tuple` of hierarchical keys. 

618 

619 Returns 

620 ------- 

621 hierarchy : `list` of `str` 

622 Hierarchy to use as a `list`. If the name is available directly 

623 as a key in the Config it will be used regardless of the presence 

624 of any nominal delimiter. 

625 """ 

626 if name in self._data: 

627 keys = [name, ] 

628 else: 

629 keys = self._splitIntoKeys(name) 

630 return keys 

631 

632 def _findInHierarchy(self, keys, create=False): 

633 """Look for hierarchy of keys in Config 

634 

635 Parameters 

636 ---------- 

637 keys : `list` or `tuple` 

638 Keys to search in hierarchy. 

639 create : `bool`, optional 

640 If `True`, if a part of the hierarchy does not exist, insert an 

641 empty `dict` into the hierarchy. 

642 

643 Returns 

644 ------- 

645 hierarchy : `list` 

646 List of the value corresponding to each key in the supplied 

647 hierarchy. Only keys that exist in the hierarchy will have 

648 a value. 

649 complete : `bool` 

650 `True` if the full hierarchy exists and the final element 

651 in ``hierarchy`` is the value of relevant value. 

652 """ 

653 d = self._data 

654 

655 def checkNextItem(k, d, create): 

656 """See if k is in d and if it is return the new child""" 

657 nextVal = None 

658 isThere = False 

659 if d is None: 

660 # We have gone past the end of the hierarchy 

661 pass 

662 elif isinstance(d, collections.abc.Sequence): 

663 # Check sequence first because for lists 

664 # __contains__ checks whether value is found in list 

665 # not whether the index exists in list. When we traverse 

666 # the hierarchy we are interested in the index. 

667 try: 

668 nextVal = d[int(k)] 

669 isThere = True 

670 except IndexError: 

671 pass 

672 except ValueError: 

673 isThere = k in d 

674 elif k in d: 

675 nextVal = d[k] 

676 isThere = True 

677 elif create: 

678 d[k] = {} 

679 nextVal = d[k] 

680 isThere = True 

681 return nextVal, isThere 

682 

683 hierarchy = [] 

684 complete = True 

685 for k in keys: 

686 d, isThere = checkNextItem(k, d, create) 

687 if isThere: 

688 hierarchy.append(d) 

689 else: 

690 complete = False 

691 break 

692 

693 return hierarchy, complete 

694 

695 def __getitem__(self, name): 

696 # Override the split for the simple case where there is an exact 

697 # match. This allows `Config.items()` to work via a simple 

698 # __iter__ implementation that returns top level keys of 

699 # self._data. 

700 keys = self._getKeyHierarchy(name) 

701 

702 hierarchy, complete = self._findInHierarchy(keys) 

703 if not complete: 

704 raise KeyError(f"{name} not found") 

705 data = hierarchy[-1] 

706 

707 if isinstance(data, collections.abc.Mapping): 

708 data = Config(data) 

709 # Ensure that child configs inherit the parent internal delimiter 

710 if self._D != Config._D: 

711 data._D = self._D 

712 return data 

713 

714 def __setitem__(self, name, value): 

715 keys = self._getKeyHierarchy(name) 

716 last = keys.pop() 

717 if isinstance(value, Config): 

718 value = copy.deepcopy(value._data) 

719 

720 hierarchy, complete = self._findInHierarchy(keys, create=True) 

721 if hierarchy: 

722 data = hierarchy[-1] 

723 else: 

724 data = self._data 

725 

726 try: 

727 data[last] = value 

728 except TypeError: 

729 data[int(last)] = value 

730 

731 def __contains__(self, key): 

732 keys = self._getKeyHierarchy(key) 

733 hierarchy, complete = self._findInHierarchy(keys) 

734 return complete 

735 

736 def __delitem__(self, key): 

737 keys = self._getKeyHierarchy(key) 

738 last = keys.pop() 

739 hierarchy, complete = self._findInHierarchy(keys) 

740 if complete: 

741 if hierarchy: 

742 data = hierarchy[-1] 

743 else: 

744 data = self._data 

745 del data[last] 

746 else: 

747 raise KeyError(f"{key} not found in Config") 

748 

749 def update(self, other): 

750 """Like dict.update, but will add or modify keys in nested dicts, 

751 instead of overwriting the nested dict entirely. 

752 

753 For example, for the given code: 

754 foo = {"a": {"b": 1}} 

755 foo.update({"a": {"c": 2}}) 

756 

757 Parameters 

758 ---------- 

759 other : `dict` or `Config` 

760 Source of configuration: 

761 

762 - If foo is a dict, then after the update foo == {"a": {"c": 2}} 

763 - But if foo is a Config, then after the update 

764 foo == {"a": {"b": 1, "c": 2}} 

765 """ 

766 def doUpdate(d, u): 

767 if not isinstance(u, collections.abc.Mapping) or \ 

768 not isinstance(d, collections.abc.Mapping): 

769 raise RuntimeError("Only call update with Mapping, not {}".format(type(d))) 

770 for k, v in u.items(): 

771 if isinstance(v, collections.abc.Mapping): 

772 d[k] = doUpdate(d.get(k, {}), v) 

773 else: 

774 d[k] = v 

775 return d 

776 doUpdate(self._data, other) 

777 

778 def merge(self, other): 

779 """Like Config.update, but will add keys & values from other that 

780 DO NOT EXIST in self. 

781 

782 Keys and values that already exist in self will NOT be overwritten. 

783 

784 Parameters 

785 ---------- 

786 other : `dict` or `Config` 

787 Source of configuration: 

788 """ 

789 otherCopy = copy.deepcopy(other) 

790 otherCopy.update(self) 

791 self._data = otherCopy._data 

792 

793 def nameTuples(self, topLevelOnly=False): 

794 """Get tuples representing the name hierarchies of all keys. 

795 

796 The tuples returned from this method are guaranteed to be usable 

797 to access items in the configuration object. 

798 

799 Parameters 

800 ---------- 

801 topLevelOnly : `bool`, optional 

802 If False, the default, a full hierarchy of names is returned. 

803 If True, only the top level are returned. 

804 

805 Returns 

806 ------- 

807 names : `list` of `tuple` of `str` 

808 List of all names present in the `Config` where each element 

809 in the list is a `tuple` of strings representing the hierarchy. 

810 """ 

811 if topLevelOnly: 

812 return list((k,) for k in self) 

813 

814 def getKeysAsTuples(d, keys, base): 

815 if isinstance(d, collections.abc.Sequence): 

816 theseKeys = range(len(d)) 

817 else: 

818 theseKeys = d.keys() 

819 for key in theseKeys: 

820 val = d[key] 

821 levelKey = base + (key,) if base is not None else (key,) 

822 keys.append(levelKey) 

823 if isinstance(val, (collections.abc.Mapping, collections.abc.Sequence)) \ 

824 and not isinstance(val, str): 

825 getKeysAsTuples(val, keys, levelKey) 

826 keys = [] 

827 getKeysAsTuples(self._data, keys, None) 

828 return keys 

829 

830 def names(self, topLevelOnly=False, delimiter=None): 

831 """Get a delimited name of all the keys in the hierarchy. 

832 

833 The values returned from this method are guaranteed to be usable 

834 to access items in the configuration object. 

835 

836 Parameters 

837 ---------- 

838 topLevelOnly : `bool`, optional 

839 If False, the default, a full hierarchy of names is returned. 

840 If True, only the top level are returned. 

841 delimiter : `str`, optional 

842 Delimiter to use when forming the keys. If the delimiter is 

843 present in any of the keys, it will be escaped in the returned 

844 names. If `None` given a delimiter will be automatically provided. 

845 The delimiter can not be alphanumeric. 

846 

847 Returns 

848 ------- 

849 names : `list` of `str` 

850 List of all names present in the `Config`. 

851 

852 Notes 

853 ----- 

854 This is different than the built-in method `dict.keys`, which will 

855 return only the first level keys. 

856 

857 Raises 

858 ------ 

859 ValueError: 

860 The supplied delimiter is alphanumeric. 

861 """ 

862 if topLevelOnly: 

863 return list(self.keys()) 

864 

865 # Get all the tuples of hierarchical keys 

866 nameTuples = self.nameTuples() 

867 

868 if delimiter is not None and delimiter.isalnum(): 

869 raise ValueError(f"Supplied delimiter ({delimiter!r}) must not be alphanumeric.") 

870 

871 if delimiter is None: 

872 # Start with something, and ensure it does not need to be 

873 # escaped (it is much easier to understand if not escaped) 

874 delimiter = self._D 

875 

876 # Form big string for easy check of delimiter clash 

877 combined = "".join("".join(str(s) for s in k) for k in nameTuples) 

878 

879 # Try a delimiter and keep trying until we get something that 

880 # works. 

881 ntries = 0 

882 while delimiter in combined: 

883 log.debug(f"Delimiter '{delimiter}' could not be used. Trying another.") 

884 ntries += 1 

885 

886 if ntries > 100: 

887 raise ValueError(f"Unable to determine a delimiter for Config {self}") 

888 

889 # try another one 

890 while True: 

891 delimiter = chr(ord(delimiter)+1) 

892 if not delimiter.isalnum(): 

893 break 

894 

895 log.debug(f"Using delimiter {delimiter!r}") 

896 

897 # Form the keys, escaping the delimiter if necessary 

898 strings = [delimiter + delimiter.join(str(s).replace(delimiter, f"\\{delimiter}") for s in k) 

899 for k in nameTuples] 

900 return strings 

901 

902 def asArray(self, name): 

903 """Get a value as an array. 

904 

905 May contain one or more elements. 

906 

907 Parameters 

908 ---------- 

909 name : `str` 

910 Key to use to retrieve value. 

911 

912 Returns 

913 ------- 

914 array : `collections.abc.Sequence` 

915 The value corresponding to name, but guaranteed to be returned 

916 as a list with at least one element. If the value is a 

917 `~collections.abc.Sequence` (and not a `str`) the value itself 

918 will be returned, else the value will be the first element. 

919 """ 

920 val = self.get(name) 

921 if isinstance(val, str): 

922 val = [val] 

923 elif not isinstance(val, collections.abc.Sequence): 

924 val = [val] 

925 return val 

926 

927 def __eq__(self, other): 

928 if isinstance(other, Config): 

929 other = other._data 

930 return self._data == other 

931 

932 def __ne__(self, other): 

933 if isinstance(other, Config): 

934 other = other._data 

935 return self._data != other 

936 

937 ####### 

938 # i/o # 

939 

    def dump(self, output):
        """Writes the config to a yaml stream.

        The content is written with `yaml.safe_dump` using block style
        (``default_flow_style=False``).

        Parameters
        ----------
        output
            The YAML stream to use for output.
        """
        yaml.safe_dump(self._data, output, default_flow_style=False)

949 

def dumpToUri(self, uri, updateFile=True, defaultFileName="butler.yaml",
              overwrite=True):
    """Writes the config to location pointed to by given URI.

    Currently supports 's3' and 'file' URI schemes (a URI without a
    scheme is treated as a local file).

    Parameters
    ----------
    uri : `str` or `ButlerURI`
        URI of location where the Config will be written.
    updateFile : `bool`, optional
        If `True` and ``uri`` does not end on a filename with extension,
        ``defaultFileName`` is appended to the target URI. For local
        files the name is appended when ``uri`` is an existing
        directory. `True` by default.
    defaultFileName : `str`, optional
        The file name that will be appended to target uri if
        ``updateFile`` is `True` and uri does not end on a file with an
        extension.
    overwrite : `bool`, optional
        If `True` the configuration will be written even if it already
        exists at that location.

    Raises
    ------
    ValueError
        Raised if the URI scheme is not one of the supported schemes.
    """
    if isinstance(uri, str):
        uri = ButlerURI(uri)

    if not uri.scheme or uri.scheme == "file":
        # Test the same OS path that is subsequently written to.
        if os.path.isdir(uri.ospath) and updateFile:
            uri = ButlerURI(os.path.join(uri.ospath, defaultFileName))
        self.dumpToFile(uri.ospath, overwrite=overwrite)
    elif uri.scheme == "s3":
        # Only fall back to the default file name when requested and when
        # the URI does not already name a file with an extension, so an
        # explicitly supplied file name is honoured.
        if updateFile and (uri.dirLike or "." not in uri.basename()):
            if not uri.dirLike:
                uri = ButlerURI(uri.geturl(), forceDirectory=True)
            uri.updateFile(defaultFileName)
        self.dumpToS3File(uri, overwrite=overwrite)
    else:
        raise ValueError(f"Unrecognized URI scheme: {uri.scheme}")

984 

def dumpToFile(self, path, *, overwrite=True):
    """Serialize the config as YAML into a local file.

    Parameters
    ----------
    path : `str`
        Path to the file to use for output.
    overwrite : `bool`, optional
        If True any existing file will be over written.

    Raises
    ------
    FileExistsError
        Raised if the file already exists but overwrite is False.

    Notes
    -----
    After a successful write the output location is recorded in the
    ``configFile`` attribute as a `ButlerURI`.
    """
    # Exclusive-creation mode makes open() fail if the file is present.
    mode = "w" if overwrite else "x"
    with open(path, mode) as stream:
        self.dump(stream)
    self.configFile = ButlerURI(path)

1011 

def dumpToS3File(self, uri, *, overwrite=True):
    """Serialize the config as YAML into an object in an S3 bucket.

    Parameters
    ----------
    uri : `ButlerURI`
        S3 URI where the configuration should be stored.
    overwrite : `bool`, optional
        If False, a check will be made to see if the key already
        exists.

    Raises
    ------
    ModuleNotFoundError
        Raised if ``boto3`` could not be imported.
    ValueError
        Raised if ``uri`` does not use the ``s3`` scheme.
    FileExistsError
        Raised if the configuration already exists at this location
        and overwrite is set to `False`.

    Notes
    -----
    After a successful upload the output location is recorded in the
    ``configFile`` attribute.
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. "
                                  "Are you sure it is installed?")

    if uri.scheme != "s3":
        raise ValueError(f"Must provide S3 URI not {uri}")

    client = getS3Client()

    if not overwrite:
        from .s3utils import s3CheckFileExists
        exists = s3CheckFileExists(uri, client=client)[0]
        if exists:
            raise FileExistsError(f"Config already exists at {uri}")

    bucketName = uri.netloc
    objectKey = uri.relativeToPathRoot

    # Render the YAML in memory and upload the resulting text in one call.
    with io.StringIO() as buffer:
        self.dump(buffer)
        payload = buffer.getvalue()
    client.put_object(Bucket=bucketName, Key=objectKey, Body=payload)

    self.configFile = uri

1056 

1057 @staticmethod 

1058 def updateParameters(configType, config, full, toUpdate=None, toCopy=None, overwrite=True): 

1059 """Generic helper function for updating specific config parameters. 

1060 

1061 Allows for named parameters to be set to new values in bulk, and 

1062 for other values to be set by copying from a reference config. 

1063 

1064 Assumes that the supplied config is compatible with ``configType`` 

1065 and will attach the updated values to the supplied config by 

1066 looking for the related component key. It is assumed that 

1067 ``config`` and ``full`` are from the same part of the 

1068 configuration hierarchy. 

1069 

1070 Parameters 

1071 ---------- 

1072 configType : `ConfigSubset` 

1073 Config type to use to extract relevant items from ``config``. 

1074 config : `Config` 

1075 A `Config` to update. Only the subset understood by 

1076 the supplied `ConfigSubset` will be modified. Default values 

1077 will not be inserted and the content will not be validated 

1078 since mandatory keys are allowed to be missing until 

1079 populated later by merging. 

1080 full : `Config` 

1081 A complete config with all defaults expanded that can be 

1082 converted to a ``configType``. Read-only and will not be 

1083 modified by this method. Values are read from here if 

1084 ``toCopy`` is defined. 

1085 

1086 Repository-specific options that should not be obtained 

1087 from defaults when Butler instances are constructed 

1088 should be copied from ``full`` to ``config``. 

1089 toUpdate : `dict`, optional 

1090 A `dict` defining the keys to update and the new value to use. 

1091 The keys and values can be any supported by `Config` 

1092 assignment. 

1093 toCopy : `tuple`, optional 

1094 `tuple` of keys whose values should be copied from ``full`` 

1095 into ``config``. 

1096 overwrite : `bool`, optional 

1097 If `False`, do not modify a value in ``config`` if the key 

1098 already exists. Default is always to overwrite. 

1099 

1100 Raises 

1101 ------ 

1102 ValueError 

1103 Neither ``toUpdate`` not ``toCopy`` were defined. 

1104 """ 

1105 if toUpdate is None and toCopy is None: 

1106 raise ValueError("One of toUpdate or toCopy parameters must be set.") 

1107 

1108 # If this is a parent configuration then we need to ensure that 

1109 # the supplied config has the relevant component key in it. 

1110 # If this is a parent configuration we add in the stub entry 

1111 # so that the ConfigSubset constructor will do the right thing. 

1112 # We check full for this since that is guaranteed to be complete. 

1113 if configType.component in full and configType.component not in config: 

1114 config[configType.component] = {} 

1115 

1116 # Extract the part of the config we wish to update 

1117 localConfig = configType(config, mergeDefaults=False, validate=False) 

1118 

1119 if toUpdate: 

1120 for key, value in toUpdate.items(): 

1121 if key in localConfig and not overwrite: 

1122 log.debug("Not overriding key '%s' with value '%s' in config %s", 

1123 key, value, localConfig.__class__.__name__) 

1124 else: 

1125 localConfig[key] = value 

1126 

1127 if toCopy: 

1128 localFullConfig = configType(full, mergeDefaults=False) 

1129 for key in toCopy: 

1130 if key in localConfig and not overwrite: 

1131 log.debug("Not overriding key '%s' from defaults in config %s", 

1132 key, localConfig.__class__.__name__) 

1133 else: 

1134 localConfig[key] = localFullConfig[key] 

1135 

1136 # Reattach to parent if this is a child config 

1137 if configType.component in config: 

1138 config[configType.component] = localConfig 

1139 else: 

1140 config.update(localConfig) 

1141 

def toDict(self):
    """Convert a `Config` to a standalone hierarchical `dict`.

    Returns
    -------
    d : `dict`
        A deep copy of the internal data in which any `Config` found at
        any depth of nested `dict` or `list` containers has been
        replaced by the result of its own ``toDict()``.

    Notes
    -----
    This can be useful when passing a Config to some code that
    expects native Python types.
    """
    def _convert(node):
        # Replace Config instances, descending through dicts and lists.
        # Containers belong to the deep copy so are modified in place.
        if isinstance(node, Config):
            return node.toDict()
        if isinstance(node, dict):
            for key, value in node.items():
                node[key] = _convert(value)
        elif isinstance(node, list):
            node[:] = [_convert(item) for item in node]
        return node

    return _convert(copy.deepcopy(self._data))

1162 

1163 

1164class ConfigSubset(Config): 

1165 """Config representing a subset of a more general configuration. 

1166 

1167 Subclasses define their own component and when given a configuration 

1168 that includes that component, the resulting configuration only includes 

1169 the subset. For example, your config might contain ``dimensions`` if it's 

1170 part of a global config and that subset will be stored. If ``dimensions`` 

1171 can not be found it is assumed that the entire contents of the 

1172 configuration should be used. 

1173 

1174 Default values are read from the environment or supplied search paths 

1175 using the default configuration file name specified in the subclass. 

1176 This allows a configuration class to be instantiated without any 

1177 additional arguments. 

1178 

1179 Additional validation can be specified to check for keys that are mandatory 

1180 in the configuration. 

1181 

1182 Parameters 

1183 ---------- 

1184 other : `Config` or `str` or `dict` 

1185 Argument specifying the configuration information as understood 

1186 by `Config` 

1187 validate : `bool`, optional 

1188 If `True` required keys will be checked to ensure configuration 

1189 consistency. 

1190 mergeDefaults : `bool`, optional 

1191 If `True` defaults will be read and the supplied config will 

1192 be combined with the defaults, with the supplied values taking 

1193 precedence. 

1194 searchPaths : `list` or `tuple`, optional 

1195 Explicit additional paths to search for defaults. They should 

1196 be supplied in priority order. These paths have higher priority 

1197 than those read from the environment in 

1198 `ConfigSubset.defaultSearchPaths()`. Paths can be `str` referring to 

1199 the local file system, or `ResourceDir`. 

1200 """ 

1201 

1202 component: ClassVar[Optional[str]] = None 

1203 """Component to use from supplied config. Can be None. If specified the 

1204 key is not required. Can be a full dot-separated path to a component. 

1205 """ 

1206 

1207 requiredKeys: ClassVar[Sequence[str]] = () 

1208 """Keys that are required to be specified in the configuration. 

1209 """ 

1210 

1211 defaultConfigFile: ClassVar[Optional[str]] = None 

1212 """Name of the file containing defaults for this config class. 

1213 """ 

1214 

def __init__(self, other=None, validate=True, mergeDefaults=True, searchPaths=None):
    """Build the subset from ``other``, optionally layered over defaults.

    Defaults (when ``mergeDefaults`` is true) are loaded first and the
    externally supplied values are applied on top of them.
    """
    # Start from an empty object; defaults are merged in first and the
    # external values are applied afterwards.
    super().__init__()

    # Parse the input as a plain Config rather than as a subset.
    external = Config(other)

    # Pick out the relevant component. To simplify the use of !include
    # the doubled form component.component is also accepted (included
    # files can themselves include the component name), and it has to be
    # tested before the single-depth form.
    componentName = self.component
    if componentName is not None:
        nested = (componentName, componentName)
        if nested in external:
            external = external[nested]
        elif componentName in external:
            external._data = external._data[componentName]

    # Default files read to create this configuration
    self.filesRead = []

    # Stays None unless the class named by "cls" declares a container key.
    containerKey = None

    if mergeDefaults:
        # Explicitly supplied search paths take priority over those
        # reported by defaultSearchPaths().
        fullSearchPath = list(searchPaths) if searchPaths else []
        fullSearchPath.extend(self.defaultSearchPaths())

        # Defaults can come from two places:
        # - the "defaultConfigFile" defined in the subclass
        # - the class named by the "cls" element in the config, which is
        #   looked up after the first merge in case that changed it.
        if self.defaultConfigFile is not None:
            self._updateWithConfigsFromPath(fullSearchPath, self.defaultConfigFile)

        # A class named in the external config wins over one that came
        # from the defaults.
        if "cls" in external:
            pytype = external["cls"]
        elif "cls" in self:
            pytype = self["cls"]
        else:
            pytype = None

        if pytype is not None:
            try:
                cls = doImport(pytype)
            except ImportError as e:
                raise RuntimeError(f"Failed to import cls '{pytype}' for config {type(self)}") from e

            defaultsFile = cls.defaultConfigFile
            if defaultsFile is not None:
                self._updateWithConfigsFromPath(fullSearchPath, defaultsFile)

            # Not every class defines a container key.
            containerKey = getattr(cls, "containerKey", None)

    # External values are applied last so they always override defaults.
    self.update(external)

    # Child configurations of this same config class, stored under the
    # container key, need their defaults expanded as well.
    if mergeDefaults and containerKey is not None and containerKey in self:
        for idx, subConfig in enumerate(self[containerKey]):
            self[containerKey, idx] = type(self)(other=subConfig, validate=validate,
                                                 mergeDefaults=mergeDefaults,
                                                 searchPaths=searchPaths)

    if validate:
        self.validate()

1298 

@classmethod
def defaultSearchPaths(cls):
    """Determine the search paths to use for global defaults.

    Paths listed in the environment variable named by the module-level
    ``CONFIG_PATH`` constant come first, in the order given. They are
    followed by the ``configs`` resource directory of the package
    named by ``cls.resourcesPackage``.

    Returns
    -------
    paths : `list`
        Paths to search, with the highest priority paths first. Entries
        taken from the environment are `str`; the final entry is a
        `ResourceDir` for the package defaults.

    Notes
    -----
    The environment variable is split on ``os.pathsep``.
    This currently makes it incompatible with usage of URIs.
    """
    envValue = os.environ.get(CONFIG_PATH)
    paths = [] if envValue is None else envValue.split(os.pathsep)

    # The package defaults always go last, i.e. at lowest priority.
    paths.append(ResourceDir(cls.resourcesPackage, "configs"))
    return paths

1336 

def _updateWithConfigsFromPath(self, searchPaths, configFile):
    """Merge in every copy of ``configFile`` found on the search paths.

    The values read will override values currently stored in the object.
    Every file found in the path will be read, such that the earlier
    path entries have higher priority. Each file that is read is
    recorded in ``self.filesRead``.

    Parameters
    ----------
    searchPaths : `list`
        Paths to search for the supplied configFile, in priority
        order: values from the first entry win over those from later
        entries. Can contain `str` referring to the local file
        system or `ResourceDir`; entries of other types are skipped.
    configFile : `str`
        File to locate in path. An existing absolute path is read
        directly and the search path is not used. A URI with a scheme
        is also read directly, without checking that it exists.
    """
    if ButlerURI(configFile).scheme:
        # Explicit resource: assumed to exist, search path is ignored.
        self._updateWithOtherConfigFile(configFile)
        self.filesRead.append(configFile)
        return

    if os.path.isabs(configFile) and os.path.exists(configFile):
        self.filesRead.append(configFile)
        self._updateWithOtherConfigFile(configFile)
        return

    # Walk from lowest to highest priority so that the highest priority
    # entries are merged last and therefore win.
    for location in reversed(searchPaths):
        if isinstance(location, str):
            candidate = os.path.join(location, configFile)
            found = os.path.exists(candidate)
        elif isinstance(location, ResourceDir):
            candidate = location.toResource(configFile)
            found = candidate.exists()
        else:
            continue

        if found:
            self.filesRead.append(candidate)
            self._updateWithOtherConfigFile(candidate)

1380 

1381 def _updateWithOtherConfigFile(self, file): 

1382 """Read in some defaults and update. 

1383 

1384 Update the configuration by reading the supplied file as a config 

1385 of this class, and merging such that these values override the 

1386 current values. Contents of the external config are not validated. 

1387 

1388 Parameters 

1389 ---------- 

1390 file : `Config`, `str`, or `dict` 

1391 Entity that can be converted to a `ConfigSubset`. 

1392 """ 

1393 # Use this class to read the defaults so that subsetting can happen 

1394 # correctly. 

1395 externalConfig = type(self)(file, validate=False, mergeDefaults=False) 

1396 self.update(externalConfig) 

1397 

def validate(self):
    """Check that mandatory keys are present in this configuration.

    Each entry of ``requiredKeys`` is looked up directly in the
    internal data. Nothing happens if ``requiredKeys`` is empty.

    Raises
    ------
    KeyError
        Raised if one or more required keys are missing.
    """
    absent = [key for key in self.requiredKeys if key not in self._data]
    if not absent:
        return
    raise KeyError(f"Mandatory keys ({absent}) missing from supplied configuration for {type(self)}")

1404 if missing: 

1405 raise KeyError(f"Mandatory keys ({missing}) missing from supplied configuration for {type(self)}")