Coverage for python/lsst/daf/butler/core/config.py: 45%
485 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-25 15:14 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-25 15:14 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Configuration control."""
24from __future__ import annotations
26__all__ = ("Config", "ConfigSubset")
28import copy
29import io
30import json
31import logging
32import os
33import pprint
34import sys
35from collections import defaultdict
36from collections.abc import Iterable, Iterator, Mapping, MutableMapping, Sequence
37from pathlib import Path
38from typing import IO, TYPE_CHECKING, Any, ClassVar, cast
40import yaml
41from lsst.resources import ResourcePath, ResourcePathExpression
42from lsst.utils import doImportType
43from yaml.representer import Representer
# Allow defaultdict instances to be serialized as plain YAML mappings.
yaml.add_representer(defaultdict, Representer.represent_dict)


# Config module logger
log = logging.getLogger(__name__)

# PATH-like environment variable to use for defaults.
CONFIG_PATH = "DAF_BUTLER_CONFIG_PATH"

if TYPE_CHECKING:
    yamlLoader = yaml.SafeLoader
else:
    try:
        yamlLoader = yaml.CSafeLoader
    except AttributeError:
        # Not all installations have the C library
        # (but assume for mypy's sake that they're the same)
        yamlLoader = yaml.SafeLoader
65def _doUpdate(d: Mapping[str, Any], u: Mapping[str, Any]) -> Mapping[str, Any]:
66 if not isinstance(u, Mapping) or not isinstance(d, MutableMapping): 66 ↛ 67line 66 didn't jump to line 67, because the condition on line 66 was never true
67 raise RuntimeError(f"Only call update with Mapping, not {type(d)}")
68 for k, v in u.items():
69 if isinstance(v, Mapping):
70 lhs = d.get(k, {})
71 if not isinstance(lhs, Mapping): 71 ↛ 72line 71 didn't jump to line 72, because the condition on line 71 was never true
72 lhs = {}
73 d[k] = _doUpdate(lhs, v)
74 else:
75 d[k] = v
76 return d
79def _checkNextItem(k: str | int, d: Any, create: bool, must_be_dict: bool) -> tuple[Any, bool]:
80 """See if k is in d and if it is return the new child."""
81 nextVal = None
82 isThere = False
83 if d is None: 83 ↛ 85line 83 didn't jump to line 85, because the condition on line 83 was never true
84 # We have gone past the end of the hierarchy
85 pass
86 elif not must_be_dict and isinstance(d, Sequence): 86 ↛ 91line 86 didn't jump to line 91, because the condition on line 86 was never true
87 # Check for Sequence first because for lists
88 # __contains__ checks whether value is found in list
89 # not whether the index exists in list. When we traverse
90 # the hierarchy we are interested in the index.
91 try:
92 nextVal = d[int(k)]
93 isThere = True
94 except IndexError:
95 pass
96 except ValueError:
97 isThere = k in d
98 elif k in d:
99 nextVal = d[k]
100 isThere = True
101 elif create: 101 ↛ 102line 101 didn't jump to line 102, because the condition on line 101 was never true
102 d[k] = {}
103 nextVal = d[k]
104 isThere = True
106 return nextVal, isThere
class Loader(yamlLoader):
    """YAML Loader that supports file include directives.

    Uses ``!include`` directive in a YAML file to point to another
    YAML file to be included. The path in the include directive is relative
    to the file containing that directive.

        storageClasses: !include storageClasses.yaml

    Examples
    --------
    >>> with open("document.yaml", "r") as f:
    ...     data = yaml.load(f, Loader=Loader)

    Notes
    -----
    See https://davidchall.github.io/yaml-includes.html
    """

    def __init__(self, stream: str | IO):  # types-PyYAML annotates 'stream' with a private type
        super().__init__(stream)
        # if this is a string and not a stream we may well lack a name
        if hasattr(stream, "name"):
            self._root = ResourcePath(stream.name)
        else:
            # No choice but to assume a local filesystem
            self._root = ResourcePath("no-file.yaml")
        self.add_constructor("!include", Loader.include)

    def include(self, node: yaml.Node) -> list[Any] | dict[str, Any]:
        """Construct the value for an ``!include`` node.

        Parameters
        ----------
        node : `yaml.Node`
            Node tagged with ``!include``. A scalar names one file, a
            sequence names several files, and a mapping maps string keys
            to files.

        Returns
        -------
        result : `list` or `dict` or `typing.Any`
            Loaded content of the referenced file(s), shaped like the node.

        Raises
        ------
        TypeError
            Raised if a mapping node has a key that is not a string.
        yaml.constructor.ConstructorError
            Raised if the node is not a scalar, sequence or mapping.
        """
        result: list[Any] | dict[str, Any]
        if isinstance(node, yaml.ScalarNode):
            return self.extractFile(self.construct_scalar(node))  # type: ignore[arg-type]

        elif isinstance(node, yaml.SequenceNode):
            result = []
            for filename in self.construct_sequence(node):
                result.append(self.extractFile(filename))
            return result

        elif isinstance(node, yaml.MappingNode):
            result = {}
            for k, v in self.construct_mapping(node).items():
                if not isinstance(k, str):
                    raise TypeError(f"Expected only strings in YAML mapping; got {k!r} of type {type(k)}.")
                result[k] = self.extractFile(v)
            return result

        else:
            # Carry the explanation in the exception instead of printing it.
            raise yaml.constructor.ConstructorError(
                None, None, "unrecognised node type in !include statement", node.start_mark
            )

    def extractFile(self, filename: str) -> Any:
        """Load the YAML file referenced by an ``!include`` directive.

        Parameters
        ----------
        filename : `str`
            Explicit URI, or a path relative to the including file.

        Returns
        -------
        data : `typing.Any`
            Content of the file as parsed by this loader class.
        """
        # It is possible for the !include to point to an explicit URI
        # instead of a relative URI, therefore we first see if it is
        # scheme-less or not. If it has a scheme we use it directly
        # if it is scheme-less we use it relative to the file root.
        requesteduri = ResourcePath(filename, forceAbsolute=False)

        if requesteduri.scheme:
            fileuri = requesteduri
        else:
            fileuri = self._root.updatedFile(filename)

        log.debug("Opening YAML file via !include: %s", fileuri)

        # Read all the data from the resource
        data = fileuri.read()

        # Store the bytes into a BytesIO so we can attach a .name
        stream = io.BytesIO(data)
        stream.name = fileuri.geturl()
        return yaml.load(stream, Loader)
184# Type of the key used for accessing items in configuration object. It can be
185# a single string as described below or a sequence of srtings and integer
186# indices. Indices are used to access items in sequences stored in config.
187_ConfigKey = str | Sequence[str | int]
190class Config(MutableMapping):
191 r"""Implements a datatype that is used by `Butler` for configuration.
193 It is essentially a `dict` with key/value pairs, including nested dicts
194 (as values). In fact, it can be initialized with a `dict`.
195 This is explained next:
197 Config extends the `dict` api so that hierarchical values may be accessed
198 with delimited notation or as a tuple. If a string is given the delimiter
199 is picked up from the first character in that string. For example,
200 ``foo.getValue(".a.b.c")``, ``foo["a"]["b"]["c"]``, ``foo["a", "b", "c"]``,
201 ``foo[".a.b.c"]``, and ``foo["/a/b/c"]`` all achieve the same outcome.
202 If the first character is alphanumeric, no delimiter will be used.
203 ``foo["a.b.c"]`` will be a single key ``a.b.c`` as will ``foo[":a.b.c"]``.
204 Unicode characters can be used as the delimiter for distinctiveness if
205 required.
207 If a key in the hierarchy starts with a non-alphanumeric character care
208 should be used to ensure that either the tuple interface is used or
209 a distinct delimiter is always given in string form.
211 Finally, the delimiter can be escaped if it is part of a key and also
212 has to be used as a delimiter. For example, ``foo[r".a.b\.c"]`` results in
213 a two element hierarchy of ``a`` and ``b.c``. For hard-coded strings it is
214 always better to use a different delimiter in these cases.
216 Note that adding a multi-level key implicitly creates any nesting levels
217 that do not exist, but removing multi-level keys does not automatically
218 remove empty nesting levels. As a result:
220 >>> c = Config()
221 >>> c[".a.b"] = 1
222 >>> del c[".a.b"]
223 >>> c
224 Config({'a': {}})
226 Storage formats supported:
228 - yaml: read and write is supported.
229 - json: read and write is supported but no ``!include`` directive.
231 Parameters
232 ----------
233 other : `lsst.resources.ResourcePath` or `Config` or `dict`
234 Other source of configuration, can be:
236 - (`lsst.resources.ResourcePathExpression`)
237 Treated as a URI to a config file. Must end with ".yaml" or ".json".
238 - (`Config`) Copies the other Config's values into this one.
239 - (`dict`) Copies the values from the dict into this Config.
241 If `None` is provided an empty `Config` will be created.
242 """
244 _D: str = "→"
245 """Default internal delimiter to use for components in the hierarchy when
246 constructing keys for external use (see `Config.names()`)."""
248 includeKey: ClassVar[str] = "includeConfigs"
249 """Key used to indicate that another config should be included at this
250 part of the hierarchy."""
252 resourcesPackage: str = "lsst.daf.butler"
253 """Package to search for default configuration data. The resources
254 themselves will be within a ``configs`` resource hierarchy."""
256 def __init__(self, other: ResourcePathExpression | Config | Mapping[str, Any] | None = None):
257 self._data: dict[str, Any] = {}
258 self.configFile: ResourcePath | None = None
260 if other is None:
261 return
263 if isinstance(other, Config):
264 # Deep copy might be more efficient but if someone has overridden
265 # a config entry to store a complex object then deep copy may
266 # fail. Safer to use update().
267 self.update(other._data)
268 self.configFile = other.configFile
269 elif isinstance(other, dict | Mapping):
270 # In most cases we have a dict, and it's more efficient
271 # to check for a dict instance before checking the generic mapping.
272 self.update(other)
273 elif isinstance(other, str | ResourcePath | Path): 273 ↛ 280line 273 didn't jump to line 280, because the condition on line 273 was never false
274 # if other is a string, assume it is a file path/URI
275 self.__initFromUri(other)
276 self._processExplicitIncludes()
277 else:
278 # if the config specified by other could not be recognized raise
279 # a runtime error.
280 raise RuntimeError(f"A Config could not be loaded from other: {other}")
282 def ppprint(self) -> str:
283 """Return config as formatted readable string.
285 Examples
286 --------
287 use: ``pdb> print(myConfigObject.ppprint())``
289 Returns
290 -------
291 s : `str`
292 A prettyprint formatted string representing the config
293 """
294 return pprint.pformat(self._data, indent=2, width=1)
296 def __repr__(self) -> str:
297 return f"{type(self).__name__}({self._data!r})"
299 def __str__(self) -> str:
300 return self.ppprint()
302 def __len__(self) -> int:
303 return len(self._data)
305 def __iter__(self) -> Iterator[str]:
306 return iter(self._data)
308 def copy(self) -> Config:
309 return type(self)(self)
311 @classmethod
312 def fromString(cls, string: str, format: str = "yaml") -> Config:
313 """Create a new Config instance from a serialized string.
315 Parameters
316 ----------
317 string : `str`
318 String containing content in specified format
319 format : `str`, optional
320 Format of the supplied string. Can be ``json`` or ``yaml``.
322 Returns
323 -------
324 c : `Config`
325 Newly-constructed Config.
326 """
327 if format == "yaml":
328 new_config = cls().__initFromYaml(string)
329 elif format == "json":
330 new_config = cls().__initFromJson(string)
331 else:
332 raise ValueError(f"Unexpected format of string: {format}")
333 new_config._processExplicitIncludes()
334 return new_config
336 @classmethod
337 def fromYaml(cls, string: str) -> Config:
338 """Create a new Config instance from a YAML string.
340 Parameters
341 ----------
342 string : `str`
343 String containing content in YAML format
345 Returns
346 -------
347 c : `Config`
348 Newly-constructed Config.
349 """
350 return cls.fromString(string, format="yaml")
352 def __initFromUri(self, path: ResourcePathExpression) -> None:
353 """Load a file from a path or an URI.
355 Parameters
356 ----------
357 path : `lsst.resources.ResourcePathExpression`
358 Path or a URI to a persisted config file.
359 """
360 uri = ResourcePath(path)
361 ext = uri.getExtension()
362 if ext == ".yaml": 362 ↛ 369line 362 didn't jump to line 369, because the condition on line 362 was never false
363 log.debug("Opening YAML config file: %s", uri.geturl())
364 content = uri.read()
365 # Use a stream so we can name it
366 stream = io.BytesIO(content)
367 stream.name = uri.geturl()
368 self.__initFromYaml(stream)
369 elif ext == ".json":
370 log.debug("Opening JSON config file: %s", uri.geturl())
371 content = uri.read()
372 self.__initFromJson(content)
373 else:
374 # This URI does not have a valid extension. It might be because
375 # we ended up with a directory and not a file. Before we complain
376 # about an extension, do an existence check. No need to do
377 # the (possibly expensive) existence check in the default code
378 # path above because we will find out soon enough that the file
379 # is not there.
380 if not uri.exists():
381 raise FileNotFoundError(f"Config location {uri} does not exist.")
382 raise RuntimeError(f"The Config URI does not have a supported extension: {uri}")
383 self.configFile = uri
385 def __initFromYaml(self, stream: IO | str | bytes) -> Config:
386 """Load a YAML config from any readable stream that contains one.
388 Parameters
389 ----------
390 stream: `IO` or `str`
391 Stream to pass to the YAML loader. Accepts anything that
392 `yaml.load` accepts. This can include a string as well as an
393 IO stream.
395 Raises
396 ------
397 yaml.YAMLError
398 If there is an error loading the file.
399 """
400 content = yaml.load(stream, Loader=Loader)
401 if content is None: 401 ↛ 402line 401 didn't jump to line 402, because the condition on line 401 was never true
402 content = {}
403 self._data = content
404 return self
406 def __initFromJson(self, stream: IO | str | bytes) -> Config:
407 """Load a JSON config from any readable stream that contains one.
409 Parameters
410 ----------
411 stream: `IO` or `str`
412 Stream to pass to the JSON loader. This can include a string as
413 well as an IO stream.
415 Raises
416 ------
417 TypeError:
418 Raised if there is an error loading the content.
419 """
420 if isinstance(stream, bytes | str):
421 content = json.loads(stream)
422 else:
423 content = json.load(stream)
424 if content is None:
425 content = {}
426 self._data = content
427 return self
429 def _processExplicitIncludes(self) -> None:
430 """Scan through the configuration searching for the special includes.
432 Looks for ``includeConfigs`` directive and processes the includes.
433 """
434 # Search paths for config files
435 searchPaths = [ResourcePath(os.path.curdir, forceDirectory=True)]
436 if self.configFile is not None: 436 ↛ 444line 436 didn't jump to line 444, because the condition on line 436 was never false
437 if isinstance(self.configFile, ResourcePath): 437 ↛ 440line 437 didn't jump to line 440, because the condition on line 437 was never false
438 configDir = self.configFile.dirname()
439 else:
440 raise RuntimeError(f"Unexpected type for config file: {self.configFile}")
441 searchPaths.append(configDir)
443 # Ensure we know what delimiter to use
444 names = self.nameTuples()
445 for path in names:
446 if path[-1] == self.includeKey: 446 ↛ 447line 446 didn't jump to line 447, because the condition on line 446 was never true
447 log.debug("Processing file include directive at %s", self._D + self._D.join(path))
448 basePath = path[:-1]
450 # Extract the includes and then delete them from the config
451 includes = self[path]
452 del self[path]
454 # Be consistent and convert to a list
455 if not isinstance(includes, list):
456 includes = [includes]
458 # Read each file assuming it is a reference to a file
459 # The file can be relative to config file or cwd
460 # ConfigSubset search paths are not used
461 subConfigs = []
462 for fileName in includes:
463 # Expand any shell variables -- this could be URI
464 fileName = ResourcePath(os.path.expandvars(fileName), forceAbsolute=False)
465 found = None
466 if fileName.isabs():
467 found = fileName
468 else:
469 for dir in searchPaths:
470 specific = dir.join(fileName.path)
471 # Remote resource check might be expensive
472 if specific.exists():
473 found = specific
474 break
475 if not found:
476 raise RuntimeError(f"Unable to find referenced include file: {fileName}")
478 # Read the referenced Config as a Config
479 subConfigs.append(type(self)(found))
481 # Now we need to merge these sub configs with the current
482 # information that was present in this node in the config
483 # tree with precedence given to the explicit values
484 newConfig = subConfigs.pop(0)
485 for sc in subConfigs:
486 newConfig.update(sc)
488 # Explicit values take precedence
489 if not basePath:
490 # This is an include at the root config
491 newConfig.update(self)
492 # Replace the current config
493 self._data = newConfig._data
494 else:
495 newConfig.update(self[basePath])
496 # And reattach to the base config
497 self[basePath] = newConfig
499 @staticmethod
500 def _splitIntoKeys(key: _ConfigKey) -> list[str | int]:
501 r"""Split the argument for get/set/in into a hierarchical list.
503 Parameters
504 ----------
505 key : `str` or iterable
506 Argument given to get/set/in. If an iterable is provided it will
507 be converted to a list. If the first character of the string
508 is not an alphanumeric character then it will be used as the
509 delimiter for the purposes of splitting the remainder of the
510 string. If the delimiter is also in one of the keys then it
511 can be escaped using ``\``. There is no default delimiter.
513 Returns
514 -------
515 keys : `list`
516 Hierarchical keys as a `list`.
517 """
518 if isinstance(key, str):
519 if not key[0].isalnum(): 519 ↛ 520line 519 didn't jump to line 520, because the condition on line 519 was never true
520 d = key[0]
521 key = key[1:]
522 else:
523 return [
524 key,
525 ]
526 escaped = f"\\{d}"
527 temp = None
528 if escaped in key:
529 # Complain at the attempt to escape the escape
530 doubled = rf"\{escaped}"
531 if doubled in key:
532 raise ValueError(
533 f"Escaping an escaped delimiter ({doubled} in {key}) is not yet supported."
534 )
535 # Replace with a character that won't be in the string
536 temp = "\r"
537 if temp in key or d == temp:
538 raise ValueError(
539 f"Can not use character {temp!r} in hierarchical key or as"
540 " delimiter if escaping the delimiter"
541 )
542 key = key.replace(escaped, temp)
543 hierarchy = key.split(d)
544 if temp:
545 hierarchy = [h.replace(temp, d) for h in hierarchy]
546 # Copy the list to keep mypy quiet.
547 return list(hierarchy)
548 elif isinstance(key, Iterable): 548 ↛ 552line 548 didn't jump to line 552, because the condition on line 548 was never false
549 return list(key)
550 else:
551 # Do not try to guess.
552 raise TypeError(f"Provided key [{key}] neither str nor iterable.")
554 def _getKeyHierarchy(self, name: _ConfigKey) -> list[str | int]:
555 """Retrieve the key hierarchy for accessing the Config.
557 Parameters
558 ----------
559 name : `str` or `tuple`
560 Delimited string or `tuple` of hierarchical keys.
562 Returns
563 -------
564 hierarchy : `list` of `str`
565 Hierarchy to use as a `list`. If the name is available directly
566 as a key in the Config it will be used regardless of the presence
567 of any nominal delimiter.
568 """
569 keys: list[str | int]
570 if name in self._data:
571 keys = [cast(str, name)]
572 else:
573 keys = self._splitIntoKeys(name)
574 return keys
576 def _findInHierarchy(self, keys: Sequence[str | int], create: bool = False) -> tuple[list[Any], bool]:
577 """Look for hierarchy of keys in Config.
579 Parameters
580 ----------
581 keys : `list` or `tuple`
582 Keys to search in hierarchy.
583 create : `bool`, optional
584 If `True`, if a part of the hierarchy does not exist, insert an
585 empty `dict` into the hierarchy.
587 Returns
588 -------
589 hierarchy : `list`
590 List of the value corresponding to each key in the supplied
591 hierarchy. Only keys that exist in the hierarchy will have
592 a value.
593 complete : `bool`
594 `True` if the full hierarchy exists and the final element
595 in ``hierarchy`` is the value of relevant value.
596 """
597 d: Any = self._data
599 # For the first key, d must be a dict so it is a waste
600 # of time to check for a sequence.
601 must_be_dict = True
603 hierarchy = []
604 complete = True
605 for k in keys:
606 d, isThere = _checkNextItem(k, d, create, must_be_dict)
607 if isThere:
608 hierarchy.append(d)
609 else:
610 complete = False
611 break
612 # Second time round it might be a sequence.
613 must_be_dict = False
615 return hierarchy, complete
617 def __getitem__(self, name: _ConfigKey) -> Any:
618 # Override the split for the simple case where there is an exact
619 # match. This allows `Config.items()` to work via a simple
620 # __iter__ implementation that returns top level keys of
621 # self._data.
623 # If the name matches a key in the top-level hierarchy, bypass
624 # all further cleverness.
625 found_directly = False
626 try:
627 if isinstance(name, str): 627 ↛ 633line 627 didn't jump to line 633, because the condition on line 627 was never false
628 data = self._data[name]
629 found_directly = True
630 except KeyError:
631 pass
633 if not found_directly: 633 ↛ 634line 633 didn't jump to line 634, because the condition on line 633 was never true
634 keys = self._getKeyHierarchy(name)
636 hierarchy, complete = self._findInHierarchy(keys)
637 if not complete:
638 raise KeyError(f"{name} not found")
639 data = hierarchy[-1]
641 # In most cases we have a dict, and it's more efficient
642 # to check for a dict instance before checking the generic mapping.
643 if isinstance(data, dict | Mapping):
644 data = Config(data)
645 # Ensure that child configs inherit the parent internal delimiter
646 if self._D != Config._D: 646 ↛ 647line 646 didn't jump to line 647, because the condition on line 646 was never true
647 data._D = self._D
648 return data
650 def __setitem__(self, name: _ConfigKey, value: Any) -> None:
651 keys = self._getKeyHierarchy(name)
652 last = keys.pop()
653 if isinstance(value, Config):
654 value = copy.deepcopy(value._data)
656 hierarchy, complete = self._findInHierarchy(keys, create=True)
657 if hierarchy:
658 data = hierarchy[-1]
659 else:
660 data = self._data
662 try:
663 data[last] = value
664 except TypeError:
665 data[int(last)] = value
667 def __contains__(self, key: Any) -> bool:
668 if not isinstance(key, str | Sequence): 668 ↛ 669line 668 didn't jump to line 669, because the condition on line 668 was never true
669 return False
670 keys = self._getKeyHierarchy(key)
671 hierarchy, complete = self._findInHierarchy(keys)
672 return complete
674 def __delitem__(self, key: str | Sequence[str]) -> None:
675 keys = self._getKeyHierarchy(key)
676 last = keys.pop()
677 hierarchy, complete = self._findInHierarchy(keys)
678 if complete: 678 ↛ 685line 678 didn't jump to line 685, because the condition on line 678 was never false
679 if hierarchy: 679 ↛ 680line 679 didn't jump to line 680, because the condition on line 679 was never true
680 data = hierarchy[-1]
681 else:
682 data = self._data
683 del data[last]
684 else:
685 raise KeyError(f"{key} not found in Config")
687 def update(self, other: Mapping[str, Any]) -> None: # type: ignore[override]
688 """Update config from other `Config` or `dict`.
690 Like `dict.update()`, but will add or modify keys in nested dicts,
691 instead of overwriting the nested dict entirely.
693 Parameters
694 ----------
695 other : `dict` or `Config`
696 Source of configuration:
698 Examples
699 --------
700 >>> c = Config({"a": {"b": 1}})
701 >>> c.update({"a": {"c": 2}})
702 >>> print(c)
703 {'a': {'b': 1, 'c': 2}}
705 >>> foo = {"a": {"b": 1}}
706 >>> foo.update({"a": {"c": 2}})
707 >>> print(foo)
708 {'a': {'c': 2}}
709 """
710 _doUpdate(self._data, other)
712 def merge(self, other: Mapping) -> None:
713 """Merge another Config into this one.
715 Like `Config.update()`, but will add keys & values from other that
716 DO NOT EXIST in self.
718 Keys and values that already exist in self will NOT be overwritten.
720 Parameters
721 ----------
722 other : `dict` or `Config`
723 Source of configuration:
724 """
725 if not isinstance(other, Mapping):
726 raise TypeError(f"Can only merge a Mapping into a Config, not {type(other)}")
728 # Convert the supplied mapping to a Config for consistency
729 # This will do a deepcopy if it is already a Config
730 otherCopy = Config(other)
731 otherCopy.update(self)
732 self._data = otherCopy._data
734 def nameTuples(self, topLevelOnly: bool = False) -> list[tuple[str, ...]]:
735 """Get tuples representing the name hierarchies of all keys.
737 The tuples returned from this method are guaranteed to be usable
738 to access items in the configuration object.
740 Parameters
741 ----------
742 topLevelOnly : `bool`, optional
743 If False, the default, a full hierarchy of names is returned.
744 If True, only the top level are returned.
746 Returns
747 -------
748 names : `list` of `tuple` of `str`
749 List of all names present in the `Config` where each element
750 in the list is a `tuple` of strings representing the hierarchy.
751 """
752 if topLevelOnly: 752 ↛ 753line 752 didn't jump to line 753, because the condition on line 752 was never true
753 return [(k,) for k in self]
755 def getKeysAsTuples(
756 d: Mapping[str, Any] | Sequence[str], keys: list[tuple[str, ...]], base: tuple[str, ...] | None
757 ) -> None:
758 if isinstance(d, Sequence):
759 theseKeys: Iterable[Any] = range(len(d))
760 else:
761 theseKeys = d.keys()
762 for key in theseKeys:
763 val = d[key]
764 levelKey = base + (key,) if base is not None else (key,)
765 keys.append(levelKey)
766 if isinstance(val, Mapping | Sequence) and not isinstance(val, str):
767 getKeysAsTuples(val, keys, levelKey)
769 keys: list[tuple[str, ...]] = []
770 getKeysAsTuples(self._data, keys, None)
771 return keys
773 def names(self, topLevelOnly: bool = False, delimiter: str | None = None) -> list[str]:
774 """Get a delimited name of all the keys in the hierarchy.
776 The values returned from this method are guaranteed to be usable
777 to access items in the configuration object.
779 Parameters
780 ----------
781 topLevelOnly : `bool`, optional
782 If False, the default, a full hierarchy of names is returned.
783 If True, only the top level are returned.
784 delimiter : `str`, optional
785 Delimiter to use when forming the keys. If the delimiter is
786 present in any of the keys, it will be escaped in the returned
787 names. If `None` given a delimiter will be automatically provided.
788 The delimiter can not be alphanumeric.
790 Returns
791 -------
792 names : `list` of `str`
793 List of all names present in the `Config`.
795 Notes
796 -----
797 This is different than the built-in method `dict.keys`, which will
798 return only the first level keys.
800 Raises
801 ------
802 ValueError:
803 The supplied delimiter is alphanumeric.
804 """
805 if topLevelOnly:
806 return list(self.keys())
808 # Get all the tuples of hierarchical keys
809 nameTuples = self.nameTuples()
811 if delimiter is not None and delimiter.isalnum():
812 raise ValueError(f"Supplied delimiter ({delimiter!r}) must not be alphanumeric.")
814 if delimiter is None:
815 # Start with something, and ensure it does not need to be
816 # escaped (it is much easier to understand if not escaped)
817 delimiter = self._D
819 # Form big string for easy check of delimiter clash
820 combined = "".join("".join(str(s) for s in k) for k in nameTuples)
822 # Try a delimiter and keep trying until we get something that
823 # works.
824 ntries = 0
825 while delimiter in combined:
826 log.debug("Delimiter '%s' could not be used. Trying another.", delimiter)
827 ntries += 1
829 if ntries > 100:
830 raise ValueError(f"Unable to determine a delimiter for Config {self}")
832 # try another one
833 while True:
834 delimiter = chr(ord(delimiter) + 1)
835 if not delimiter.isalnum():
836 break
838 log.debug("Using delimiter %r", delimiter)
840 # Form the keys, escaping the delimiter if necessary
841 strings = [
842 delimiter + delimiter.join(str(s).replace(delimiter, f"\\{delimiter}") for s in k)
843 for k in nameTuples
844 ]
845 return strings
847 def asArray(self, name: str | Sequence[str]) -> Sequence[Any]:
848 """Get a value as an array.
850 May contain one or more elements.
852 Parameters
853 ----------
854 name : `str`
855 Key to use to retrieve value.
857 Returns
858 -------
859 array : `collections.abc.Sequence`
860 The value corresponding to name, but guaranteed to be returned
861 as a list with at least one element. If the value is a
862 `~collections.abc.Sequence` (and not a `str`) the value itself
863 will be returned, else the value will be the first element.
864 """
865 val = self.get(name)
866 if isinstance(val, str) or not isinstance(val, Sequence):
867 val = [val]
868 return val
870 def __eq__(self, other: Any) -> bool:
871 if isinstance(other, Config):
872 other = other._data
873 return self._data == other
875 def __ne__(self, other: Any) -> bool:
876 if isinstance(other, Config):
877 other = other._data
878 return self._data != other
880 #######
881 # i/o #
883 def dump(self, output: IO | None = None, format: str = "yaml") -> str | None:
884 """Write the config to an output stream.
886 Parameters
887 ----------
888 output : `IO`, optional
889 The stream to use for output. If `None` the serialized content
890 will be returned.
891 format : `str`, optional
892 The format to use for the output. Can be "yaml" or "json".
894 Returns
895 -------
896 serialized : `str` or `None`
897 If a stream was given the stream will be used and the return
898 value will be `None`. If the stream was `None` the
899 serialization will be returned as a string.
900 """
901 if format == "yaml":
902 return yaml.safe_dump(self._data, output, default_flow_style=False)
903 elif format == "json":
904 if output is not None:
905 json.dump(self._data, output, ensure_ascii=False)
906 return None
907 else:
908 return json.dumps(self._data, ensure_ascii=False)
909 raise ValueError(f"Unsupported format for Config serialization: {format}")
def dumpToUri(
    self,
    uri: ResourcePathExpression,
    updateFile: bool = True,
    defaultFileName: str = "butler.yaml",
    overwrite: bool = True,
) -> None:
    """Serialize this config and write it to the location given by a URI.

    The serialization format is taken from the extension of the final
    target (lower-cased, without the leading dot) and handed to `dump`.

    Parameters
    ----------
    uri : `lsst.resources.ResourcePathExpression`
        Location the config should be written to.
    updateFile : `bool`, optional
        If `True` and ``uri`` has no file extension, ``defaultFileName``
        is used as the file name of the target. `True` by default.
    defaultFileName : `str`, optional
        File name to use when ``updateFile`` is `True` and ``uri`` has no
        file extension.
    overwrite : `bool`, optional
        Passed through to the write call of the target resource; if `True`
        an existing file at that location may be replaced.

    Raises
    ------
    ValueError
        Raised by `dump` if the extension of the target does not name a
        supported serialization format.

    Notes
    -----
    On success ``self.configFile`` is set to the URI that was written.
    """
    # Always work with a ResourcePath, whatever form the caller supplied.
    target = ResourcePath(uri)

    # No extension means the URI does not yet name a concrete file.
    if updateFile and not target.getExtension():
        target = target.updatedFile(defaultFileName)

    # The extension (minus the leading ".") selects the output format.
    suffix = target.getExtension()
    serialized = self.dump(format=suffix[1:].lower())
    assert serialized is not None, "Config.dump guarantees not-None return when output arg is None"

    target.write(serialized.encode(), overwrite=overwrite)
    self.configFile = target
@staticmethod
def updateParameters(
    configType: type[ConfigSubset],
    config: Config,
    full: Config,
    toUpdate: dict[str, Any] | None = None,
    toCopy: Sequence[str | Sequence[str]] | None = None,
    overwrite: bool = True,
    toMerge: Sequence[str | Sequence[str]] | None = None,
) -> None:
    """Set, copy or merge selected parameters of a config in bulk.

    The part of ``config`` relevant to ``configType`` is extracted,
    modified, and then re-attached to ``config``. ``config`` and ``full``
    are assumed to come from the same level of the configuration
    hierarchy.

    Parameters
    ----------
    configType : `type` [`ConfigSubset`]
        Config class used to extract the relevant part of ``config`` and
        ``full``.
    config : `Config`
        The config to modify in place. The subset is built without
        merging defaults and without validation, so mandatory keys may
        still be missing afterwards.
    full : `Config`
        Reference config that values are read from for ``toCopy`` and
        ``toMerge``. It is not modified by this method.
    toUpdate : `dict`, optional
        Mapping of key to new value. Keys and values can be anything
        accepted by `Config` item assignment.
    toCopy : `~collections.abc.Sequence`, optional
        Keys whose values are copied from ``full`` into ``config``.
    overwrite : `bool`, optional
        If `False`, keys from ``toUpdate`` and ``toCopy`` that already
        exist in ``config`` are left untouched (a debug message is
        logged). Defaults to `True`.
    toMerge : `~collections.abc.Sequence`, optional
        Keys whose content is merged from ``full`` into ``config``.
        An existing entry is merged via its ``merge`` method; a missing
        entry is copied. ``overwrite`` is not consulted here.

    Raises
    ------
    ValueError
        Raised if ``toUpdate``, ``toCopy`` and ``toMerge`` are all `None`.
    """
    if all(request is None for request in (toUpdate, toCopy, toMerge)):
        raise ValueError("At least one of toUpdate, toCopy, or toMerge parameters must be set.")

    component = configType.component

    # When the component exists in the reference config but not yet in the
    # config being modified, add an empty stub so that the ConfigSubset
    # constructor selects that component.
    if component is not None and component in full and component not in config:
        config[component] = {}

    # The part of the config that is going to be modified.
    target = configType(config, mergeDefaults=False, validate=False)

    for updateKey, newValue in (toUpdate or {}).items():
        if updateKey not in target or overwrite:
            target[updateKey] = newValue
        else:
            log.debug(
                "Not overriding key '%s' with value '%s' in config %s",
                updateKey,
                newValue,
                target.__class__.__name__,
            )

    # The reference subset is only needed for copying or merging.
    if toCopy or toMerge:
        reference = configType(full, mergeDefaults=False)

    for copyKey in toCopy or ():
        if copyKey not in target or overwrite:
            target[copyKey] = reference[copyKey]
        else:
            log.debug(
                "Not overriding key '%s' from defaults in config %s",
                copyKey,
                target.__class__.__name__,
            )

    for mergeKey in toMerge or ():
        if mergeKey not in target:
            target[mergeKey] = reference[mergeKey]
            continue
        # Merge on the extracted node and then store the merged node
        # back under the same key.
        node = target[mergeKey]
        node.merge(reference[mergeKey])
        target[mergeKey] = node

    # Put the modified subset back where it came from.
    if component is not None and component in config:
        config[component] = target
    else:
        config.update(target)
def toDict(self) -> dict[str, Any]:
    """Return a deep copy of the config content as a standalone `dict`.

    Returns
    -------
    d : `dict`
        Deep copy of the underlying data. Any top-level value that is
        itself a `Config` is replaced by the result of its own
        ``toDict()``.

    Notes
    -----
    Useful when handing a `Config` to code that expects native Python
    types.
    """
    result = copy.deepcopy(self._data)
    for name, item in result.items():
        if not isinstance(item, Config):
            continue
        # Only existing keys are reassigned, so the mapping does not change
        # size while it is being iterated.
        result[name] = item.toDict()
    return result
1094class ConfigSubset(Config):
1095 """Config representing a subset of a more general configuration.
1097 Subclasses define their own component and when given a configuration
1098 that includes that component, the resulting configuration only includes
1099 the subset. For example, your config might contain ``dimensions`` if it's
1100 part of a global config and that subset will be stored. If ``dimensions``
1101 can not be found it is assumed that the entire contents of the
1102 configuration should be used.
1104 Default values are read from the environment or supplied search paths
1105 using the default configuration file name specified in the subclass.
1106 This allows a configuration class to be instantiated without any
1107 additional arguments.
1109 Additional validation can be specified to check for keys that are mandatory
1110 in the configuration.
1112 Parameters
1113 ----------
1114 other : `Config` or `~lsst.resources.ResourcePathExpression` or `dict`
1115 Argument specifying the configuration information as understood
1116 by `Config`
1117 validate : `bool`, optional
1118 If `True` required keys will be checked to ensure configuration
1119 consistency.
1120 mergeDefaults : `bool`, optional
1121 If `True` defaults will be read and the supplied config will
1122 be combined with the defaults, with the supplied values taking
1123 precedence.
1124 searchPaths : `list` or `tuple`, optional
1125 Explicit additional paths to search for defaults. They should
1126 be supplied in priority order. These paths have higher priority
1127 than those read from the environment in
1128 `ConfigSubset.defaultSearchPaths()`. Paths can be `str` referring to
1129 the local file system or URIs, `lsst.resources.ResourcePath`.
1130 """
1132 component: ClassVar[str | None] = None
1133 """Component to use from supplied config. Can be None. If specified the
1134 key is not required. Can be a full dot-separated path to a component.
1135 """
1137 requiredKeys: ClassVar[Sequence[str]] = ()
1138 """Keys that are required to be specified in the configuration.
1139 """
1141 defaultConfigFile: ClassVar[str | None] = None
1142 """Name of the file containing defaults for this config class.
1143 """
def __init__(
    self,
    other: Config | ResourcePathExpression | Mapping[str, Any] | None = None,
    validate: bool = True,
    mergeDefaults: bool = True,
    searchPaths: Sequence[ResourcePathExpression] | None = None,
):
    # Start out empty: defaults (if requested) are loaded first and the
    # externally supplied values are applied on top of them afterwards.
    super().__init__()

    # Read the supplied information as a plain Config, not as a subset.
    supplied = Config(other)

    # Narrow the supplied config down to this class's component.
    # The doubled form (component.component) is also accepted because
    # files pulled in via !include may themselves repeat the component
    # name; it has to be tested before the single form.
    component = self.component
    if component is not None:
        nested = (component, component)
        if nested in supplied:
            supplied = supplied[nested]
        elif component in supplied:
            supplied._data = supplied._data[component]

    # Record of the default files that contributed to this configuration.
    self.filesRead: list[ResourcePath | str] = []

    # Only set if the class named in "cls" declares child configurations.
    containerKey = None

    if mergeDefaults:
        # Explicitly supplied paths come first (highest priority),
        # followed by the paths derived from the environment.
        fullSearchPath: list[ResourcePath | str] = (
            [ResourcePath(path) for path in searchPaths] if searchPaths else []
        )
        fullSearchPath.extend(self.defaultSearchPaths())

        # Defaults can come from two places:
        # - the "defaultConfigFile" declared on this class;
        # - the class named by the "cls" entry of the configuration.
        # "cls" is looked up after the first merge since that merge may
        # be what provides it.
        if self.defaultConfigFile is not None:
            self._updateWithConfigsFromPath(fullSearchPath, self.defaultConfigFile)

        # The supplied config takes precedence over the defaults when
        # both specify a class.
        pytype = None
        for holder in (supplied, self):
            if "cls" in holder:
                pytype = holder["cls"]
                break

        if pytype is not None:
            try:
                cls = doImportType(pytype)
            except ImportError as e:
                raise RuntimeError(f"Failed to import cls '{pytype}' for config {type(self)}") from e

            # The referenced class may or may not name a defaults file.
            defaultsFile = getattr(cls, "defaultConfigFile", None)
            if defaultsFile is not None:
                self._updateWithConfigsFromPath(fullSearchPath, defaultsFile)

            # Remember the container key, if the class declares one.
            containerKey = getattr(cls, "containerKey", None)

    # External values are applied last so they always win over defaults.
    self.update(supplied)
    if not self.configFile:
        self.configFile = supplied.configFile

    # Child configurations of the same config class need their own
    # defaults expanded as well.
    if mergeDefaults and containerKey is not None and containerKey in self:
        for position, child in enumerate(self[containerKey]):
            self[containerKey, position] = type(self)(
                other=child, validate=validate, mergeDefaults=mergeDefaults, searchPaths=searchPaths
            )

    if validate:
        self.validate()
@classmethod
def defaultSearchPaths(cls) -> list[ResourcePath | str]:
    """Determine the default search paths for configuration defaults.

    Paths listed in the ``DAF_BUTLER_CONFIG_PATH`` environment variable
    (a PATH-like variable) come first, in the order given there, so that
    entries at the front have priority over later ones. The ``configs``
    resource directory of ``cls.resourcesPackage`` is appended after them
    and therefore has the lowest priority.

    Returns
    -------
    paths : `list`
        Paths to search, highest priority first. Entries from the
        environment are `str`; the final entry is the
        `lsst.resources.ResourcePath` of the package defaults.

    Notes
    -----
    The environment variable is split on `os.pathsep`, which currently
    makes it incompatible with usage of URIs.
    """
    paths: list[str | ResourcePath] = []

    # Entries from the environment keep the order the user wrote them in.
    fromEnvironment = os.environ.get(CONFIG_PATH)
    if fromEnvironment is not None:
        paths.extend(fromEnvironment.split(os.pathsep))

    # The package defaults are a resource and always go last.
    packageDefaults = ResourcePath(f"resource://{cls.resourcesPackage}/configs", forceDirectory=True)
    paths.append(packageDefaults)
    return paths
def _updateWithConfigsFromPath(
    self, searchPaths: Sequence[str | ResourcePath], configFile: ResourcePath | str
) -> None:
    """Merge in every copy of a config file found along the search paths.

    Values that are read override values already stored in this object.
    The paths are processed from last to first so that entries earlier
    in ``searchPaths`` are applied last and therefore take precedence.

    Parameters
    ----------
    searchPaths : `~collections.abc.Sequence` of \
            `lsst.resources.ResourcePath` or `str`
        Locations to look in for ``configFile``, in priority order
        (highest priority first).
    configFile : `lsst.resources.ResourcePath` or `str`
        File to locate. If it is absolute and exists it is read directly
        and ``searchPaths`` is not consulted.

    Raises
    ------
    ValueError
        Raised if an entry in ``searchPaths`` is neither a `str` nor a
        `lsst.resources.ResourcePath`.
    """
    resource = ResourcePath(configFile)
    if resource.isabs() and resource.exists():
        # An existing absolute location is read as-is.
        self._updateWithOtherConfigFile(configFile)
        self.filesRead.append(configFile)
        return

    # Walk the paths backwards so high priority entries are merged last.
    for entry in reversed(searchPaths):
        if not isinstance(entry, str | ResourcePath):
            raise ValueError(f"Unexpected search path type encountered: {entry!r}")

        candidate = ResourcePath(entry, forceDirectory=True).join(configFile)
        if candidate.exists():
            self.filesRead.append(candidate)
            self._updateWithOtherConfigFile(candidate)
def _updateWithOtherConfigFile(self, file: Config | str | ResourcePath | Mapping[str, Any]) -> None:
    """Load another configuration and merge it over the current values.

    The supplied entity is read as an instance of this object's own
    class, without merging defaults and without validation, and the
    result is applied with ``update`` so that its values override those
    currently stored.

    Parameters
    ----------
    file : `Config`, `str`, `lsst.resources.ResourcePath`, or `dict`
        Entity that can be converted to a `ConfigSubset`.
    """
    # Reading through this object's own class lets the usual component
    # subsetting of the constructor apply to the loaded content.
    configClass = type(self)
    self.update(configClass(file, validate=False, mergeDefaults=False))
def validate(self) -> None:
    """Ensure that every mandatory key is present in this configuration.

    Nothing is checked when ``requiredKeys`` is empty.

    Raises
    ------
    KeyError
        Raised if one or more entries of ``requiredKeys`` are absent from
        the top level of the stored data.
    """
    content = self._data
    absent = [name for name in self.requiredKeys if name not in content]
    if not absent:
        return
    raise KeyError(f"Mandatory keys ({absent}) missing from supplied configuration for {type(self)}")