Coverage for python/lsst/daf/butler/core/config.py: 44%
490 statements
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-09 02:11 -0700
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-09 02:11 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24"""Configuration control."""
26__all__ = ("Config", "ConfigSubset")
28import copy
29import io
30import json
31import logging
32import os
33import pprint
34import sys
35from collections import defaultdict
36from collections.abc import Iterable, Mapping, MutableMapping, Sequence
37from pathlib import Path
38from typing import IO, TYPE_CHECKING, Any, ClassVar, Iterator, cast
40import yaml
41from lsst.resources import ResourcePath, ResourcePathExpression
42from lsst.utils import doImport
43from yaml.representer import Representer
# Register a representer so that ``defaultdict`` instances are emitted using
# the standard dict representation.
yaml.add_representer(defaultdict, Representer.represent_dict)


# Config module logger
log = logging.getLogger(__name__)

# PATH-like environment variable to use for defaults.
CONFIG_PATH = "DAF_BUTLER_CONFIG_PATH"

if not TYPE_CHECKING:
    # Use the C-accelerated safe loader when the installation provides it;
    # not all installations have the C library.
    yamlLoader = getattr(yaml, "CSafeLoader", yaml.SafeLoader)
else:
    # For static type checking assume both loaders are the same type.
    yamlLoader = yaml.SafeLoader
65def _doUpdate(d: Mapping[str, Any], u: Mapping[str, Any]) -> Mapping[str, Any]:
66 if not isinstance(u, Mapping) or not isinstance(d, MutableMapping): 66 ↛ 67line 66 didn't jump to line 67, because the condition on line 66 was never true
67 raise RuntimeError("Only call update with Mapping, not {}".format(type(d)))
68 for k, v in u.items():
69 if isinstance(v, Mapping):
70 lhs = d.get(k, {})
71 if not isinstance(lhs, Mapping): 71 ↛ 72line 71 didn't jump to line 72, because the condition on line 71 was never true
72 lhs = {}
73 d[k] = _doUpdate(lhs, v)
74 else:
75 d[k] = v
76 return d
79def _checkNextItem(k: str | int, d: Any, create: bool, must_be_dict: bool) -> tuple[Any, bool]:
80 """See if k is in d and if it is return the new child."""
81 nextVal = None
82 isThere = False
83 if d is None: 83 ↛ 85line 83 didn't jump to line 85, because the condition on line 83 was never true
84 # We have gone past the end of the hierarchy
85 pass
86 elif not must_be_dict and isinstance(d, Sequence): 86 ↛ 91line 86 didn't jump to line 91, because the condition on line 86 was never true
87 # Check for Sequence first because for lists
88 # __contains__ checks whether value is found in list
89 # not whether the index exists in list. When we traverse
90 # the hierarchy we are interested in the index.
91 try:
92 nextVal = d[int(k)]
93 isThere = True
94 except IndexError:
95 pass
96 except ValueError:
97 isThere = k in d
98 elif k in d:
99 nextVal = d[k]
100 isThere = True
101 elif create: 101 ↛ 102line 101 didn't jump to line 102, because the condition on line 101 was never true
102 d[k] = {}
103 nextVal = d[k]
104 isThere = True
106 return nextVal, isThere
class Loader(yamlLoader):
    """YAML Loader that supports file include directives.

    Uses ``!include`` directive in a YAML file to point to another
    YAML file to be included. The path in the include directive is relative
    to the file containing that directive.

        storageClasses: !include storageClasses.yaml

    Examples
    --------
    >>> with open("document.yaml", "r") as f:
    ...     data = yaml.load(f, Loader=Loader)

    Notes
    -----
    See https://davidchall.github.io/yaml-includes.html
    """

    def __init__(self, stream: str | IO):  # types-PyYAML annotates 'stream' with a private type
        super().__init__(stream)
        # if this is a string and not a stream we may well lack a name
        if hasattr(stream, "name"):
            self._root = ResourcePath(stream.name)
        else:
            # No choice but to assume a local filesystem
            self._root = ResourcePath("no-file.yaml")
        self.add_constructor("!include", Loader.include)

    def include(self, node: yaml.Node) -> list[Any] | dict[str, Any]:
        """Construct the value of an ``!include`` node.

        Parameters
        ----------
        node : `yaml.Node`
            Scalar (one file), sequence (list of files) or mapping (named
            files) node tagged with ``!include``.

        Returns
        -------
        result : `typing.Any`
            The loaded content for a scalar node, a `list` of loaded
            contents for a sequence node, or a `dict` of loaded contents for
            a mapping node.

        Raises
        ------
        TypeError
            Raised if a mapping node has a key that is not a string.
        yaml.constructor.ConstructorError
            Raised if the node is not a scalar, sequence or mapping node.
        """
        result: list[Any] | dict[str, Any]
        if isinstance(node, yaml.ScalarNode):
            return self.extractFile(self.construct_scalar(node))  # type: ignore[arg-type]

        if isinstance(node, yaml.SequenceNode):
            result = [self.extractFile(filename) for filename in self.construct_sequence(node)]
            return result

        if isinstance(node, yaml.MappingNode):
            result = {}
            for k, v in self.construct_mapping(node).items():
                if not isinstance(k, str):
                    raise TypeError(f"Expected only strings in YAML mapping; got {k!r} of type {type(k)}.")
                result[k] = self.extractFile(v)
            return result

        # Carry the diagnostic and the position in the exception itself
        # rather than printing it separately to stderr.
        raise yaml.constructor.ConstructorError(
            None, None, "unrecognised node type in !include statement", node.start_mark
        )

    def extractFile(self, filename: str) -> Any:
        """Load the YAML content of an included file.

        Parameters
        ----------
        filename : `str`
            Location given in the ``!include`` directive.

        Returns
        -------
        content : `typing.Any`
            Result of loading the referenced resource with this loader class.
        """
        # It is possible for the !include to point to an explicit URI
        # instead of a relative URI, therefore we first see if it is
        # scheme-less or not. If it has a scheme we use it directly
        # if it is scheme-less we use it relative to the file root.
        requesteduri = ResourcePath(filename, forceAbsolute=False)

        if requesteduri.scheme:
            fileuri = requesteduri
        else:
            fileuri = self._root.updatedFile(filename)

        log.debug("Opening YAML file via !include: %s", fileuri)

        # Read all the data from the resource
        data = fileuri.read()

        # Store the bytes into a BytesIO so we can attach a .name
        stream = io.BytesIO(data)
        stream.name = fileuri.geturl()
        return yaml.load(stream, Loader)
184# Type of the key used for accessing items in configuration object. It can be
185# a single string as described below or a sequence of strings and integer
186# indices. Indices are used to access items in sequences stored in config.
187_ConfigKey = str | Sequence[str | int]
190class Config(MutableMapping):
191 r"""Implements a datatype that is used by `Butler` for configuration.
193 It is essentially a `dict` with key/value pairs, including nested dicts
194 (as values). In fact, it can be initialized with a `dict`.
195 This is explained next:
197 Config extends the `dict` api so that hierarchical values may be accessed
198 with delimited notation or as a tuple. If a string is given the delimiter
199 is picked up from the first character in that string. For example,
200 ``foo.getValue(".a.b.c")``, ``foo["a"]["b"]["c"]``, ``foo["a", "b", "c"]``,
201 ``foo[".a.b.c"]``, and ``foo["/a/b/c"]`` all achieve the same outcome.
202 If the first character is alphanumeric, no delimiter will be used.
203 ``foo["a.b.c"]`` will be a single key ``a.b.c`` as will ``foo[":a.b.c"]``.
204 Unicode characters can be used as the delimiter for distinctiveness if
205 required.
207 If a key in the hierarchy starts with a non-alphanumeric character care
208 should be used to ensure that either the tuple interface is used or
209 a distinct delimiter is always given in string form.
211 Finally, the delimiter can be escaped if it is part of a key and also
212 has to be used as a delimiter. For example, ``foo[r".a.b\.c"]`` results in
213 a two element hierarchy of ``a`` and ``b.c``. For hard-coded strings it is
214 always better to use a different delimiter in these cases.
216 Note that adding a multi-level key implicitly creates any nesting levels
217 that do not exist, but removing multi-level keys does not automatically
218 remove empty nesting levels. As a result:
220 >>> c = Config()
221 >>> c[".a.b"] = 1
222 >>> del c[".a.b"]
223 >>> c
224 Config({'a': {}})
226 Storage formats supported:
228 - yaml: read and write is supported.
229 - json: read and write is supported but no ``!include`` directive.
231 Parameters
232 ----------
233 other : `lsst.resources.ResourcePath` or `Config` or `dict`
234 Other source of configuration, can be:
236 - (`lsst.resources.ResourcePathExpression`)
237 Treated as a URI to a config file. Must end with ".yaml" or ".json".
238 - (`Config`) Copies the other Config's values into this one.
239 - (`dict`) Copies the values from the dict into this Config.
241 If `None` is provided an empty `Config` will be created.
242 """
244 _D: str = "→"
245 """Default internal delimiter to use for components in the hierarchy when
246 constructing keys for external use (see `Config.names()`)."""
248 includeKey: ClassVar[str] = "includeConfigs"
249 """Key used to indicate that another config should be included at this
250 part of the hierarchy."""
252 resourcesPackage: str = "lsst.daf.butler"
253 """Package to search for default configuration data. The resources
254 themselves will be within a ``configs`` resource hierarchy."""
256 def __init__(self, other: ResourcePathExpression | Config | Mapping[str, Any] | None = None):
257 self._data: dict[str, Any] = {}
258 self.configFile: ResourcePath | None = None
260 if other is None:
261 return
263 if isinstance(other, Config):
264 # Deep copy might be more efficient but if someone has overridden
265 # a config entry to store a complex object then deep copy may
266 # fail. Safer to use update().
267 self.update(other._data)
268 self.configFile = other.configFile
269 elif isinstance(other, (dict, Mapping)):
270 # In most cases we have a dict, and it's more efficient
271 # to check for a dict instance before checking the generic mapping.
272 self.update(other)
273 elif isinstance(other, (str, ResourcePath, Path)): 273 ↛ 280line 273 didn't jump to line 280, because the condition on line 273 was never false
274 # if other is a string, assume it is a file path/URI
275 self.__initFromUri(other)
276 self._processExplicitIncludes()
277 else:
278 # if the config specified by other could not be recognized raise
279 # a runtime error.
280 raise RuntimeError(f"A Config could not be loaded from other: {other}")
282 def ppprint(self) -> str:
283 """Return config as formatted readable string.
285 Examples
286 --------
287 use: ``pdb> print(myConfigObject.ppprint())``
289 Returns
290 -------
291 s : `str`
292 A prettyprint formatted string representing the config
293 """
294 return pprint.pformat(self._data, indent=2, width=1)
296 def __repr__(self) -> str:
297 return f"{type(self).__name__}({self._data!r})"
299 def __str__(self) -> str:
300 return self.ppprint()
302 def __len__(self) -> int:
303 return len(self._data)
305 def __iter__(self) -> Iterator[str]:
306 return iter(self._data)
308 def copy(self) -> Config:
309 return type(self)(self)
311 @classmethod
312 def fromString(cls, string: str, format: str = "yaml") -> Config:
313 """Create a new Config instance from a serialized string.
315 Parameters
316 ----------
317 string : `str`
318 String containing content in specified format
319 format : `str`, optional
320 Format of the supplied string. Can be ``json`` or ``yaml``.
322 Returns
323 -------
324 c : `Config`
325 Newly-constructed Config.
326 """
327 if format == "yaml":
328 new_config = cls().__initFromYaml(string)
329 elif format == "json":
330 new_config = cls().__initFromJson(string)
331 else:
332 raise ValueError(f"Unexpected format of string: {format}")
333 new_config._processExplicitIncludes()
334 return new_config
336 @classmethod
337 def fromYaml(cls, string: str) -> Config:
338 """Create a new Config instance from a YAML string.
340 Parameters
341 ----------
342 string : `str`
343 String containing content in YAML format
345 Returns
346 -------
347 c : `Config`
348 Newly-constructed Config.
349 """
350 return cls.fromString(string, format="yaml")
352 def __initFromUri(self, path: ResourcePathExpression) -> None:
353 """Load a file from a path or an URI.
355 Parameters
356 ----------
357 path : `lsst.resources.ResourcePathExpression`
358 Path or a URI to a persisted config file.
359 """
360 uri = ResourcePath(path)
361 ext = uri.getExtension()
362 if ext == ".yaml": 362 ↛ 369line 362 didn't jump to line 369, because the condition on line 362 was never false
363 log.debug("Opening YAML config file: %s", uri.geturl())
364 content = uri.read()
365 # Use a stream so we can name it
366 stream = io.BytesIO(content)
367 stream.name = uri.geturl()
368 self.__initFromYaml(stream)
369 elif ext == ".json":
370 log.debug("Opening JSON config file: %s", uri.geturl())
371 content = uri.read()
372 self.__initFromJson(content)
373 else:
374 # This URI does not have a valid extension. It might be because
375 # we ended up with a directory and not a file. Before we complain
376 # about an extension, do an existence check. No need to do
377 # the (possibly expensive) existence check in the default code
378 # path above because we will find out soon enough that the file
379 # is not there.
380 if not uri.exists():
381 raise FileNotFoundError(f"Config location {uri} does not exist.")
382 raise RuntimeError(f"The Config URI does not have a supported extension: {uri}")
383 self.configFile = uri
385 def __initFromYaml(self, stream: IO | str | bytes) -> Config:
386 """Load a YAML config from any readable stream that contains one.
388 Parameters
389 ----------
390 stream: `IO` or `str`
391 Stream to pass to the YAML loader. Accepts anything that
392 `yaml.load` accepts. This can include a string as well as an
393 IO stream.
395 Raises
396 ------
397 yaml.YAMLError
398 If there is an error loading the file.
399 """
400 content = yaml.load(stream, Loader=Loader)
401 if content is None: 401 ↛ 402line 401 didn't jump to line 402, because the condition on line 401 was never true
402 content = {}
403 self._data = content
404 return self
406 def __initFromJson(self, stream: IO | str | bytes) -> Config:
407 """Load a JSON config from any readable stream that contains one.
409 Parameters
410 ----------
411 stream: `IO` or `str`
412 Stream to pass to the JSON loader. This can include a string as
413 well as an IO stream.
415 Raises
416 ------
417 TypeError:
418 Raised if there is an error loading the content.
419 """
420 if isinstance(stream, (bytes, str)):
421 content = json.loads(stream)
422 else:
423 content = json.load(stream)
424 if content is None:
425 content = {}
426 self._data = content
427 return self
429 def _processExplicitIncludes(self) -> None:
430 """Scan through the configuration searching for the special includes.
432 Looks for ``includeConfigs`` directive and processes the includes.
433 """
434 # Search paths for config files
435 searchPaths = [ResourcePath(os.path.curdir, forceDirectory=True)]
436 if self.configFile is not None: 436 ↛ 444line 436 didn't jump to line 444, because the condition on line 436 was never false
437 if isinstance(self.configFile, ResourcePath): 437 ↛ 440line 437 didn't jump to line 440, because the condition on line 437 was never false
438 configDir = self.configFile.dirname()
439 else:
440 raise RuntimeError(f"Unexpected type for config file: {self.configFile}")
441 searchPaths.append(configDir)
443 # Ensure we know what delimiter to use
444 names = self.nameTuples()
445 for path in names:
446 if path[-1] == self.includeKey: 446 ↛ 447line 446 didn't jump to line 447, because the condition on line 446 was never true
447 log.debug("Processing file include directive at %s", self._D + self._D.join(path))
448 basePath = path[:-1]
450 # Extract the includes and then delete them from the config
451 includes = self[path]
452 del self[path]
454 # Be consistent and convert to a list
455 if not isinstance(includes, list):
456 includes = [includes]
458 # Read each file assuming it is a reference to a file
459 # The file can be relative to config file or cwd
460 # ConfigSubset search paths are not used
461 subConfigs = []
462 for fileName in includes:
463 # Expand any shell variables -- this could be URI
464 fileName = ResourcePath(os.path.expandvars(fileName), forceAbsolute=False)
465 found = None
466 if fileName.isabs():
467 found = fileName
468 else:
469 for dir in searchPaths:
470 specific = dir.join(fileName.path)
471 # Remote resource check might be expensive
472 if specific.exists():
473 found = specific
474 break
475 if not found:
476 raise RuntimeError(f"Unable to find referenced include file: {fileName}")
478 # Read the referenced Config as a Config
479 subConfigs.append(type(self)(found))
481 # Now we need to merge these sub configs with the current
482 # information that was present in this node in the config
483 # tree with precedence given to the explicit values
484 newConfig = subConfigs.pop(0)
485 for sc in subConfigs:
486 newConfig.update(sc)
488 # Explicit values take precedence
489 if not basePath:
490 # This is an include at the root config
491 newConfig.update(self)
492 # Replace the current config
493 self._data = newConfig._data
494 else:
495 newConfig.update(self[basePath])
496 # And reattach to the base config
497 self[basePath] = newConfig
499 @staticmethod
500 def _splitIntoKeys(key: _ConfigKey) -> list[str | int]:
501 r"""Split the argument for get/set/in into a hierarchical list.
503 Parameters
504 ----------
505 key : `str` or iterable
506 Argument given to get/set/in. If an iterable is provided it will
507 be converted to a list. If the first character of the string
508 is not an alphanumeric character then it will be used as the
509 delimiter for the purposes of splitting the remainder of the
510 string. If the delimiter is also in one of the keys then it
511 can be escaped using ``\``. There is no default delimiter.
513 Returns
514 -------
515 keys : `list`
516 Hierarchical keys as a `list`.
517 """
518 if isinstance(key, str):
519 if not key[0].isalnum(): 519 ↛ 520line 519 didn't jump to line 520, because the condition on line 519 was never true
520 d = key[0]
521 key = key[1:]
522 else:
523 return [
524 key,
525 ]
526 escaped = f"\\{d}"
527 temp = None
528 if escaped in key:
529 # Complain at the attempt to escape the escape
530 doubled = rf"\{escaped}"
531 if doubled in key:
532 raise ValueError(
533 f"Escaping an escaped delimiter ({doubled} in {key}) is not yet supported."
534 )
535 # Replace with a character that won't be in the string
536 temp = "\r"
537 if temp in key or d == temp:
538 raise ValueError(
539 f"Can not use character {temp!r} in hierarchical key or as"
540 " delimiter if escaping the delimiter"
541 )
542 key = key.replace(escaped, temp)
543 hierarchy = key.split(d)
544 if temp:
545 hierarchy = [h.replace(temp, d) for h in hierarchy]
546 # Copy the list to keep mypy quiet.
547 return list(hierarchy)
548 elif isinstance(key, Iterable): 548 ↛ 552line 548 didn't jump to line 552, because the condition on line 548 was never false
549 return list(key)
550 else:
551 # Do not try to guess.
552 raise TypeError(f"Provided key [{key}] neither str nor iterable.")
554 def _getKeyHierarchy(self, name: _ConfigKey) -> list[str | int]:
555 """Retrieve the key hierarchy for accessing the Config.
557 Parameters
558 ----------
559 name : `str` or `tuple`
560 Delimited string or `tuple` of hierarchical keys.
562 Returns
563 -------
564 hierarchy : `list` of `str`
565 Hierarchy to use as a `list`. If the name is available directly
566 as a key in the Config it will be used regardless of the presence
567 of any nominal delimiter.
568 """
569 keys: list[str | int]
570 if name in self._data:
571 keys = [cast(str, name)]
572 else:
573 keys = self._splitIntoKeys(name)
574 return keys
576 def _findInHierarchy(self, keys: Sequence[str | int], create: bool = False) -> tuple[list[Any], bool]:
577 """Look for hierarchy of keys in Config.
579 Parameters
580 ----------
581 keys : `list` or `tuple`
582 Keys to search in hierarchy.
583 create : `bool`, optional
584 If `True`, if a part of the hierarchy does not exist, insert an
585 empty `dict` into the hierarchy.
587 Returns
588 -------
589 hierarchy : `list`
590 List of the value corresponding to each key in the supplied
591 hierarchy. Only keys that exist in the hierarchy will have
592 a value.
593 complete : `bool`
594 `True` if the full hierarchy exists and the final element
595 in ``hierarchy`` is the value of relevant value.
596 """
597 d: Any = self._data
599 # For the first key, d must be a dict so it is a waste
600 # of time to check for a sequence.
601 must_be_dict = True
603 hierarchy = []
604 complete = True
605 for k in keys:
606 d, isThere = _checkNextItem(k, d, create, must_be_dict)
607 if isThere:
608 hierarchy.append(d)
609 else:
610 complete = False
611 break
612 # Second time round it might be a sequence.
613 must_be_dict = False
615 return hierarchy, complete
617 def __getitem__(self, name: _ConfigKey) -> Any:
618 # Override the split for the simple case where there is an exact
619 # match. This allows `Config.items()` to work via a simple
620 # __iter__ implementation that returns top level keys of
621 # self._data.
623 # If the name matches a key in the top-level hierarchy, bypass
624 # all further cleverness.
625 found_directly = False
626 try:
627 if isinstance(name, str): 627 ↛ 633line 627 didn't jump to line 633, because the condition on line 627 was never false
628 data = self._data[name]
629 found_directly = True
630 except KeyError:
631 pass
633 if not found_directly: 633 ↛ 634line 633 didn't jump to line 634, because the condition on line 633 was never true
634 keys = self._getKeyHierarchy(name)
636 hierarchy, complete = self._findInHierarchy(keys)
637 if not complete:
638 raise KeyError(f"{name} not found")
639 data = hierarchy[-1]
641 # In most cases we have a dict, and it's more efficient
642 # to check for a dict instance before checking the generic mapping.
643 if isinstance(data, (dict, Mapping)):
644 data = Config(data)
645 # Ensure that child configs inherit the parent internal delimiter
646 if self._D != Config._D: 646 ↛ 647line 646 didn't jump to line 647, because the condition on line 646 was never true
647 data._D = self._D
648 return data
650 def __setitem__(self, name: _ConfigKey, value: Any) -> None:
651 keys = self._getKeyHierarchy(name)
652 last = keys.pop()
653 if isinstance(value, Config):
654 value = copy.deepcopy(value._data)
656 hierarchy, complete = self._findInHierarchy(keys, create=True)
657 if hierarchy:
658 data = hierarchy[-1]
659 else:
660 data = self._data
662 try:
663 data[last] = value
664 except TypeError:
665 data[int(last)] = value
667 def __contains__(self, key: Any) -> bool:
668 if not isinstance(key, str | Sequence): 668 ↛ 669line 668 didn't jump to line 669, because the condition on line 668 was never true
669 return False
670 keys = self._getKeyHierarchy(key)
671 hierarchy, complete = self._findInHierarchy(keys)
672 return complete
674 def __delitem__(self, key: str | Sequence[str]) -> None:
675 keys = self._getKeyHierarchy(key)
676 last = keys.pop()
677 hierarchy, complete = self._findInHierarchy(keys)
678 if complete: 678 ↛ 685line 678 didn't jump to line 685, because the condition on line 678 was never false
679 if hierarchy: 679 ↛ 680line 679 didn't jump to line 680, because the condition on line 679 was never true
680 data = hierarchy[-1]
681 else:
682 data = self._data
683 del data[last]
684 else:
685 raise KeyError(f"{key} not found in Config")
687 def update(self, other: Mapping[str, Any]) -> None: # type: ignore[override]
688 """Update config from other `Config` or `dict`.
690 Like `dict.update()`, but will add or modify keys in nested dicts,
691 instead of overwriting the nested dict entirely.
693 Parameters
694 ----------
695 other : `dict` or `Config`
696 Source of configuration:
698 Examples
699 --------
700 >>> c = Config({"a": {"b": 1}})
701 >>> c.update({"a": {"c": 2}})
702 >>> print(c)
703 {'a': {'b': 1, 'c': 2}}
705 >>> foo = {"a": {"b": 1}}
706 >>> foo.update({"a": {"c": 2}})
707 >>> print(foo)
708 {'a': {'c': 2}}
709 """
710 _doUpdate(self._data, other)
712 def merge(self, other: Mapping) -> None:
713 """Merge another Config into this one.
715 Like `Config.update()`, but will add keys & values from other that
716 DO NOT EXIST in self.
718 Keys and values that already exist in self will NOT be overwritten.
720 Parameters
721 ----------
722 other : `dict` or `Config`
723 Source of configuration:
724 """
725 if not isinstance(other, Mapping):
726 raise TypeError(f"Can only merge a Mapping into a Config, not {type(other)}")
728 # Convert the supplied mapping to a Config for consistency
729 # This will do a deepcopy if it is already a Config
730 otherCopy = Config(other)
731 otherCopy.update(self)
732 self._data = otherCopy._data
734 def nameTuples(self, topLevelOnly: bool = False) -> list[tuple[str, ...]]:
735 """Get tuples representing the name hierarchies of all keys.
737 The tuples returned from this method are guaranteed to be usable
738 to access items in the configuration object.
740 Parameters
741 ----------
742 topLevelOnly : `bool`, optional
743 If False, the default, a full hierarchy of names is returned.
744 If True, only the top level are returned.
746 Returns
747 -------
748 names : `list` of `tuple` of `str`
749 List of all names present in the `Config` where each element
750 in the list is a `tuple` of strings representing the hierarchy.
751 """
752 if topLevelOnly: 752 ↛ 753line 752 didn't jump to line 753, because the condition on line 752 was never true
753 return list((k,) for k in self)
755 def getKeysAsTuples(
756 d: Mapping[str, Any] | Sequence[str], keys: list[tuple[str, ...]], base: tuple[str, ...] | None
757 ) -> None:
758 if isinstance(d, Sequence):
759 theseKeys: Iterable[Any] = range(len(d))
760 else:
761 theseKeys = d.keys()
762 for key in theseKeys:
763 val = d[key]
764 levelKey = base + (key,) if base is not None else (key,)
765 keys.append(levelKey)
766 if isinstance(val, (Mapping, Sequence)) and not isinstance(val, str):
767 getKeysAsTuples(val, keys, levelKey)
769 keys: list[tuple[str, ...]] = []
770 getKeysAsTuples(self._data, keys, None)
771 return keys
773 def names(self, topLevelOnly: bool = False, delimiter: str | None = None) -> list[str]:
774 """Get a delimited name of all the keys in the hierarchy.
776 The values returned from this method are guaranteed to be usable
777 to access items in the configuration object.
779 Parameters
780 ----------
781 topLevelOnly : `bool`, optional
782 If False, the default, a full hierarchy of names is returned.
783 If True, only the top level are returned.
784 delimiter : `str`, optional
785 Delimiter to use when forming the keys. If the delimiter is
786 present in any of the keys, it will be escaped in the returned
787 names. If `None` given a delimiter will be automatically provided.
788 The delimiter can not be alphanumeric.
790 Returns
791 -------
792 names : `list` of `str`
793 List of all names present in the `Config`.
795 Notes
796 -----
797 This is different than the built-in method `dict.keys`, which will
798 return only the first level keys.
800 Raises
801 ------
802 ValueError:
803 The supplied delimiter is alphanumeric.
804 """
805 if topLevelOnly:
806 return list(self.keys())
808 # Get all the tuples of hierarchical keys
809 nameTuples = self.nameTuples()
811 if delimiter is not None and delimiter.isalnum():
812 raise ValueError(f"Supplied delimiter ({delimiter!r}) must not be alphanumeric.")
814 if delimiter is None:
815 # Start with something, and ensure it does not need to be
816 # escaped (it is much easier to understand if not escaped)
817 delimiter = self._D
819 # Form big string for easy check of delimiter clash
820 combined = "".join("".join(str(s) for s in k) for k in nameTuples)
822 # Try a delimiter and keep trying until we get something that
823 # works.
824 ntries = 0
825 while delimiter in combined:
826 log.debug("Delimiter '%s' could not be used. Trying another.", delimiter)
827 ntries += 1
829 if ntries > 100:
830 raise ValueError(f"Unable to determine a delimiter for Config {self}")
832 # try another one
833 while True:
834 delimiter = chr(ord(delimiter) + 1)
835 if not delimiter.isalnum():
836 break
838 log.debug("Using delimiter %r", delimiter)
840 # Form the keys, escaping the delimiter if necessary
841 strings = [
842 delimiter + delimiter.join(str(s).replace(delimiter, f"\\{delimiter}") for s in k)
843 for k in nameTuples
844 ]
845 return strings
847 def asArray(self, name: str | Sequence[str]) -> Sequence[Any]:
848 """Get a value as an array.
850 May contain one or more elements.
852 Parameters
853 ----------
854 name : `str`
855 Key to use to retrieve value.
857 Returns
858 -------
859 array : `collections.abc.Sequence`
860 The value corresponding to name, but guaranteed to be returned
861 as a list with at least one element. If the value is a
862 `~collections.abc.Sequence` (and not a `str`) the value itself
863 will be returned, else the value will be the first element.
864 """
865 val = self.get(name)
866 if isinstance(val, str):
867 val = [val]
868 elif not isinstance(val, Sequence):
869 val = [val]
870 return val
872 def __eq__(self, other: Any) -> bool:
873 if isinstance(other, Config):
874 other = other._data
875 return self._data == other
877 def __ne__(self, other: Any) -> bool:
878 if isinstance(other, Config):
879 other = other._data
880 return self._data != other
882 #######
883 # i/o #
885 def dump(self, output: IO | None = None, format: str = "yaml") -> str | None:
886 """Write the config to an output stream.
888 Parameters
889 ----------
890 output : `IO`, optional
891 The stream to use for output. If `None` the serialized content
892 will be returned.
893 format : `str`, optional
894 The format to use for the output. Can be "yaml" or "json".
896 Returns
897 -------
898 serialized : `str` or `None`
899 If a stream was given the stream will be used and the return
900 value will be `None`. If the stream was `None` the
901 serialization will be returned as a string.
902 """
903 if format == "yaml":
904 return yaml.safe_dump(self._data, output, default_flow_style=False)
905 elif format == "json":
906 if output is not None:
907 json.dump(self._data, output, ensure_ascii=False)
908 return None
909 else:
910 return json.dumps(self._data, ensure_ascii=False)
911 raise ValueError(f"Unsupported format for Config serialization: {format}")
913 def dumpToUri(
914 self,
915 uri: ResourcePathExpression,
916 updateFile: bool = True,
917 defaultFileName: str = "butler.yaml",
918 overwrite: bool = True,
919 ) -> None:
920 """Write the config to location pointed to by given URI.
922 Currently supports 's3' and 'file' URI schemes.
924 Parameters
925 ----------
926 uri: `lsst.resources.ResourcePathExpression`
927 URI of location where the Config will be written.
928 updateFile : bool, optional
929 If True and uri does not end on a filename with extension, will
930 append `defaultFileName` to the target uri. True by default.
931 defaultFileName : bool, optional
932 The file name that will be appended to target uri if updateFile is
933 True and uri does not end on a file with an extension.
934 overwrite : bool, optional
935 If True the configuration will be written even if it already
936 exists at that location.
937 """
938 # Make local copy of URI or create new one
939 uri = ResourcePath(uri)
941 if updateFile and not uri.getExtension():
942 uri = uri.updatedFile(defaultFileName)
944 # Try to work out the format from the extension
945 ext = uri.getExtension()
946 format = ext[1:].lower()
948 output = self.dump(format=format)
949 assert output is not None, "Config.dump guarantees not-None return when output arg is None"
950 uri.write(output.encode(), overwrite=overwrite)
951 self.configFile = uri
953 @staticmethod
954 def updateParameters(
955 configType: type[ConfigSubset],
956 config: Config,
957 full: Config,
958 toUpdate: dict[str, Any] | None = None,
959 toCopy: Sequence[str | Sequence[str]] | None = None,
960 overwrite: bool = True,
961 toMerge: Sequence[str | Sequence[str]] | None = None,
962 ) -> None:
963 """Update specific config parameters.
965 Allows for named parameters to be set to new values in bulk, and
966 for other values to be set by copying from a reference config.
968 Assumes that the supplied config is compatible with ``configType``
969 and will attach the updated values to the supplied config by
970 looking for the related component key. It is assumed that
971 ``config`` and ``full`` are from the same part of the
972 configuration hierarchy.
974 Parameters
975 ----------
976 configType : `ConfigSubset`
977 Config type to use to extract relevant items from ``config``.
978 config : `Config`
979 A `Config` to update. Only the subset understood by
980 the supplied `ConfigSubset` will be modified. Default values
981 will not be inserted and the content will not be validated
982 since mandatory keys are allowed to be missing until
983 populated later by merging.
984 full : `Config`
985 A complete config with all defaults expanded that can be
986 converted to a ``configType``. Read-only and will not be
987 modified by this method. Values are read from here if
988 ``toCopy`` is defined.
990 Repository-specific options that should not be obtained
991 from defaults when Butler instances are constructed
992 should be copied from ``full`` to ``config``.
993 toUpdate : `dict`, optional
994 A `dict` defining the keys to update and the new value to use.
995 The keys and values can be any supported by `Config`
996 assignment.
997 toCopy : `tuple`, optional
998 `tuple` of keys whose values should be copied from ``full``
999 into ``config``.
1000 overwrite : `bool`, optional
1001 If `False`, do not modify a value in ``config`` if the key
1002 already exists. Default is always to overwrite.
1003 toMerge : `tuple`, optional
1004 Keys to merge content from full to config without overwriting
1005 pre-existing values. Only works if the key refers to a hierarchy.
1006 The ``overwrite`` flag is ignored.
1008 Raises
1009 ------
1010 ValueError
1011 Neither ``toUpdate``, ``toCopy`` nor ``toMerge`` were defined.
1012 """
1013 if toUpdate is None and toCopy is None and toMerge is None:
1014 raise ValueError("At least one of toUpdate, toCopy, or toMerge parameters must be set.")
1016 # If this is a parent configuration then we need to ensure that
1017 # the supplied config has the relevant component key in it.
1018 # If this is a parent configuration we add in the stub entry
1019 # so that the ConfigSubset constructor will do the right thing.
1020 # We check full for this since that is guaranteed to be complete.
1021 if (
1022 configType.component is not None
1023 and configType.component in full
1024 and configType.component not in config
1025 ):
1026 config[configType.component] = {}
1028 # Extract the part of the config we wish to update
1029 localConfig = configType(config, mergeDefaults=False, validate=False)
1031 key: str | Sequence[str]
1032 if toUpdate:
1033 for key, value in toUpdate.items():
1034 if key in localConfig and not overwrite:
1035 log.debug(
1036 "Not overriding key '%s' with value '%s' in config %s",
1037 key,
1038 value,
1039 localConfig.__class__.__name__,
1040 )
1041 else:
1042 localConfig[key] = value
1044 if toCopy or toMerge:
1045 localFullConfig = configType(full, mergeDefaults=False)
1047 if toCopy:
1048 for key in toCopy:
1049 if key in localConfig and not overwrite:
1050 log.debug(
1051 "Not overriding key '%s' from defaults in config %s",
1052 key,
1053 localConfig.__class__.__name__,
1054 )
1055 else:
1056 localConfig[key] = localFullConfig[key]
1057 if toMerge:
1058 for key in toMerge:
1059 if key in localConfig:
1060 # Get the node from the config to do the merge
1061 # but then have to reattach to the config.
1062 subset = localConfig[key]
1063 subset.merge(localFullConfig[key])
1064 localConfig[key] = subset
1065 else:
1066 localConfig[key] = localFullConfig[key]
1068 # Reattach to parent if this is a child config
1069 if configType.component is not None and configType.component in config:
1070 config[configType.component] = localConfig
1071 else:
1072 config.update(localConfig)
1074 def toDict(self) -> dict[str, Any]:
1075 """Convert a `Config` to a standalone hierarchical `dict`.
1077 Returns
1078 -------
1079 d : `dict`
1080 The standalone hierarchical `dict` with any `Config` classes
1081 in the hierarchy converted to `dict`.
1083 Notes
1084 -----
1085 This can be useful when passing a Config to some code that
1086 expects native Python types.
1087 """
1088 output = copy.deepcopy(self._data)
1089 for k, v in output.items():
1090 if isinstance(v, Config): 1090 ↛ 1091line 1090 didn't jump to line 1091, because the condition on line 1090 was never true
1091 v = v.toDict()
1092 output[k] = v
1093 return output
class ConfigSubset(Config):
    """Config representing a subset of a more general configuration.

    Subclasses define their own component and when given a configuration
    that includes that component, the resulting configuration only includes
    the subset. For example, your config might contain ``dimensions`` if it's
    part of a global config and that subset will be stored. If ``dimensions``
    can not be found it is assumed that the entire contents of the
    configuration should be used.

    Default values are read from the environment or supplied search paths
    using the default configuration file name specified in the subclass.
    This allows a configuration class to be instantiated without any
    additional arguments.

    Additional validation can be specified to check for keys that are mandatory
    in the configuration.

    Parameters
    ----------
    other : `Config`, `lsst.resources.ResourcePathExpression`, `dict`, or `None`
        Argument specifying the configuration information as understood
        by `Config`.
    validate : `bool`, optional
        If `True` required keys will be checked to ensure configuration
        consistency.
    mergeDefaults : `bool`, optional
        If `True` defaults will be read and the supplied config will
        be combined with the defaults, with the supplied values taking
        precedence.
    searchPaths : `list` or `tuple`, optional
        Explicit additional paths to search for defaults. They should
        be supplied in priority order. These paths have higher priority
        than those read from the environment in
        `ConfigSubset.defaultSearchPaths()`. Paths can be `str` referring to
        the local file system or URIs, `lsst.resources.ResourcePath`.

    Raises
    ------
    RuntimeError
        Raised if the class named by a ``cls`` entry in the configuration
        can not be imported.
    KeyError
        Raised by `validate` if ``validate`` is `True` and a required key
        is missing.
    """

    component: ClassVar[str | None] = None
    """Component to use from supplied config. Can be None. If specified the
    key is not required. Can be a full dot-separated path to a component.
    """

    requiredKeys: ClassVar[Sequence[str]] = ()
    """Keys that are required to be specified in the configuration.
    """

    defaultConfigFile: ClassVar[str | None] = None
    """Name of the file containing defaults for this config class.
    """

    def __init__(
        self,
        other: Config | ResourcePathExpression | Mapping[str, Any] | None = None,
        validate: bool = True,
        mergeDefaults: bool = True,
        searchPaths: Sequence[ResourcePathExpression] | None = None,
    ):
        # Create a blank object to receive the defaults.
        # Once we have the defaults we then update with the external values.
        super().__init__()

        # Create a standard Config rather than subset
        externalConfig = Config(other)

        # Select the part we need from it.
        # To simplify the use of !include we also check for the existence of
        # component.component (since the included files can themselves
        # include the component name).
        if self.component is not None:
            doubled = (self.component, self.component)
            # Must check for double depth first
            if doubled in externalConfig:
                externalConfig = externalConfig[doubled]
            elif self.component in externalConfig:
                # NOTE(review): the membership test goes through Config but
                # this indexes ``_data`` directly; confirm this is the
                # intended behavior for dot-separated component paths.
                externalConfig._data = externalConfig._data[self.component]

        # Default files read to create this configuration
        self.filesRead: list[ResourcePath | str] = []

        # Assume we are not looking up child configurations
        containerKey = None

        # Sometimes we do not want to merge with defaults.
        if mergeDefaults:
            # Supplied search paths have highest priority
            fullSearchPath: list[ResourcePath | str] = []
            if searchPaths:
                fullSearchPath = [ResourcePath(path) for path in searchPaths]

            # Read default paths from environment
            fullSearchPath.extend(self.defaultSearchPaths())

            # There are two places to find defaults for this particular config
            # - The "defaultConfigFile" defined in the subclass
            # - The class specified in the "cls" element in the config.
            # Read cls after merging in case it changes.
            if self.defaultConfigFile is not None:
                self._updateWithConfigsFromPath(fullSearchPath, self.defaultConfigFile)

            # Can have a class specification in the external config (priority)
            # or from the defaults.
            pytype = None
            if "cls" in externalConfig:
                pytype = externalConfig["cls"]
            elif "cls" in self:
                pytype = self["cls"]

            if pytype is not None:
                try:
                    cls = doImport(pytype)
                except ImportError as e:
                    raise RuntimeError(f"Failed to import cls '{pytype}' for config {type(self)}") from e

                # The class referenced from the config is not required to
                # declare a defaults file, so a missing attribute is treated
                # the same as an explicit `None` rather than as an error.
                defaultsFile = getattr(cls, "defaultConfigFile", None)
                if defaultsFile is not None:
                    self._updateWithConfigsFromPath(fullSearchPath, defaultsFile)

                # Get the container key in case we need it; it is optional.
                containerKey = getattr(cls, "containerKey", None)

        # Now update this object with the external values so that the external
        # values always override the defaults
        self.update(externalConfig)
        if not self.configFile:
            self.configFile = externalConfig.configFile

        # If this configuration has child configurations of the same
        # config class, we need to expand those defaults as well.
        if mergeDefaults and containerKey is not None and containerKey in self:
            for idx, subConfig in enumerate(self[containerKey]):
                self[containerKey, idx] = type(self)(
                    other=subConfig, validate=validate, mergeDefaults=mergeDefaults, searchPaths=searchPaths
                )

        if validate:
            self.validate()

    @classmethod
    def defaultSearchPaths(cls) -> list[ResourcePath | str]:
        """Read environment to determine search paths to use.

        Global defaults, at lowest priority, are found in the ``configs``
        resource directory of the package named by ``cls.resourcesPackage``.
        Additional defaults can be defined using the environment variable
        ``$DAF_BUTLER_CONFIG_PATH`` which is a PATH-like variable where paths
        at the front of the list have priority over those later.

        Returns
        -------
        paths : `list`
            Returns a list of paths to search. The returned order is in
            priority with the highest priority paths first. Entries from
            the environment variable are `str`; the package configuration
            resources are always the final, lowest priority, entry and
            are returned as a `lsst.resources.ResourcePath`.

        Notes
        -----
        The environment variable is split on the platform path separator
        (`os.pathsep`). This currently makes it incompatible with usage of
        URIs.
        """
        # Highest priority first: entries from the environment variable,
        # in the order given, followed by the package defaults.
        defaultsPaths: list[str | ResourcePath] = []

        if CONFIG_PATH in os.environ:
            externalPaths = os.environ[CONFIG_PATH].split(os.pathsep)
            defaultsPaths.extend(externalPaths)

        # Add the package defaults as a resource
        defaultsPaths.append(ResourcePath(f"resource://{cls.resourcesPackage}/configs", forceDirectory=True))
        return defaultsPaths

    def _updateWithConfigsFromPath(
        self, searchPaths: Sequence[str | ResourcePath], configFile: ResourcePath | str
    ) -> None:
        """Search the supplied paths, merging the configuration values.

        The values read will override values currently stored in the object.
        Every file found in the path will be read, such that the earlier
        path entries have higher priority.

        Parameters
        ----------
        searchPaths : `list` of `lsst.resources.ResourcePath`, `str`
            Paths to search for the supplied configFile. This path
            is the priority order, such that files read from the
            first path entry will be selected over those read from
            a later path. Can contain `str` referring to the local file
            system or a URI string.
        configFile : `lsst.resources.ResourcePath` or `str`
            File to locate in path. If it is absolute and exists it is
            read directly and the search path is not used. Otherwise it
            is looked up relative to each entry of ``searchPaths``.

        Raises
        ------
        ValueError
            Raised if an entry in ``searchPaths`` is neither a `str` nor a
            `lsst.resources.ResourcePath`.
        """
        uri = ResourcePath(configFile)
        if uri.isabs() and uri.exists():
            # An explicit, existing resource bypasses the search path.
            self._updateWithOtherConfigFile(configFile)
            self.filesRead.append(configFile)
            return

        # Reverse order so that high priority entries
        # update the object last.
        for pathDir in reversed(searchPaths):
            if not isinstance(pathDir, (str, ResourcePath)):
                raise ValueError(f"Unexpected search path type encountered: {pathDir!r}")
            pathDir = ResourcePath(pathDir, forceDirectory=True)
            file = pathDir.join(configFile)
            if file.exists():
                self.filesRead.append(file)
                self._updateWithOtherConfigFile(file)

    def _updateWithOtherConfigFile(self, file: Config | str | ResourcePath | Mapping[str, Any]) -> None:
        """Read in some defaults and update.

        Update the configuration by reading the supplied file as a config
        of this class, and merging such that these values override the
        current values. Contents of the external config are not validated.

        Parameters
        ----------
        file : `Config`, `str`, `lsst.resources.ResourcePath`, or `dict`
            Entity that can be converted to a `ConfigSubset`.
        """
        # Use this class to read the defaults so that subsetting can happen
        # correctly. Defaults are not merged here, which also prevents
        # the constructor from recursing back into the defaults search.
        externalConfig = type(self)(file, validate=False, mergeDefaults=False)
        self.update(externalConfig)

    def validate(self) -> None:
        """Check that mandatory keys are present in this configuration.

        Ignored if ``requiredKeys`` is empty.

        Raises
        ------
        KeyError
            Raised if any entry of ``requiredKeys`` is not a key of the
            underlying data.
        """
        # Each required key is looked up directly in the underlying data.
        missing = [k for k in self.requiredKeys if k not in self._data]
        if missing:
            raise KeyError(f"Mandatory keys ({missing}) missing from supplied configuration for {type(self)}")