Coverage for python/lsst/daf/butler/core/config.py: 45%
485 statements
coverage.py v7.3.1, created at 2023-10-02 08:00 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28"""Configuration control."""
30from __future__ import annotations
32__all__ = ("Config", "ConfigSubset")
34import copy
35import io
36import json
37import logging
38import os
39import pprint
40import sys
41from collections import defaultdict
42from collections.abc import Iterable, Iterator, Mapping, MutableMapping, Sequence
43from pathlib import Path
44from typing import IO, TYPE_CHECKING, Any, ClassVar, cast
46import yaml
47from lsst.resources import ResourcePath, ResourcePathExpression
48from lsst.utils import doImportType
49from yaml.representer import Representer
51yaml.add_representer(defaultdict, Representer.represent_dict)
54# Config module logger
55log = logging.getLogger(__name__)
57# PATH-like environment variable to use for defaults.
58CONFIG_PATH = "DAF_BUTLER_CONFIG_PATH"
60if TYPE_CHECKING:
61 yamlLoader = yaml.SafeLoader
62else:
63 try:
64 yamlLoader = yaml.CSafeLoader
65 except AttributeError:
66 # Not all installations have the C library
67 # (but assume for mypy's sake that they're the same)
68 yamlLoader = yaml.SafeLoader
71def _doUpdate(d: Mapping[str, Any], u: Mapping[str, Any]) -> Mapping[str, Any]:
72 if not isinstance(u, Mapping) or not isinstance(d, MutableMapping): 72 ↛ 73
73 raise RuntimeError(f"Only call update with Mapping, not {type(d)}")
74 for k, v in u.items():
75 if isinstance(v, Mapping):
76 lhs = d.get(k, {})
77 if not isinstance(lhs, Mapping): 77 ↛ 78
78 lhs = {}
79 d[k] = _doUpdate(lhs, v)
80 else:
81 d[k] = v
82 return d
85def _checkNextItem(k: str | int, d: Any, create: bool, must_be_dict: bool) -> tuple[Any, bool]:
86 """See if k is in d and if it is return the new child."""
87 nextVal = None
88 isThere = False
89 if d is None: 89 ↛ 91
90 # We have gone past the end of the hierarchy
91 pass
92 elif not must_be_dict and isinstance(d, Sequence): 92 ↛ 97
93 # Check for Sequence first because for lists
94 # __contains__ checks whether value is found in list
95 # not whether the index exists in list. When we traverse
96 # the hierarchy we are interested in the index.
97 try:
98 nextVal = d[int(k)]
99 isThere = True
100 except IndexError:
101 pass
102 except ValueError:
103 isThere = k in d
104 elif k in d:
105 nextVal = d[k]
106 isThere = True
107 elif create: 107 ↛ 108
108 d[k] = {}
109 nextVal = d[k]
110 isThere = True
112 return nextVal, isThere
115class Loader(yamlLoader):
116 """YAML Loader that supports file include directives.
118 Uses ``!include`` directive in a YAML file to point to another
119 YAML file to be included. The path in the include directive is relative
120 to the file containing that directive.
122 storageClasses: !include storageClasses.yaml
124 Examples
125 --------
126 >>> with open("document.yaml", "r") as f:
127 ...    data = yaml.load(f, Loader=Loader)
129 Notes
130 -----
131 See https://davidchall.github.io/yaml-includes.html
132 """
134 def __init__(self, stream: str | IO): # types-PyYAML annotates 'stream' with a private type
135 super().__init__(stream)
136 # if this is a string and not a stream we may well lack a name
137 if hasattr(stream, "name"): 137 ↛ 141
138 self._root = ResourcePath(stream.name)
139 else:
140 # No choice but to assume a local filesystem
141 self._root = ResourcePath("no-file.yaml")
142 self.add_constructor("!include", Loader.include)
144 def include(self, node: yaml.Node) -> list[Any] | dict[str, Any]:
145 result: list[Any] | dict[str, Any]
146 if isinstance(node, yaml.ScalarNode):
147 return self.extractFile(self.construct_scalar(node)) # type: ignore[arg-type]
149 elif isinstance(node, yaml.SequenceNode):
150 result = []
151 for filename in self.construct_sequence(node):
152 result.append(self.extractFile(filename))
153 return result
155 elif isinstance(node, yaml.MappingNode):
156 result = {}
157 for k, v in self.construct_mapping(node).items():
158 if not isinstance(k, str):
159 raise TypeError(f"Expected only strings in YAML mapping; got {k!r} of type {type(k)}.")
160 result[k] = self.extractFile(v)
161 return result
163 else:
164 print("Error: unrecognised node type in !include statement", file=sys.stderr)
165 raise yaml.constructor.ConstructorError
167 def extractFile(self, filename: str) -> Any:
168 # It is possible for the !include to point to an explicit URI
169 # instead of a relative URI, therefore we first see if it is
170 # scheme-less or not. If it has a scheme we use it directly
171 # if it is scheme-less we use it relative to the file root.
172 requesteduri = ResourcePath(filename, forceAbsolute=False)
174 if requesteduri.scheme:
175 fileuri = requesteduri
176 else:
177 fileuri = self._root.updatedFile(filename)
179 log.debug("Opening YAML file via !include: %s", fileuri)
181 # Read all the data from the resource
182 data = fileuri.read()
184 # Store the bytes into a BytesIO so we can attach a .name
185 stream = io.BytesIO(data)
186 stream.name = fileuri.geturl()
187 return yaml.load(stream, Loader)
190# Type of the key used for accessing items in configuration object. It can be
191 # a single string as described below or a sequence of strings and integer
192# indices. Indices are used to access items in sequences stored in config.
193_ConfigKey = str | Sequence[str | int]
196class Config(MutableMapping):
197 r"""Implements a datatype that is used by `Butler` for configuration.
199 It is essentially a `dict` with key/value pairs, including nested dicts
200 (as values). In fact, it can be initialized with a `dict`.
201 This is explained next:
203 Config extends the `dict` api so that hierarchical values may be accessed
204 with delimited notation or as a tuple. If a string is given the delimiter
205 is picked up from the first character in that string. For example,
206 ``foo.getValue(".a.b.c")``, ``foo["a"]["b"]["c"]``, ``foo["a", "b", "c"]``,
207 ``foo[".a.b.c"]``, and ``foo["/a/b/c"]`` all achieve the same outcome.
208 If the first character is alphanumeric, no delimiter will be used.
209 ``foo["a.b.c"]`` will be a single key ``a.b.c`` as will ``foo[":a.b.c"]``.
210 Unicode characters can be used as the delimiter for distinctiveness if
211 required.
213 If a key in the hierarchy starts with a non-alphanumeric character care
214 should be used to ensure that either the tuple interface is used or
215 a distinct delimiter is always given in string form.
217 Finally, the delimiter can be escaped if it is part of a key and also
218 has to be used as a delimiter. For example, ``foo[r".a.b\.c"]`` results in
219 a two element hierarchy of ``a`` and ``b.c``. For hard-coded strings it is
220 always better to use a different delimiter in these cases.
222 Note that adding a multi-level key implicitly creates any nesting levels
223 that do not exist, but removing multi-level keys does not automatically
224 remove empty nesting levels. As a result:
226 >>> c = Config()
227 >>> c[".a.b"] = 1
228 >>> del c[".a.b"]
229 >>> c["a"]
230 Config({})
232 Storage formats supported:
234 - yaml: read and write are supported.
235 - json: read and write are supported, but the ``!include`` directive is not.
237 Parameters
238 ----------
239 other : `lsst.resources.ResourcePath` or `Config` or `dict`
240 Other source of configuration, can be:
242 - (`lsst.resources.ResourcePathExpression`)
243 Treated as a URI to a config file. Must end with ".yaml" or ".json".
244 - (`Config`) Copies the other Config's values into this one.
245 - (`dict`) Copies the values from the dict into this Config.
247 If `None` is provided an empty `Config` will be created.
248 """
250 _D: str = "→"
251 """Default internal delimiter to use for components in the hierarchy when
252 constructing keys for external use (see `Config.names()`)."""
254 includeKey: ClassVar[str] = "includeConfigs"
255 """Key used to indicate that another config should be included at this
256 part of the hierarchy."""
258 resourcesPackage: str = "lsst.daf.butler"
259 """Package to search for default configuration data. The resources
260 themselves will be within a ``configs`` resource hierarchy."""
262 def __init__(self, other: ResourcePathExpression | Config | Mapping[str, Any] | None = None):
263 self._data: dict[str, Any] = {}
264 self.configFile: ResourcePath | None = None
266 if other is None:
267 return
269 if isinstance(other, Config):
270 # Deep copy might be more efficient but if someone has overridden
271 # a config entry to store a complex object then deep copy may
272 # fail. Safer to use update().
273 self.update(other._data)
274 self.configFile = other.configFile
275 elif isinstance(other, dict | Mapping):
276 # In most cases we have a dict, and it's more efficient
277 # to check for a dict instance before checking the generic mapping.
278 self.update(other)
279 elif isinstance(other, str | ResourcePath | Path): 279 ↛ 286
280 # if other is a string, assume it is a file path/URI
281 self.__initFromUri(other)
282 self._processExplicitIncludes()
283 else:
284 # if the config specified by other could not be recognized raise
285 # a runtime error.
286 raise RuntimeError(f"A Config could not be loaded from other: {other}")
288 def ppprint(self) -> str:
289 """Return config as formatted readable string.
291 Examples
292 --------
293 use: ``pdb> print(myConfigObject.ppprint())``
295 Returns
296 -------
297 s : `str`
298 A prettyprint formatted string representing the config
299 """
300 return pprint.pformat(self._data, indent=2, width=1)
302 def __repr__(self) -> str:
303 return f"{type(self).__name__}({self._data!r})"
305 def __str__(self) -> str:
306 return self.ppprint()
308 def __len__(self) -> int:
309 return len(self._data)
311 def __iter__(self) -> Iterator[str]:
312 return iter(self._data)
314 def copy(self) -> Config:
315 return type(self)(self)
317 @classmethod
318 def fromString(cls, string: str, format: str = "yaml") -> Config:
319 """Create a new Config instance from a serialized string.
321 Parameters
322 ----------
323 string : `str`
324 String containing content in the specified format.
325 format : `str`, optional
326 Format of the supplied string. Can be ``json`` or ``yaml``.
328 Returns
329 -------
330 c : `Config`
331 Newly-constructed Config.
332 """
333 if format == "yaml":
334 new_config = cls().__initFromYaml(string)
335 elif format == "json":
336 new_config = cls().__initFromJson(string)
337 else:
338 raise ValueError(f"Unexpected format of string: {format}")
339 new_config._processExplicitIncludes()
340 return new_config
342 @classmethod
343 def fromYaml(cls, string: str) -> Config:
344 """Create a new Config instance from a YAML string.
346 Parameters
347 ----------
348 string : `str`
349 String containing content in YAML format.
351 Returns
352 -------
353 c : `Config`
354 Newly-constructed Config.
355 """
356 return cls.fromString(string, format="yaml")
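# For example (content invented for illustration):
#
#   >>> Config.fromString('{"a": {"b": 1}}', format="json")["a", "b"]
#   1
#   >>> Config.fromYaml("a:\n  b: 2")[".a.b"]
#   2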
358 def __initFromUri(self, path: ResourcePathExpression) -> None:
359 """Load a file from a path or an URI.
361 Parameters
362 ----------
363 path : `lsst.resources.ResourcePathExpression`
364 Path or a URI to a persisted config file.
365 """
366 uri = ResourcePath(path)
367 ext = uri.getExtension()
368 if ext == ".yaml": 368 ↛ 375
369 log.debug("Opening YAML config file: %s", uri.geturl())
370 content = uri.read()
371 # Use a stream so we can name it
372 stream = io.BytesIO(content)
373 stream.name = uri.geturl()
374 self.__initFromYaml(stream)
375 elif ext == ".json":
376 log.debug("Opening JSON config file: %s", uri.geturl())
377 content = uri.read()
378 self.__initFromJson(content)
379 else:
380 # This URI does not have a valid extension. It might be because
381 # we ended up with a directory and not a file. Before we complain
382 # about an extension, do an existence check. No need to do
383 # the (possibly expensive) existence check in the default code
384 # path above because we will find out soon enough that the file
385 # is not there.
386 if not uri.exists():
387 raise FileNotFoundError(f"Config location {uri} does not exist.")
388 raise RuntimeError(f"The Config URI does not have a supported extension: {uri}")
389 self.configFile = uri
391 def __initFromYaml(self, stream: IO | str | bytes) -> Config:
392 """Load a YAML config from any readable stream that contains one.
394 Parameters
395 ----------
396 stream: `IO` or `str`
397 Stream to pass to the YAML loader. Accepts anything that
398 `yaml.load` accepts. This can include a string as well as an
399 IO stream.
401 Raises
402 ------
403 yaml.YAMLError
404 If there is an error loading the file.
405 """
406 content = yaml.load(stream, Loader=Loader)
407 if content is None: 407 ↛ 408
408 content = {}
409 self._data = content
410 return self
412 def __initFromJson(self, stream: IO | str | bytes) -> Config:
413 """Load a JSON config from any readable stream that contains one.
415 Parameters
416 ----------
417 stream: `IO` or `str`
418 Stream to pass to the JSON loader. This can include a string as
419 well as an IO stream.
421 Raises
422 ------
423 TypeError:
424 Raised if there is an error loading the content.
425 """
426 if isinstance(stream, bytes | str):
427 content = json.loads(stream)
428 else:
429 content = json.load(stream)
430 if content is None:
431 content = {}
432 self._data = content
433 return self
435 def _processExplicitIncludes(self) -> None:
436 """Scan through the configuration searching for the special includes.
438 Looks for ``includeConfigs`` directive and processes the includes.
439 """
440 # Search paths for config files
441 searchPaths = [ResourcePath(os.path.curdir, forceDirectory=True)]
442 if self.configFile is not None: 442 ↛ 450
443 if isinstance(self.configFile, ResourcePath): 443 ↛ 446
444 configDir = self.configFile.dirname()
445 else:
446 raise RuntimeError(f"Unexpected type for config file: {self.configFile}")
447 searchPaths.append(configDir)
449 # Ensure we know what delimiter to use
450 names = self.nameTuples()
451 for path in names:
452 if path[-1] == self.includeKey: 452 ↛ 453
453 log.debug("Processing file include directive at %s", self._D + self._D.join(path))
454 basePath = path[:-1]
456 # Extract the includes and then delete them from the config
457 includes = self[path]
458 del self[path]
460 # Be consistent and convert to a list
461 if not isinstance(includes, list):
462 includes = [includes]
464 # Read each file assuming it is a reference to a file
465 # The file can be relative to config file or cwd
466 # ConfigSubset search paths are not used
467 subConfigs = []
468 for fileName in includes:
469 # Expand any shell variables -- this could be URI
470 fileName = ResourcePath(os.path.expandvars(fileName), forceAbsolute=False)
471 found = None
472 if fileName.isabs():
473 found = fileName
474 else:
475 for dir in searchPaths:
476 specific = dir.join(fileName.path)
477 # Remote resource check might be expensive
478 if specific.exists():
479 found = specific
480 break
481 if not found:
482 raise RuntimeError(f"Unable to find referenced include file: {fileName}")
484 # Read the referenced Config as a Config
485 subConfigs.append(type(self)(found))
487 # Now we need to merge these sub configs with the current
488 # information that was present in this node in the config
489 # tree with precedence given to the explicit values
490 newConfig = subConfigs.pop(0)
491 for sc in subConfigs:
492 newConfig.update(sc)
494 # Explicit values take precedence
495 if not basePath:
496 # This is an include at the root config
497 newConfig.update(self)
498 # Replace the current config
499 self._data = newConfig._data
500 else:
501 newConfig.update(self[basePath])
502 # And reattach to the base config
503 self[basePath] = newConfig
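# A sketch of the directive handled above (file names and keys invented).
# The contents of the included file become the contents of the node that
# carries the ``includeConfigs`` key, with explicit values winning:
#
#   # defaults.yaml
#   db: sqlite
#   timeout: 10
#
#   # butler-example.yaml
#   registry:
#     includeConfigs: defaults.yaml
#     timeout: 60
#
#   >>> c = Config("butler-example.yaml")
#   >>> c["registry", "db"], c["registry", "timeout"]
#   ('sqlite', 60)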
505 @staticmethod
506 def _splitIntoKeys(key: _ConfigKey) -> list[str | int]:
507 r"""Split the argument for get/set/in into a hierarchical list.
509 Parameters
510 ----------
511 key : `str` or iterable
512 Argument given to get/set/in. If an iterable is provided it will
513 be converted to a list. If the first character of the string
514 is not an alphanumeric character then it will be used as the
515 delimiter for the purposes of splitting the remainder of the
516 string. If the delimiter is also in one of the keys then it
517 can be escaped using ``\``. There is no default delimiter.
519 Returns
520 -------
521 keys : `list`
522 Hierarchical keys as a `list`.
523 """
524 if isinstance(key, str):
525 if not key[0].isalnum(): 525 ↛ 526
526 d = key[0]
527 key = key[1:]
528 else:
529 return [
530 key,
531 ]
532 escaped = f"\\{d}"
533 temp = None
534 if escaped in key:
535 # Complain at the attempt to escape the escape
536 doubled = rf"\{escaped}"
537 if doubled in key:
538 raise ValueError(
539 f"Escaping an escaped delimiter ({doubled} in {key}) is not yet supported."
540 )
541 # Replace with a character that won't be in the string
542 temp = "\r"
543 if temp in key or d == temp:
544 raise ValueError(
545 f"Can not use character {temp!r} in hierarchical key or as"
546 " delimiter if escaping the delimiter"
547 )
548 key = key.replace(escaped, temp)
549 hierarchy = key.split(d)
550 if temp:
551 hierarchy = [h.replace(temp, d) for h in hierarchy]
552 # Copy the list to keep mypy quiet.
553 return list(hierarchy)
554 elif isinstance(key, Iterable): 554 ↛ 558
555 return list(key)
556 else:
557 # Do not try to guess.
558 raise TypeError(f"Provided key [{key}] neither str nor iterable.")
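# For instance (keys invented for illustration):
#
#   >>> Config._splitIntoKeys(".a.b.c")
#   ['a', 'b', 'c']
#   >>> Config._splitIntoKeys(r".a.b\.c")   # escaped delimiter stays in the key
#   ['a', 'b.c']
#   >>> Config._splitIntoKeys(("a", "b.c"))
#   ['a', 'b.c']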
560 def _getKeyHierarchy(self, name: _ConfigKey) -> list[str | int]:
561 """Retrieve the key hierarchy for accessing the Config.
563 Parameters
564 ----------
565 name : `str` or `tuple`
566 Delimited string or `tuple` of hierarchical keys.
568 Returns
569 -------
570 hierarchy : `list` of `str`
571 Hierarchy to use as a `list`. If the name is available directly
572 as a key in the Config it will be used regardless of the presence
573 of any nominal delimiter.
574 """
575 keys: list[str | int]
576 if name in self._data:
577 keys = [cast(str, name)]
578 else:
579 keys = self._splitIntoKeys(name)
580 return keys
582 def _findInHierarchy(self, keys: Sequence[str | int], create: bool = False) -> tuple[list[Any], bool]:
583 """Look for hierarchy of keys in Config.
585 Parameters
586 ----------
587 keys : `list` or `tuple`
588 Keys to search in hierarchy.
589 create : `bool`, optional
590 If `True`, if a part of the hierarchy does not exist, insert an
591 empty `dict` into the hierarchy.
593 Returns
594 -------
595 hierarchy : `list`
596 List of the value corresponding to each key in the supplied
597 hierarchy. Only keys that exist in the hierarchy will have
598 a value.
599 complete : `bool`
600 `True` if the full hierarchy exists and the final element
601 in ``hierarchy`` is the relevant value.
602 """
603 d: Any = self._data
605 # For the first key, d must be a dict so it is a waste
606 # of time to check for a sequence.
607 must_be_dict = True
609 hierarchy = []
610 complete = True
611 for k in keys:
612 d, isThere = _checkNextItem(k, d, create, must_be_dict)
613 if isThere:
614 hierarchy.append(d)
615 else:
616 complete = False
617 break
618 # Second time round it might be a sequence.
619 must_be_dict = False
621 return hierarchy, complete
623 def __getitem__(self, name: _ConfigKey) -> Any:
624 # Override the split for the simple case where there is an exact
625 # match. This allows `Config.items()` to work via a simple
626 # __iter__ implementation that returns top level keys of
627 # self._data.
629 # If the name matches a key in the top-level hierarchy, bypass
630 # all further cleverness.
631 found_directly = False
632 try:
633 if isinstance(name, str): 633 ↛ 639
634 data = self._data[name]
635 found_directly = True
636 except KeyError:
637 pass
639 if not found_directly: 639 ↛ 640
640 keys = self._getKeyHierarchy(name)
642 hierarchy, complete = self._findInHierarchy(keys)
643 if not complete:
644 raise KeyError(f"{name} not found")
645 data = hierarchy[-1]
647 # In most cases we have a dict, and it's more efficient
648 # to check for a dict instance before checking the generic mapping.
649 if isinstance(data, dict | Mapping):
650 data = Config(data)
651 # Ensure that child configs inherit the parent internal delimiter
652 if self._D != Config._D: 652 ↛ 653
653 data._D = self._D
654 return data
656 def __setitem__(self, name: _ConfigKey, value: Any) -> None:
657 keys = self._getKeyHierarchy(name)
658 last = keys.pop()
659 if isinstance(value, Config):
660 value = copy.deepcopy(value._data)
662 hierarchy, complete = self._findInHierarchy(keys, create=True)
663 if hierarchy:
664 data = hierarchy[-1]
665 else:
666 data = self._data
668 try:
669 data[last] = value
670 except TypeError:
671 data[int(last)] = value
673 def __contains__(self, key: Any) -> bool:
674 if not isinstance(key, str | Sequence): 674 ↛ 675
675 return False
676 keys = self._getKeyHierarchy(key)
677 hierarchy, complete = self._findInHierarchy(keys)
678 return complete
680 def __delitem__(self, key: str | Sequence[str]) -> None:
681 keys = self._getKeyHierarchy(key)
682 last = keys.pop()
683 hierarchy, complete = self._findInHierarchy(keys)
684 if complete: 684 ↛ 691
685 if hierarchy: 685 ↛ 686
686 data = hierarchy[-1]
687 else:
688 data = self._data
689 del data[last]
690 else:
691 raise KeyError(f"{key} not found in Config")
693 def update(self, other: Mapping[str, Any]) -> None: # type: ignore[override]
694 """Update config from other `Config` or `dict`.
696 Like `dict.update()`, but will add or modify keys in nested dicts,
697 instead of overwriting the nested dict entirely.
699 Parameters
700 ----------
701 other : `dict` or `Config`
702 Source of configuration:
704 Examples
705 --------
706 >>> c = Config({"a": {"b": 1}})
707 >>> c.update({"a": {"c": 2}})
708 >>> print(c)
709 {'a': {'b': 1, 'c': 2}}
711 >>> foo = {"a": {"b": 1}}
712 >>> foo.update({"a": {"c": 2}})
713 >>> print(foo)
714 {'a': {'c': 2}}
715 """
716 _doUpdate(self._data, other)
718 def merge(self, other: Mapping) -> None:
719 """Merge another Config into this one.
721 Like `Config.update()`, but will add keys & values from other that
722 DO NOT EXIST in self.
724 Keys and values that already exist in self will NOT be overwritten.
726 Parameters
727 ----------
728 other : `dict` or `Config`
729 Source of configuration:
730 """
731 if not isinstance(other, Mapping):
732 raise TypeError(f"Can only merge a Mapping into a Config, not {type(other)}")
734 # Convert the supplied mapping to a Config for consistency
735 # This will do a deepcopy if it is already a Config
736 otherCopy = Config(other)
737 otherCopy.update(self)
738 self._data = otherCopy._data
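# For example (keys invented), contrasting merge() with update():
#
#   >>> c = Config({"a": {"b": 1}})
#   >>> c.merge({"a": {"b": 99, "c": 2}})   # existing "b" is kept
#   >>> c["a", "b"], c["a", "c"]
#   (1, 2)
#   >>> c.update({"a": {"b": 99}})          # update() does overwrite
#   >>> c["a", "b"]
#   99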
740 def nameTuples(self, topLevelOnly: bool = False) -> list[tuple[str, ...]]:
741 """Get tuples representing the name hierarchies of all keys.
743 The tuples returned from this method are guaranteed to be usable
744 to access items in the configuration object.
746 Parameters
747 ----------
748 topLevelOnly : `bool`, optional
749 If False, the default, a full hierarchy of names is returned.
750 If True, only the top-level keys are returned.
752 Returns
753 -------
754 names : `list` of `tuple` of `str`
755 List of all names present in the `Config` where each element
756 in the list is a `tuple` of strings representing the hierarchy.
757 """
758 if topLevelOnly: 758 ↛ 759
759 return [(k,) for k in self]
761 def getKeysAsTuples(
762 d: Mapping[str, Any] | Sequence[str], keys: list[tuple[str, ...]], base: tuple[str, ...] | None
763 ) -> None:
764 if isinstance(d, Sequence):
765 theseKeys: Iterable[Any] = range(len(d))
766 else:
767 theseKeys = d.keys()
768 for key in theseKeys:
769 val = d[key]
770 levelKey = base + (key,) if base is not None else (key,)
771 keys.append(levelKey)
772 if isinstance(val, Mapping | Sequence) and not isinstance(val, str):
773 getKeysAsTuples(val, keys, levelKey)
775 keys: list[tuple[str, ...]] = []
776 getKeysAsTuples(self._data, keys, None)
777 return keys
779 def names(self, topLevelOnly: bool = False, delimiter: str | None = None) -> list[str]:
780 """Get a delimited name of all the keys in the hierarchy.
782 The values returned from this method are guaranteed to be usable
783 to access items in the configuration object.
785 Parameters
786 ----------
787 topLevelOnly : `bool`, optional
788 If False, the default, a full hierarchy of names is returned.
789 If True, only the top-level keys are returned.
790 delimiter : `str`, optional
791 Delimiter to use when forming the keys. If the delimiter is
792 present in any of the keys, it will be escaped in the returned
793 names. If `None` is given, a delimiter will be chosen automatically.
794 The delimiter can not be alphanumeric.
796 Returns
797 -------
798 names : `list` of `str`
799 List of all names present in the `Config`.
801 Notes
802 -----
803 This is different than the built-in method `dict.keys`, which will
804 return only the first level keys.
806 Raises
807 ------
808 ValueError
809 Raised if the supplied delimiter is alphanumeric.
810 """
811 if topLevelOnly:
812 return list(self.keys())
814 # Get all the tuples of hierarchical keys
815 nameTuples = self.nameTuples()
817 if delimiter is not None and delimiter.isalnum():
818 raise ValueError(f"Supplied delimiter ({delimiter!r}) must not be alphanumeric.")
820 if delimiter is None:
821 # Start with something, and ensure it does not need to be
822 # escaped (it is much easier to understand if not escaped)
823 delimiter = self._D
825 # Form big string for easy check of delimiter clash
826 combined = "".join("".join(str(s) for s in k) for k in nameTuples)
828 # Try a delimiter and keep trying until we get something that
829 # works.
830 ntries = 0
831 while delimiter in combined:
832 log.debug("Delimiter '%s' could not be used. Trying another.", delimiter)
833 ntries += 1
835 if ntries > 100:
836 raise ValueError(f"Unable to determine a delimiter for Config {self}")
838 # try another one
839 while True:
840 delimiter = chr(ord(delimiter) + 1)
841 if not delimiter.isalnum():
842 break
844 log.debug("Using delimiter %r", delimiter)
846 # Form the keys, escaping the delimiter if necessary
847 strings = [
848 delimiter + delimiter.join(str(s).replace(delimiter, f"\\{delimiter}") for s in k)
849 for k in nameTuples
850 ]
851 return strings
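# For example (keys invented for illustration):
#
#   >>> c = Config({"a": {"b": 1}, "c": 2})
#   >>> c.nameTuples()
#   [('a',), ('a', 'b'), ('c',)]
#   >>> c.names(delimiter=".")
#   ['.a', '.a.b', '.c']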
853 def asArray(self, name: str | Sequence[str]) -> Sequence[Any]:
854 """Get a value as an array.
856 May contain one or more elements.
858 Parameters
859 ----------
860 name : `str`
861 Key to use to retrieve value.
863 Returns
864 -------
865 array : `collections.abc.Sequence`
866 The value corresponding to name, but guaranteed to be returned
867 as a list with at least one element. If the value is a
868 `~collections.abc.Sequence` (and not a `str`) the value itself
869 will be returned, else the value will be the first element.
870 """
871 val = self.get(name)
872 if isinstance(val, str) or not isinstance(val, Sequence):
873 val = [val]
874 return val
876 def __eq__(self, other: Any) -> bool:
877 if isinstance(other, Config):
878 other = other._data
879 return self._data == other
881 def __ne__(self, other: Any) -> bool:
882 if isinstance(other, Config):
883 other = other._data
884 return self._data != other
886 #######
887 # i/o #
889 def dump(self, output: IO | None = None, format: str = "yaml") -> str | None:
890 """Write the config to an output stream.
892 Parameters
893 ----------
894 output : `IO`, optional
895 The stream to use for output. If `None` the serialized content
896 will be returned.
897 format : `str`, optional
898 The format to use for the output. Can be "yaml" or "json".
900 Returns
901 -------
902 serialized : `str` or `None`
903 If a stream was given the stream will be used and the return
904 value will be `None`. If the stream was `None` the
905 serialization will be returned as a string.
906 """
907 if format == "yaml":
908 return yaml.safe_dump(self._data, output, default_flow_style=False)
909 elif format == "json":
910 if output is not None:
911 json.dump(self._data, output, ensure_ascii=False)
912 return None
913 else:
914 return json.dumps(self._data, ensure_ascii=False)
915 raise ValueError(f"Unsupported format for Config serialization: {format}")
917 def dumpToUri(
918 self,
919 uri: ResourcePathExpression,
920 updateFile: bool = True,
921 defaultFileName: str = "butler.yaml",
922 overwrite: bool = True,
923 ) -> None:
924 """Write the config to location pointed to by given URI.
926 Currently supports 's3' and 'file' URI schemes.
928 Parameters
929 ----------
930 uri : `lsst.resources.ResourcePathExpression`
931 URI of location where the Config will be written.
932 updateFile : bool, optional
933 If True and uri does not end in a filename with an extension,
934 `defaultFileName` will be appended to the target uri. True by default.
935 defaultFileName : `str`, optional
936 The file name that will be appended to the target uri if updateFile is
937 True and uri does not end in a file with an extension.
938 overwrite : bool, optional
939 If True the configuration will be written even if it already
940 exists at that location.
941 """
942 # Make local copy of URI or create new one
943 uri = ResourcePath(uri)
945 if updateFile and not uri.getExtension():
946 uri = uri.updatedFile(defaultFileName)
948 # Try to work out the format from the extension
949 ext = uri.getExtension()
950 format = ext[1:].lower()
952 output = self.dump(format=format)
953 assert output is not None, "Config.dump guarantees not-None return when output arg is None"
954 uri.write(output.encode(), overwrite=overwrite)
955 self.configFile = uri
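# A round-trip sketch of the serialization methods (content invented):
#
#   >>> s = Config({"a": {"b": 1}}).dump(format="yaml")
#   >>> Config.fromString(s, format="yaml")["a", "b"]
#   1
#
# dumpToUri() with a directory-like URI and updateFile=True would write the
# same serialization to "<uri>/butler.yaml" (the defaultFileName).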
957 @staticmethod
958 def updateParameters(
959 configType: type[ConfigSubset],
960 config: Config,
961 full: Config,
962 toUpdate: dict[str, Any] | None = None,
963 toCopy: Sequence[str | Sequence[str]] | None = None,
964 overwrite: bool = True,
965 toMerge: Sequence[str | Sequence[str]] | None = None,
966 ) -> None:
967 """Update specific config parameters.
969 Allows for named parameters to be set to new values in bulk, and
970 for other values to be set by copying from a reference config.
972 Assumes that the supplied config is compatible with ``configType``
973 and will attach the updated values to the supplied config by
974 looking for the related component key. It is assumed that
975 ``config`` and ``full`` are from the same part of the
976 configuration hierarchy.
978 Parameters
979 ----------
980 configType : `ConfigSubset`
981 Config type to use to extract relevant items from ``config``.
982 config : `Config`
983 A `Config` to update. Only the subset understood by
984 the supplied `ConfigSubset` will be modified. Default values
985 will not be inserted and the content will not be validated
986 since mandatory keys are allowed to be missing until
987 populated later by merging.
988 full : `Config`
989 A complete config with all defaults expanded that can be
990 converted to a ``configType``. Read-only and will not be
991 modified by this method. Values are read from here if
992 ``toCopy`` is defined.
994 Repository-specific options that should not be obtained
995 from defaults when Butler instances are constructed
996 should be copied from ``full`` to ``config``.
997 toUpdate : `dict`, optional
998 A `dict` defining the keys to update and the new value to use.
999 The keys and values can be any supported by `Config`
1000 assignment.
1001 toCopy : `tuple`, optional
1002 `tuple` of keys whose values should be copied from ``full``
1003 into ``config``.
1004 overwrite : `bool`, optional
1005 If `False`, do not modify a value in ``config`` if the key
1006 already exists. Default is always to overwrite.
1007 toMerge : `tuple`, optional
1008 Keys to merge content from full to config without overwriting
1009 pre-existing values. Only works if the key refers to a hierarchy.
1010 The ``overwrite`` flag is ignored.
1012 Raises
1013 ------
1014 ValueError
1015 Raised if none of ``toUpdate``, ``toCopy``, or ``toMerge`` is defined.
1016 """
1017 if toUpdate is None and toCopy is None and toMerge is None:
1018 raise ValueError("At least one of toUpdate, toCopy, or toMerge parameters must be set.")
1020 # If this is a parent configuration then we need to ensure that
1021 # the supplied config has the relevant component key in it.
1022 # If this is a parent configuration we add in the stub entry
1023 # so that the ConfigSubset constructor will do the right thing.
1024 # We check full for this since that is guaranteed to be complete.
1025 if (
1026 configType.component is not None
1027 and configType.component in full
1028 and configType.component not in config
1029 ):
1030 config[configType.component] = {}
1032 # Extract the part of the config we wish to update
1033 localConfig = configType(config, mergeDefaults=False, validate=False)
1035 key: str | Sequence[str]
1036 if toUpdate:
1037 for key, value in toUpdate.items():
1038 if key in localConfig and not overwrite:
1039 log.debug(
1040 "Not overriding key '%s' with value '%s' in config %s",
1041 key,
1042 value,
1043 localConfig.__class__.__name__,
1044 )
1045 else:
1046 localConfig[key] = value
1048 if toCopy or toMerge:
1049 localFullConfig = configType(full, mergeDefaults=False)
1051 if toCopy:
1052 for key in toCopy:
1053 if key in localConfig and not overwrite:
1054 log.debug(
1055 "Not overriding key '%s' from defaults in config %s",
1056 key,
1057 localConfig.__class__.__name__,
1058 )
1059 else:
1060 localConfig[key] = localFullConfig[key]
1061 if toMerge:
1062 for key in toMerge:
1063 if key in localConfig:
1064 # Get the node from the config to do the merge
1065 # but then have to reattach to the config.
1066 subset = localConfig[key]
1067 subset.merge(localFullConfig[key])
1068 localConfig[key] = subset
1069 else:
1070 localConfig[key] = localFullConfig[key]
1072 # Reattach to parent if this is a child config
1073 if configType.component is not None and configType.component in config:
1074 config[configType.component] = localConfig
1075 else:
1076 config.update(localConfig)
1078 def toDict(self) -> dict[str, Any]:
1079 """Convert a `Config` to a standalone hierarchical `dict`.
1081 Returns
1082 -------
1083 d : `dict`
1084 The standalone hierarchical `dict` with any `Config` classes
1085 in the hierarchy converted to `dict`.
1087 Notes
1088 -----
1089 This can be useful when passing a Config to some code that
1090 expects native Python types.
1091 """
1092 output = copy.deepcopy(self._data)
1093 for k, v in output.items():
1094 if isinstance(v, Config): 1094 ↛ 1095
1095 v = v.toDict()
1096 output[k] = v
1097 return output
1100class ConfigSubset(Config):
1101 """Config representing a subset of a more general configuration.
1103 Subclasses define their own component and when given a configuration
1104 that includes that component, the resulting configuration only includes
1105 the subset. For example, your config might contain ``dimensions`` if it's
1106 part of a global config and that subset will be stored. If ``dimensions``
1107 can not be found it is assumed that the entire contents of the
1108 configuration should be used.
1110 Default values are read from the environment or supplied search paths
1111 using the default configuration file name specified in the subclass.
1112 This allows a configuration class to be instantiated without any
1113 additional arguments.
1115 Additional validation can be specified to check for keys that are mandatory
1116 in the configuration.
1118 Parameters
1119 ----------
1120 other : `Config` or `~lsst.resources.ResourcePathExpression` or `dict`
1121 Argument specifying the configuration information as understood
1122 by `Config`
1123 validate : `bool`, optional
1124 If `True` required keys will be checked to ensure configuration
1125 consistency.
1126 mergeDefaults : `bool`, optional
1127 If `True` defaults will be read and the supplied config will
1128 be combined with the defaults, with the supplied values taking
1129 precedence.
1130 searchPaths : `list` or `tuple`, optional
1131 Explicit additional paths to search for defaults. They should
1132 be supplied in priority order. These paths have higher priority
1133 than those read from the environment in
1134 `ConfigSubset.defaultSearchPaths()`. Paths can be `str` referring to
1135 the local file system or URIs, `lsst.resources.ResourcePath`.
1136 """
1138 component: ClassVar[str | None] = None
1139 """Component to use from supplied config. Can be None. If specified the
1140 key is not required. Can be a full dot-separated path to a component.
1141 """
1143 requiredKeys: ClassVar[Sequence[str]] = ()
1144 """Keys that are required to be specified in the configuration.
1145 """
1147 defaultConfigFile: ClassVar[str | None] = None
1148 """Name of the file containing defaults for this config class.
1149 """
1151 def __init__(
1152 self,
1153 other: Config | ResourcePathExpression | Mapping[str, Any] | None = None,
1154 validate: bool = True,
1155 mergeDefaults: bool = True,
1156 searchPaths: Sequence[ResourcePathExpression] | None = None,
1157 ):
1158 # Create a blank object to receive the defaults
1159 # Once we have the defaults we then update with the external values
1160 super().__init__()
1162 # Create a standard Config rather than subset
1163 externalConfig = Config(other)
1165 # Select the part we need from it
1166 # To simplify the use of !include we also check for the existence of
1167 # component.component (since the included files can themselves
1168 # include the component name)
1169 if self.component is not None: 1169 ↛ 1178
1170 doubled = (self.component, self.component)
1171 # Must check for double depth first
1172 if doubled in externalConfig: 1172 ↛ 1173
1173 externalConfig = externalConfig[doubled]
1174 elif self.component in externalConfig:
1175 externalConfig._data = externalConfig._data[self.component]
1177 # Default files read to create this configuration
1178 self.filesRead: list[ResourcePath | str] = []
1180 # Assume we are not looking up child configurations
1181 containerKey = None
1183 # Sometimes we do not want to merge with defaults.
1184 if mergeDefaults:
1185 # Supplied search paths have highest priority
1186 fullSearchPath: list[ResourcePath | str] = []
1187 if searchPaths: 1187 ↛ 1188
1188 fullSearchPath = [ResourcePath(path) for path in searchPaths]
1190 # Read default paths from environment
1191 fullSearchPath.extend(self.defaultSearchPaths())
1193 # There are two places to find defaults for this particular config
1194 # - The "defaultConfigFile" defined in the subclass
1195 # - The class specified in the "cls" element in the config.
1196 # Read cls after merging in case it changes.
1197 if self.defaultConfigFile is not None: 1197 ↛ 1202
1198 self._updateWithConfigsFromPath(fullSearchPath, self.defaultConfigFile)
1200 # Can have a class specification in the external config (priority)
1201 # or from the defaults.
1202 pytype = None
1203 if "cls" in externalConfig: 1203 ↛ 1204line 1203 didn't jump to line 1204, because the condition on line 1203 was never true
1204 pytype = externalConfig["cls"]
1205 elif "cls" in self: 1205 ↛ 1206line 1205 didn't jump to line 1206, because the condition on line 1205 was never true
1206 pytype = self["cls"]
1208 if pytype is not None: 1208 ↛ 1209
1209 try:
1210 cls = doImportType(pytype)
1211 except ImportError as e:
1212 raise RuntimeError(f"Failed to import cls '{pytype}' for config {type(self)}") from e
1213 # The class referenced from the config file is not required
1214 # to specify a default config file.
1215 defaultsFile = getattr(cls, "defaultConfigFile", None)
1216 if defaultsFile is not None:
1217 self._updateWithConfigsFromPath(fullSearchPath, defaultsFile)
1219 # Get the container key in case we need it and it is specified.
1220 containerKey = getattr(cls, "containerKey", None)
1222 # Now update this object with the external values so that the external
1223 # values always override the defaults
1224 self.update(externalConfig)
1225 if not self.configFile: 1225 ↛ 1231
1226 self.configFile = externalConfig.configFile
1228 # If this configuration has child configurations of the same
1229 # config class, we need to expand those defaults as well.
1231 if mergeDefaults and containerKey is not None and containerKey in self: 1231 ↛ 1232
1232 for idx, subConfig in enumerate(self[containerKey]):
1233 self[containerKey, idx] = type(self)(
1234 other=subConfig, validate=validate, mergeDefaults=mergeDefaults, searchPaths=searchPaths
1235 )
1237 if validate:
1238 self.validate()
1240 @classmethod
1241 def defaultSearchPaths(cls) -> list[ResourcePath | str]:
1242 """Read environment to determine search paths to use.
1244 Global defaults, at lowest priority, are found in the ``config``
1245 directory of the butler source tree. Additional defaults can be
1246 defined using the environment variable ``$DAF_BUTLER_CONFIG_PATH``,
1247 which is a PATH-like variable where paths at the front of the list
1248 have priority over those later.
1250 Returns
1251 -------
1252 paths : `list`
1253 Returns a list of paths to search. The returned order is in
1254 priority with the highest priority paths first. The butler config
1255 configuration resources will not be included here but will
1256 always be searched last.
1258 Notes
1259 -----
1260 The environment variable is split on the standard ``:`` path separator.
1261 This currently makes it incompatible with usage of URIs.
1262 """
1263 # We can pick up defaults from multiple search paths
1264 # We fill defaults by using the butler config path and then
1265 # the config path environment variable in reverse order.
1266 defaultsPaths: list[str | ResourcePath] = []
1268 if CONFIG_PATH in os.environ: 1268 ↛ 1269
1269 externalPaths = os.environ[CONFIG_PATH].split(os.pathsep)
1270 defaultsPaths.extend(externalPaths)
1272 # Add the package defaults as a resource
1273 defaultsPaths.append(ResourcePath(f"resource://{cls.resourcesPackage}/configs", forceDirectory=True))
1274 return defaultsPaths
1276 def _updateWithConfigsFromPath(
1277 self, searchPaths: Sequence[str | ResourcePath], configFile: ResourcePath | str
1278 ) -> None:
1279 """Search the supplied paths, merging the configuration values.
1281 The values read will override values currently stored in the object.
1282 Every file found in the path will be read, such that the earlier
1283 path entries have higher priority.
1285 Parameters
1286 ----------
1287 searchPaths : `list` of `lsst.resources.ResourcePath`, `str`
1288 Paths to search for the supplied configFile. This path
1289 is the priority order, such that files read from the
1290 first path entry will be selected over those read from
1291 a later path. Can contain `str` referring to the local file
1292 system or a URI string.
1293 configFile : `lsst.resources.ResourcePath`
1294 File to locate in path. If absolute path it will be read
1295 directly and the search path will not be used. Can be a URI
1296 to an explicit resource (which will ignore the search path)
1297 which is assumed to exist.
1298 """
1299 uri = ResourcePath(configFile)
1300 if uri.isabs() and uri.exists(): 1300 ↛ 1302
1301 # Assume this resource exists
1302 self._updateWithOtherConfigFile(configFile)
1303 self.filesRead.append(configFile)
1304 else:
1305 # Reverse order so that high priority entries
1306 # update the object last.
1307 for pathDir in reversed(searchPaths):
1308 if isinstance(pathDir, str | ResourcePath): 1308 ↛ 1315
1309 pathDir = ResourcePath(pathDir, forceDirectory=True)
1310 file = pathDir.join(configFile)
1311 if file.exists(): 1311 ↛ 1307
1312 self.filesRead.append(file)
1313 self._updateWithOtherConfigFile(file)
1314 else:
1315 raise ValueError(f"Unexpected search path type encountered: {pathDir!r}")
1317 def _updateWithOtherConfigFile(self, file: Config | str | ResourcePath | Mapping[str, Any]) -> None:
1318 """Read in some defaults and update.
1320 Update the configuration by reading the supplied file as a config
1321 of this class, and merging such that these values override the
1322 current values. Contents of the external config are not validated.
1324 Parameters
1325 ----------
1326 file : `Config`, `str`, `lsst.resources.ResourcePath`, or `dict`
1327 Entity that can be converted to a `ConfigSubset`.
1328 """
1329 # Use this class to read the defaults so that subsetting can happen
1330 # correctly.
1331 externalConfig = type(self)(file, validate=False, mergeDefaults=False)
1332 self.update(externalConfig)
1334 def validate(self) -> None:
1335 """Check that mandatory keys are present in this configuration.
1337 Ignored if ``requiredKeys`` is empty.
1338 """
1339 # Validation
1340 missing = [k for k in self.requiredKeys if k not in self._data]
1341 if missing: 1341 ↛ 1342
1342 raise KeyError(f"Mandatory keys ({missing}) missing from supplied configuration for {type(self)}")