Coverage for python/lsst/daf/butler/_config.py: 45%
487 statements
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-13 10:57 +0000
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-13 10:57 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28"""Configuration control."""
30from __future__ import annotations
32__all__ = ("Config", "ConfigSubset")
34import copy
35import io
36import json
37import logging
38import os
39import pprint
40import sys
41from collections import defaultdict
42from collections.abc import Iterable, Iterator, Mapping, MutableMapping, Sequence
43from pathlib import Path
44from typing import IO, TYPE_CHECKING, Any, ClassVar, cast
46import yaml
47from lsst.resources import ResourcePath, ResourcePathExpression
48from lsst.utils import doImportType
49from yaml.representer import Representer
# Register a representer so that ``defaultdict`` instances are emitted
# using the same representation as a plain ``dict``.
yaml.add_representer(defaultdict, Representer.represent_dict)

# Config module logger
log = logging.getLogger(__name__)

# PATH-like environment variable to use for defaults.
CONFIG_PATH = "DAF_BUTLER_CONFIG_PATH"

if TYPE_CHECKING:
    yamlLoader = yaml.SafeLoader
else:
    # Not all installations have the C library, so fall back to the pure
    # Python loader when the accelerated one is missing
    # (but assume for mypy's sake that they're the same).
    yamlLoader = getattr(yaml, "CSafeLoader", yaml.SafeLoader)
71def _doUpdate(d: Mapping[str, Any], u: Mapping[str, Any]) -> Mapping[str, Any]:
72 if not isinstance(u, Mapping) or not isinstance(d, MutableMapping): 72 ↛ 73line 72 didn't jump to line 73, because the condition on line 72 was never true
73 raise RuntimeError(f"Only call update with Mapping, not {type(d)}")
74 for k, v in u.items():
75 if isinstance(v, Mapping):
76 lhs = d.get(k, {})
77 if not isinstance(lhs, Mapping): 77 ↛ 78line 77 didn't jump to line 78, because the condition on line 77 was never true
78 lhs = {}
79 d[k] = _doUpdate(lhs, v)
80 else:
81 d[k] = v
82 return d
85def _checkNextItem(k: str | int, d: Any, create: bool, must_be_dict: bool) -> tuple[Any, bool]:
86 """See if k is in d and if it is return the new child."""
87 nextVal = None
88 isThere = False
89 if d is None: 89 ↛ 91line 89 didn't jump to line 91, because the condition on line 89 was never true
90 # We have gone past the end of the hierarchy
91 pass
92 elif not must_be_dict and isinstance(d, Sequence): 92 ↛ 97line 92 didn't jump to line 97, because the condition on line 92 was never true
93 # Check for Sequence first because for lists
94 # __contains__ checks whether value is found in list
95 # not whether the index exists in list. When we traverse
96 # the hierarchy we are interested in the index.
97 try:
98 nextVal = d[int(k)]
99 isThere = True
100 except IndexError:
101 pass
102 except ValueError:
103 isThere = k in d
104 elif k in d:
105 nextVal = d[k]
106 isThere = True
107 elif create: 107 ↛ 108line 107 didn't jump to line 108, because the condition on line 107 was never true
108 d[k] = {}
109 nextVal = d[k]
110 isThere = True
112 return nextVal, isThere
class Loader(yamlLoader):
    """YAML Loader that supports file include directives.

    Uses ``!include`` directive in a YAML file to point to another
    YAML file to be included. The path in the include directive is relative
    to the file containing that directive.

        storageClasses: !include storageClasses.yaml

    Examples
    --------
    >>> with open("document.yaml", "r") as f:
    ...     data = yaml.load(f, Loader=Loader)

    Notes
    -----
    See https://davidchall.github.io/yaml-includes.html

    Parameters
    ----------
    stream : `str` or `io.IO`
        The stream to parse.
    """

    def __init__(self, stream: str | IO):  # types-PyYAML annotates 'stream' with a private type
        super().__init__(stream)
        # if this is a string and not a stream we may well lack a name
        if hasattr(stream, "name"):
            self._root = ResourcePath(stream.name, forceDirectory=False)
        else:
            # No choice but to assume a local filesystem
            self._root = ResourcePath("no-file.yaml", forceDirectory=False)
        self.add_constructor("!include", Loader.include)

    def include(self, node: yaml.Node) -> list[Any] | dict[str, Any]:
        """Construct the value of an ``!include`` node.

        Parameters
        ----------
        node : `yaml.Node`
            Node carrying the ``!include`` tag. A scalar names a single
            file, a sequence names several files and a mapping associates
            string keys with files.

        Returns
        -------
        result : `list` or `dict` or `typing.Any`
            For a scalar node, whatever the included file parses to. For a
            sequence node, a `list` with the parsed content of each file.
            For a mapping node, a `dict` from each key to the parsed
            content of the associated file.

        Raises
        ------
        TypeError
            Raised if a mapping node has a key that is not a string.
        yaml.constructor.ConstructorError
            Raised if the node is not a scalar, sequence, or mapping.
        """
        result: list[Any] | dict[str, Any]
        if isinstance(node, yaml.ScalarNode):
            return self.extractFile(self.construct_scalar(node))  # type: ignore[arg-type]

        elif isinstance(node, yaml.SequenceNode):
            result = []
            for filename in self.construct_sequence(node):
                result.append(self.extractFile(filename))
            return result

        elif isinstance(node, yaml.MappingNode):
            result = {}
            for k, v in self.construct_mapping(node).items():
                if not isinstance(k, str):
                    raise TypeError(f"Expected only strings in YAML mapping; got {k!r} of type {type(k)}.")
                result[k] = self.extractFile(v)
            return result

        else:
            # Attach the explanation and the location to the exception
            # itself so that it is not lost when stderr is not captured.
            raise yaml.constructor.ConstructorError(
                None,
                None,
                "unrecognised node type in !include statement",
                getattr(node, "start_mark", None),
            )

    def extractFile(self, filename: str) -> Any:
        """Read and parse the YAML file referred to by an include.

        Parameters
        ----------
        filename : `str`
            Location given in the ``!include`` directive. Used as is when it
            has a URI scheme, otherwise resolved relative to the file
            currently being parsed.

        Returns
        -------
        content : `typing.Any`
            Result of parsing the file with this same loader class.
        """
        # It is possible for the !include to point to an explicit URI
        # instead of a relative URI, therefore we first see if it is
        # scheme-less or not. If it has a scheme we use it directly
        # if it is scheme-less we use it relative to the file root.
        requesteduri = ResourcePath(filename, forceAbsolute=False, forceDirectory=False)

        if requesteduri.scheme:
            fileuri = requesteduri
        else:
            fileuri = self._root.updatedFile(filename)

        log.debug("Opening YAML file via !include: %s", fileuri)

        # Read all the data from the resource
        data = fileuri.read()

        # Store the bytes into a BytesIO so we can attach a .name
        stream = io.BytesIO(data)
        stream.name = fileuri.geturl()
        return yaml.load(stream, Loader)
195# Type of the key used for accessing items in configuration object. It can be
196# a single string as described below or a sequence of srtings and integer
197# indices. Indices are used to access items in sequences stored in config.
198_ConfigKey = str | Sequence[str | int]
201class Config(MutableMapping):
202 r"""Implements a datatype that is used by `Butler` for configuration.
204 It is essentially a `dict` with key/value pairs, including nested dicts
205 (as values). In fact, it can be initialized with a `dict`.
206 This is explained next:
208 Config extends the `dict` api so that hierarchical values may be accessed
209 with delimited notation or as a tuple. If a string is given the delimiter
210 is picked up from the first character in that string. For example,
211 ``foo.getValue(".a.b.c")``, ``foo["a"]["b"]["c"]``, ``foo["a", "b", "c"]``,
212 ``foo[".a.b.c"]``, and ``foo["/a/b/c"]`` all achieve the same outcome.
213 If the first character is alphanumeric, no delimiter will be used.
214 ``foo["a.b.c"]`` will be a single key ``a.b.c`` as will ``foo[":a.b.c"]``.
215 Unicode characters can be used as the delimiter for distinctiveness if
216 required.
218 If a key in the hierarchy starts with a non-alphanumeric character care
219 should be used to ensure that either the tuple interface is used or
220 a distinct delimiter is always given in string form.
222 Finally, the delimiter can be escaped if it is part of a key and also
223 has to be used as a delimiter. For example, ``foo[r".a.b\.c"]`` results in
224 a two element hierarchy of ``a`` and ``b.c``. For hard-coded strings it is
225 always better to use a different delimiter in these cases.
227 Note that adding a multi-level key implicitly creates any nesting levels
228 that do not exist, but removing multi-level keys does not automatically
229 remove empty nesting levels. As a result:
231 >>> c = Config()
232 >>> c[".a.b"] = 1
233 >>> del c[".a.b"]
234 >>> c["a"]
235 Config({})
237 Storage formats supported:
239 - yaml: read and write is supported.
240 - json: read and write is supported but no ``!include`` directive.
242 Parameters
243 ----------
244 other : `lsst.resources.ResourcePath` or `Config` or `dict`
245 Other source of configuration, can be:
247 - (`lsst.resources.ResourcePathExpression`)
248 Treated as a URI to a config file. Must end with ".yaml" or ".json".
249 - (`Config`) Copies the other Config's values into this one.
250 - (`dict`) Copies the values from the dict into this Config.
252 If `None` is provided an empty `Config` will be created.
253 """
255 _D: str = "→"
256 """Default internal delimiter to use for components in the hierarchy when
257 constructing keys for external use (see `Config.names()`)."""
259 includeKey: ClassVar[str] = "includeConfigs"
260 """Key used to indicate that another config should be included at this
261 part of the hierarchy."""
263 resourcesPackage: str = "lsst.daf.butler"
264 """Package to search for default configuration data. The resources
265 themselves will be within a ``configs`` resource hierarchy."""
267 def __init__(self, other: ResourcePathExpression | Config | Mapping[str, Any] | None = None):
268 self._data: dict[str, Any] = {}
269 self.configFile: ResourcePath | None = None
271 if other is None:
272 return
274 if isinstance(other, Config):
275 # Deep copy might be more efficient but if someone has overridden
276 # a config entry to store a complex object then deep copy may
277 # fail. Safer to use update().
278 self.update(other._data)
279 self.configFile = other.configFile
280 elif isinstance(other, dict | Mapping):
281 # In most cases we have a dict, and it's more efficient
282 # to check for a dict instance before checking the generic mapping.
283 self.update(other)
284 elif isinstance(other, str | ResourcePath | Path): 284 ↛ 291line 284 didn't jump to line 291, because the condition on line 284 was never false
285 # if other is a string, assume it is a file path/URI
286 self.__initFromUri(other)
287 self._processExplicitIncludes()
288 else:
289 # if the config specified by other could not be recognized raise
290 # a runtime error.
291 raise RuntimeError(f"A Config could not be loaded from other: {other}")
293 def ppprint(self) -> str:
294 """Return config as formatted readable string.
296 Examples
297 --------
298 use: ``pdb> print(myConfigObject.ppprint())``
300 Returns
301 -------
302 s : `str`
303 A prettyprint formatted string representing the config.
304 """
305 return pprint.pformat(self._data, indent=2, width=1)
307 def __repr__(self) -> str:
308 return f"{type(self).__name__}({self._data!r})"
310 def __str__(self) -> str:
311 return self.ppprint()
313 def __len__(self) -> int:
314 return len(self._data)
316 def __iter__(self) -> Iterator[str]:
317 return iter(self._data)
319 def copy(self) -> Config:
320 return type(self)(self)
322 @classmethod
323 def fromString(cls, string: str, format: str = "yaml") -> Config:
324 """Create a new Config instance from a serialized string.
326 Parameters
327 ----------
328 string : `str`
329 String containing content in specified format.
330 format : `str`, optional
331 Format of the supplied string. Can be ``json`` or ``yaml``.
333 Returns
334 -------
335 c : `Config`
336 Newly-constructed Config.
337 """
338 if format == "yaml":
339 new_config = cls().__initFromYaml(string)
340 elif format == "json":
341 new_config = cls().__initFromJson(string)
342 else:
343 raise ValueError(f"Unexpected format of string: {format}")
344 new_config._processExplicitIncludes()
345 return new_config
347 @classmethod
348 def fromYaml(cls, string: str) -> Config:
349 """Create a new Config instance from a YAML string.
351 Parameters
352 ----------
353 string : `str`
354 String containing content in YAML format.
356 Returns
357 -------
358 c : `Config`
359 Newly-constructed Config.
360 """
361 return cls.fromString(string, format="yaml")
363 def __initFromUri(self, path: ResourcePathExpression) -> None:
364 """Load a file from a path or an URI.
366 Parameters
367 ----------
368 path : `lsst.resources.ResourcePathExpression`
369 Path or a URI to a persisted config file.
370 """
371 uri = ResourcePath(path, forceDirectory=False)
372 ext = uri.getExtension()
373 if ext == ".yaml": 373 ↛ 380line 373 didn't jump to line 380, because the condition on line 373 was never false
374 log.debug("Opening YAML config file: %s", uri.geturl())
375 content = uri.read()
376 # Use a stream so we can name it
377 stream = io.BytesIO(content)
378 stream.name = uri.geturl()
379 self.__initFromYaml(stream)
380 elif ext == ".json":
381 log.debug("Opening JSON config file: %s", uri.geturl())
382 content = uri.read()
383 self.__initFromJson(content)
384 else:
385 # This URI does not have a valid extension. It might be because
386 # we ended up with a directory and not a file. Before we complain
387 # about an extension, do an existence check. No need to do
388 # the (possibly expensive) existence check in the default code
389 # path above because we will find out soon enough that the file
390 # is not there.
391 if not uri.exists():
392 raise FileNotFoundError(f"Config location {uri} does not exist.")
393 raise RuntimeError(f"The Config URI does not have a supported extension: {uri}")
394 self.configFile = uri
396 def __initFromYaml(self, stream: IO | str | bytes) -> Config:
397 """Load a YAML config from any readable stream that contains one.
399 Parameters
400 ----------
401 stream : `IO` or `str`
402 Stream to pass to the YAML loader. Accepts anything that
403 `yaml.load` accepts. This can include a string as well as an
404 IO stream.
406 Raises
407 ------
408 yaml.YAMLError
409 If there is an error loading the file.
410 """
411 content = yaml.load(stream, Loader=Loader)
412 if content is None: 412 ↛ 413line 412 didn't jump to line 413, because the condition on line 412 was never true
413 content = {}
414 self._data = content
415 return self
417 def __initFromJson(self, stream: IO | str | bytes) -> Config:
418 """Load a JSON config from any readable stream that contains one.
420 Parameters
421 ----------
422 stream : `IO` or `str`
423 Stream to pass to the JSON loader. This can include a string as
424 well as an IO stream.
426 Raises
427 ------
428 TypeError:
429 Raised if there is an error loading the content.
430 """
431 if isinstance(stream, bytes | str):
432 content = json.loads(stream)
433 else:
434 content = json.load(stream)
435 if content is None:
436 content = {}
437 self._data = content
438 return self
440 def _processExplicitIncludes(self) -> None:
441 """Scan through the configuration searching for the special includes.
443 Looks for ``includeConfigs`` directive and processes the includes.
444 """
445 # Search paths for config files
446 searchPaths = [ResourcePath(os.path.curdir, forceDirectory=True)]
447 if self.configFile is not None: 447 ↛ 455line 447 didn't jump to line 455, because the condition on line 447 was never false
448 if isinstance(self.configFile, ResourcePath): 448 ↛ 451line 448 didn't jump to line 451, because the condition on line 448 was never false
449 configDir = self.configFile.dirname()
450 else:
451 raise RuntimeError(f"Unexpected type for config file: {self.configFile}")
452 searchPaths.append(configDir)
454 # Ensure we know what delimiter to use
455 names = self.nameTuples()
456 for path in names:
457 if path[-1] == self.includeKey: 457 ↛ 458line 457 didn't jump to line 458, because the condition on line 457 was never true
458 log.debug("Processing file include directive at %s", self._D + self._D.join(path))
459 basePath = path[:-1]
461 # Extract the includes and then delete them from the config
462 includes = self[path]
463 del self[path]
465 # Be consistent and convert to a list
466 if not isinstance(includes, list):
467 includes = [includes]
469 # Read each file assuming it is a reference to a file
470 # The file can be relative to config file or cwd
471 # ConfigSubset search paths are not used
472 subConfigs = []
473 for fileName in includes:
474 # Expand any shell variables -- this could be URI
475 fileName = ResourcePath(
476 os.path.expandvars(fileName), forceAbsolute=False, forceDirectory=False
477 )
478 found = None
479 if fileName.isabs():
480 found = fileName
481 else:
482 for dir in searchPaths:
483 specific = dir.join(fileName.path)
484 # Remote resource check might be expensive
485 if specific.exists():
486 found = specific
487 break
488 if not found:
489 raise RuntimeError(f"Unable to find referenced include file: {fileName}")
491 # Read the referenced Config as a Config
492 subConfigs.append(type(self)(found))
494 # Now we need to merge these sub configs with the current
495 # information that was present in this node in the config
496 # tree with precedence given to the explicit values
497 newConfig = subConfigs.pop(0)
498 for sc in subConfigs:
499 newConfig.update(sc)
501 # Explicit values take precedence
502 if not basePath:
503 # This is an include at the root config
504 newConfig.update(self)
505 # Replace the current config
506 self._data = newConfig._data
507 else:
508 newConfig.update(self[basePath])
509 # And reattach to the base config
510 self[basePath] = newConfig
512 @staticmethod
513 def _splitIntoKeys(key: _ConfigKey) -> list[str | int]:
514 r"""Split the argument for get/set/in into a hierarchical list.
516 Parameters
517 ----------
518 key : `str` or iterable
519 Argument given to get/set/in. If an iterable is provided it will
520 be converted to a list. If the first character of the string
521 is not an alphanumeric character then it will be used as the
522 delimiter for the purposes of splitting the remainder of the
523 string. If the delimiter is also in one of the keys then it
524 can be escaped using ``\``. There is no default delimiter.
526 Returns
527 -------
528 keys : `list`
529 Hierarchical keys as a `list`.
530 """
531 if isinstance(key, str):
532 if not key[0].isalnum(): 532 ↛ 533line 532 didn't jump to line 533, because the condition on line 532 was never true
533 d = key[0]
534 key = key[1:]
535 else:
536 return [
537 key,
538 ]
539 escaped = f"\\{d}"
540 temp = None
541 if escaped in key:
542 # Complain at the attempt to escape the escape
543 doubled = rf"\{escaped}"
544 if doubled in key:
545 raise ValueError(
546 f"Escaping an escaped delimiter ({doubled} in {key}) is not yet supported."
547 )
548 # Replace with a character that won't be in the string
549 temp = "\r"
550 if temp in key or d == temp:
551 raise ValueError(
552 f"Can not use character {temp!r} in hierarchical key or as"
553 " delimiter if escaping the delimiter"
554 )
555 key = key.replace(escaped, temp)
556 hierarchy = key.split(d)
557 if temp:
558 hierarchy = [h.replace(temp, d) for h in hierarchy]
559 # Copy the list to keep mypy quiet.
560 return list(hierarchy)
561 elif isinstance(key, Iterable): 561 ↛ 565line 561 didn't jump to line 565, because the condition on line 561 was never false
562 return list(key)
563 else:
564 # Do not try to guess.
565 raise TypeError(f"Provided key [{key}] neither str nor iterable.")
567 def _getKeyHierarchy(self, name: _ConfigKey) -> list[str | int]:
568 """Retrieve the key hierarchy for accessing the Config.
570 Parameters
571 ----------
572 name : `str` or `tuple`
573 Delimited string or `tuple` of hierarchical keys.
575 Returns
576 -------
577 hierarchy : `list` of `str`
578 Hierarchy to use as a `list`. If the name is available directly
579 as a key in the Config it will be used regardless of the presence
580 of any nominal delimiter.
581 """
582 keys: list[str | int]
583 if name in self._data:
584 keys = [cast(str, name)]
585 else:
586 keys = self._splitIntoKeys(name)
587 return keys
589 def _findInHierarchy(self, keys: Sequence[str | int], create: bool = False) -> tuple[list[Any], bool]:
590 """Look for hierarchy of keys in Config.
592 Parameters
593 ----------
594 keys : `list` or `tuple`
595 Keys to search in hierarchy.
596 create : `bool`, optional
597 If `True`, if a part of the hierarchy does not exist, insert an
598 empty `dict` into the hierarchy.
600 Returns
601 -------
602 hierarchy : `list`
603 List of the value corresponding to each key in the supplied
604 hierarchy. Only keys that exist in the hierarchy will have
605 a value.
606 complete : `bool`
607 `True` if the full hierarchy exists and the final element
608 in ``hierarchy`` is the value of relevant value.
609 """
610 d: Any = self._data
612 # For the first key, d must be a dict so it is a waste
613 # of time to check for a sequence.
614 must_be_dict = True
616 hierarchy = []
617 complete = True
618 for k in keys:
619 d, isThere = _checkNextItem(k, d, create, must_be_dict)
620 if isThere:
621 hierarchy.append(d)
622 else:
623 complete = False
624 break
625 # Second time round it might be a sequence.
626 must_be_dict = False
628 return hierarchy, complete
630 def __getitem__(self, name: _ConfigKey) -> Any:
631 # Override the split for the simple case where there is an exact
632 # match. This allows `Config.items()` to work via a simple
633 # __iter__ implementation that returns top level keys of
634 # self._data.
636 # If the name matches a key in the top-level hierarchy, bypass
637 # all further cleverness.
638 found_directly = False
639 try:
640 if isinstance(name, str): 640 ↛ 646line 640 didn't jump to line 646, because the condition on line 640 was never false
641 data = self._data[name]
642 found_directly = True
643 except KeyError:
644 pass
646 if not found_directly: 646 ↛ 647line 646 didn't jump to line 647, because the condition on line 646 was never true
647 keys = self._getKeyHierarchy(name)
649 hierarchy, complete = self._findInHierarchy(keys)
650 if not complete:
651 raise KeyError(f"{name} not found")
652 data = hierarchy[-1]
654 # In most cases we have a dict, and it's more efficient
655 # to check for a dict instance before checking the generic mapping.
656 if isinstance(data, dict | Mapping):
657 data = Config(data)
658 # Ensure that child configs inherit the parent internal delimiter
659 if self._D != Config._D: 659 ↛ 660line 659 didn't jump to line 660, because the condition on line 659 was never true
660 data._D = self._D
661 return data
663 def __setitem__(self, name: _ConfigKey, value: Any) -> None:
664 keys = self._getKeyHierarchy(name)
665 last = keys.pop()
666 if isinstance(value, Config):
667 value = copy.deepcopy(value._data)
669 hierarchy, complete = self._findInHierarchy(keys, create=True)
670 if hierarchy:
671 data = hierarchy[-1]
672 else:
673 data = self._data
675 try:
676 data[last] = value
677 except TypeError:
678 data[int(last)] = value
680 def __contains__(self, key: Any) -> bool:
681 if not isinstance(key, str | Sequence): 681 ↛ 682line 681 didn't jump to line 682, because the condition on line 681 was never true
682 return False
683 keys = self._getKeyHierarchy(key)
684 hierarchy, complete = self._findInHierarchy(keys)
685 return complete
687 def __delitem__(self, key: str | Sequence[str]) -> None:
688 keys = self._getKeyHierarchy(key)
689 last = keys.pop()
690 hierarchy, complete = self._findInHierarchy(keys)
691 if complete: 691 ↛ 698line 691 didn't jump to line 698, because the condition on line 691 was never false
692 if hierarchy: 692 ↛ 693line 692 didn't jump to line 693, because the condition on line 692 was never true
693 data = hierarchy[-1]
694 else:
695 data = self._data
696 del data[last]
697 else:
698 raise KeyError(f"{key} not found in Config")
700 def update(self, other: Mapping[str, Any]) -> None: # type: ignore[override]
701 """Update config from other `Config` or `dict`.
703 Like `dict.update()`, but will add or modify keys in nested dicts,
704 instead of overwriting the nested dict entirely.
706 Parameters
707 ----------
708 other : `dict` or `Config`
709 Source of configuration.
711 Examples
712 --------
713 >>> c = Config({"a": {"b": 1}})
714 >>> c.update({"a": {"c": 2}})
715 >>> print(c)
716 {'a': {'b': 1, 'c': 2}}
718 >>> foo = {"a": {"b": 1}}
719 >>> foo.update({"a": {"c": 2}})
720 >>> print(foo)
721 {'a': {'c': 2}}
722 """
723 _doUpdate(self._data, other)
725 def merge(self, other: Mapping) -> None:
726 """Merge another Config into this one.
728 Like `Config.update()`, but will add keys & values from other that
729 DO NOT EXIST in self.
731 Keys and values that already exist in self will NOT be overwritten.
733 Parameters
734 ----------
735 other : `dict` or `Config`
736 Source of configuration.
737 """
738 if not isinstance(other, Mapping):
739 raise TypeError(f"Can only merge a Mapping into a Config, not {type(other)}")
741 # Convert the supplied mapping to a Config for consistency
742 # This will do a deepcopy if it is already a Config
743 otherCopy = Config(other)
744 otherCopy.update(self)
745 self._data = otherCopy._data
747 def nameTuples(self, topLevelOnly: bool = False) -> list[tuple[str, ...]]:
748 """Get tuples representing the name hierarchies of all keys.
750 The tuples returned from this method are guaranteed to be usable
751 to access items in the configuration object.
753 Parameters
754 ----------
755 topLevelOnly : `bool`, optional
756 If False, the default, a full hierarchy of names is returned.
757 If True, only the top level are returned.
759 Returns
760 -------
761 names : `list` of `tuple` of `str`
762 List of all names present in the `Config` where each element
763 in the list is a `tuple` of strings representing the hierarchy.
764 """
765 if topLevelOnly: 765 ↛ 766line 765 didn't jump to line 766, because the condition on line 765 was never true
766 return [(k,) for k in self]
768 def getKeysAsTuples(
769 d: Mapping[str, Any] | Sequence[str], keys: list[tuple[str, ...]], base: tuple[str, ...] | None
770 ) -> None:
771 if isinstance(d, Sequence):
772 theseKeys: Iterable[Any] = range(len(d))
773 else:
774 theseKeys = d.keys()
775 for key in theseKeys:
776 val = d[key]
777 levelKey = base + (key,) if base is not None else (key,)
778 keys.append(levelKey)
779 if isinstance(val, Mapping | Sequence) and not isinstance(val, str):
780 getKeysAsTuples(val, keys, levelKey)
782 keys: list[tuple[str, ...]] = []
783 getKeysAsTuples(self._data, keys, None)
784 return keys
786 def names(self, topLevelOnly: bool = False, delimiter: str | None = None) -> list[str]:
787 """Get a delimited name of all the keys in the hierarchy.
789 The values returned from this method are guaranteed to be usable
790 to access items in the configuration object.
792 Parameters
793 ----------
794 topLevelOnly : `bool`, optional
795 If False, the default, a full hierarchy of names is returned.
796 If True, only the top level are returned.
797 delimiter : `str`, optional
798 Delimiter to use when forming the keys. If the delimiter is
799 present in any of the keys, it will be escaped in the returned
800 names. If `None` given a delimiter will be automatically provided.
801 The delimiter can not be alphanumeric.
803 Returns
804 -------
805 names : `list` of `str`
806 List of all names present in the `Config`.
808 Notes
809 -----
810 This is different than the built-in method `dict.keys`, which will
811 return only the first level keys.
813 Raises
814 ------
815 ValueError:
816 The supplied delimiter is alphanumeric.
817 """
818 if topLevelOnly:
819 return list(self.keys())
821 # Get all the tuples of hierarchical keys
822 nameTuples = self.nameTuples()
824 if delimiter is not None and delimiter.isalnum():
825 raise ValueError(f"Supplied delimiter ({delimiter!r}) must not be alphanumeric.")
827 if delimiter is None:
828 # Start with something, and ensure it does not need to be
829 # escaped (it is much easier to understand if not escaped)
830 delimiter = self._D
832 # Form big string for easy check of delimiter clash
833 combined = "".join("".join(str(s) for s in k) for k in nameTuples)
835 # Try a delimiter and keep trying until we get something that
836 # works.
837 ntries = 0
838 while delimiter in combined:
839 log.debug("Delimiter '%s' could not be used. Trying another.", delimiter)
840 ntries += 1
842 if ntries > 100:
843 raise ValueError(f"Unable to determine a delimiter for Config {self}")
845 # try another one
846 while True:
847 delimiter = chr(ord(delimiter) + 1)
848 if not delimiter.isalnum():
849 break
851 log.debug("Using delimiter %r", delimiter)
853 # Form the keys, escaping the delimiter if necessary
854 strings = [
855 delimiter + delimiter.join(str(s).replace(delimiter, f"\\{delimiter}") for s in k)
856 for k in nameTuples
857 ]
858 return strings
860 def asArray(self, name: str | Sequence[str]) -> Sequence[Any]:
861 """Get a value as an array.
863 May contain one or more elements.
865 Parameters
866 ----------
867 name : `str`
868 Key to use to retrieve value.
870 Returns
871 -------
872 array : `collections.abc.Sequence`
873 The value corresponding to name, but guaranteed to be returned
874 as a list with at least one element. If the value is a
875 `~collections.abc.Sequence` (and not a `str`) the value itself
876 will be returned, else the value will be the first element.
877 """
878 val = self.get(name)
879 if isinstance(val, str) or not isinstance(val, Sequence):
880 val = [val]
881 return val
883 def __eq__(self, other: Any) -> bool:
884 if isinstance(other, Config):
885 other = other._data
886 return self._data == other
888 def __ne__(self, other: Any) -> bool:
889 if isinstance(other, Config):
890 other = other._data
891 return self._data != other
893 #######
894 # i/o #
896 def dump(self, output: IO | None = None, format: str = "yaml") -> str | None:
897 """Write the config to an output stream.
899 Parameters
900 ----------
901 output : `IO`, optional
902 The stream to use for output. If `None` the serialized content
903 will be returned.
904 format : `str`, optional
905 The format to use for the output. Can be "yaml" or "json".
907 Returns
908 -------
909 serialized : `str` or `None`
910 If a stream was given the stream will be used and the return
911 value will be `None`. If the stream was `None` the
912 serialization will be returned as a string.
913 """
914 if format == "yaml":
915 return yaml.safe_dump(self._data, output, default_flow_style=False)
916 elif format == "json":
917 if output is not None:
918 json.dump(self._data, output, ensure_ascii=False)
919 return None
920 else:
921 return json.dumps(self._data, ensure_ascii=False)
922 raise ValueError(f"Unsupported format for Config serialization: {format}")
def dumpToUri(
    self,
    uri: ResourcePathExpression,
    updateFile: bool = True,
    defaultFileName: str = "butler.yaml",
    overwrite: bool = True,
) -> None:
    """Serialize the config and write it to the given URI.

    Currently supports 's3' and 'file' URI schemes.

    Parameters
    ----------
    uri : `lsst.resources.ResourcePathExpression`
        Location the config should be written to.
    updateFile : `bool`, optional
        If `True` and ``uri`` has no file extension, ``defaultFileName``
        is used as the file name at the target location. `True` by
        default.
    defaultFileName : `str`, optional
        File name to use when ``updateFile`` is `True` and ``uri`` has no
        file extension.
    overwrite : `bool`, optional
        If `True` the configuration will be written even if it already
        exists at that location.

    Notes
    -----
    The serialization format is taken from the file extension of the
    final URI. On success ``configFile`` is set to that URI.
    """
    # Work on our own ResourcePath rather than the caller's expression.
    target = ResourcePath(uri)

    needsFileName = updateFile and not target.getExtension()
    if needsFileName:
        target = (
            target.join(defaultFileName, forceDirectory=False)
            if target.isdir()
            else target.updatedFile(defaultFileName)
        )

    # The format name is the lower-cased extension without its leading dot.
    serialized = self.dump(format=target.getExtension()[1:].lower())
    assert serialized is not None, "Config.dump guarantees not-None return when output arg is None"
    target.write(serialized.encode(), overwrite=overwrite)
    self.configFile = target
@staticmethod
def updateParameters(
    configType: type[ConfigSubset],
    config: Config,
    full: Config,
    toUpdate: dict[str, Any] | None = None,
    toCopy: Sequence[str | Sequence[str]] | None = None,
    overwrite: bool = True,
    toMerge: Sequence[str | Sequence[str]] | None = None,
) -> None:
    """Set, copy, or merge selected parameters in a config.

    Named parameters can be assigned new values in bulk, and other
    values can be copied or merged from a reference config.

    The supplied config is assumed to be compatible with ``configType``;
    the updated values are attached to it under the related component
    key. ``config`` and ``full`` are assumed to come from the same part
    of the configuration hierarchy.

    Parameters
    ----------
    configType : `ConfigSubset`
        Config type used to extract the relevant items from ``config``.
    config : `Config`
        The `Config` to update. Only the subset understood by the
        supplied `ConfigSubset` is modified. Defaults are not inserted
        and the content is not validated, since mandatory keys may be
        missing until populated later by merging.
    full : `Config`
        A complete config with all defaults expanded that can be
        converted to a ``configType``. It is only read from, and only
        when ``toCopy`` or ``toMerge`` is given.

        Repository-specific options that should not be obtained
        from defaults when Butler instances are constructed
        should be copied from ``full`` to ``config``.
    toUpdate : `dict`, optional
        Mapping of keys to the new values to assign. Keys and values can
        be anything supported by `Config` assignment.
    toCopy : `tuple`, optional
        Keys whose values should be copied from ``full`` into
        ``config``.
    overwrite : `bool`, optional
        If `False`, a key that already exists in ``config`` is left
        untouched by ``toUpdate`` and ``toCopy``. Default is to
        overwrite.
    toMerge : `tuple`, optional
        Keys whose content should be merged from ``full`` into
        ``config`` without overwriting pre-existing values. Only works
        if the key refers to a hierarchy. The ``overwrite`` flag is
        ignored.

    Raises
    ------
    ValueError
        Neither ``toUpdate``, ``toCopy`` nor ``toMerge`` were defined.
    """
    if toUpdate is None and toCopy is None and toMerge is None:
        raise ValueError("At least one of toUpdate, toCopy, or toMerge parameters must be set.")

    component = configType.component

    # For a parent configuration the component key must be present so
    # that the ConfigSubset constructor selects the right subtree. Add a
    # stub entry when it is missing; ``full`` is consulted because it is
    # guaranteed to be complete.
    if component is not None and component in full and component not in config:
        config[component] = {}

    # Pull out the portion of the config that is to be modified.
    localConfig = configType(config, mergeDefaults=False, validate=False)
    subsetName = localConfig.__class__.__name__

    key: str | Sequence[str]
    if toUpdate:
        for key, value in toUpdate.items():
            if key not in localConfig or overwrite:
                localConfig[key] = value
            else:
                log.debug(
                    "Not overriding key '%s' with value '%s' in config %s",
                    key,
                    value,
                    subsetName,
                )

    if toCopy or toMerge:
        localFullConfig = configType(full, mergeDefaults=False)

        for key in toCopy or ():
            if key not in localConfig or overwrite:
                localConfig[key] = localFullConfig[key]
            else:
                log.debug(
                    "Not overriding key '%s' from defaults in config %s",
                    key,
                    subsetName,
                )

        for key in toMerge or ():
            if key not in localConfig:
                localConfig[key] = localFullConfig[key]
                continue
            # Fetch the node, merge into it, then reattach the result
            # to the config.
            node = localConfig[key]
            node.merge(localFullConfig[key])
            localConfig[key] = node

    # A child config is reattached under its component key; otherwise
    # the parent is updated with the modified content.
    if component is not None and component in config:
        config[component] = localConfig
    else:
        config.update(localConfig)
def toDict(self) -> dict[str, Any]:
    """Return the content as a standalone hierarchical `dict`.

    Returns
    -------
    d : `dict`
        A deep copy of the internal data in which any `Config` value
        found as a direct entry has been replaced by the result of
        its own ``toDict()``.

    Notes
    -----
    This can be useful when passing a Config to some code that
    expects native Python types.
    """
    standalone = copy.deepcopy(self._data)
    for key, item in standalone.items():
        if isinstance(item, Config):
            # Replacing the value of an existing key is safe while
            # iterating since the set of keys does not change.
            standalone[key] = item.toDict()
    return standalone
class ConfigSubset(Config):
    """Config representing a subset of a more general configuration.

    Subclasses define their own component and when given a configuration
    that includes that component, the resulting configuration only includes
    the subset. For example, your config might contain ``dimensions`` if it's
    part of a global config and that subset will be stored. If ``dimensions``
    cannot be found it is assumed that the entire contents of the
    configuration should be used.

    Default values are read from the environment or supplied search paths
    using the default configuration file name specified in the subclass.
    This allows a configuration class to be instantiated without any
    additional arguments.

    Additional validation can be specified to check for keys that are mandatory
    in the configuration.

    Parameters
    ----------
    other : `Config` or `~lsst.resources.ResourcePathExpression` or `dict`
        Argument specifying the configuration information as understood
        by `Config`.
    validate : `bool`, optional
        If `True` required keys will be checked to ensure configuration
        consistency.
    mergeDefaults : `bool`, optional
        If `True` defaults will be read and the supplied config will
        be combined with the defaults, with the supplied values taking
        precedence.
    searchPaths : `list` or `tuple`, optional
        Explicit additional paths to search for defaults. They should
        be supplied in priority order. These paths have higher priority
        than those read from the environment in
        `ConfigSubset.defaultSearchPaths()`. Paths can be `str` referring to
        the local file system or URIs, `lsst.resources.ResourcePath`.
    """

    # The constructor looks for this key in the supplied config (checking
    # the doubled ``(component, component)`` form first) and, when found,
    # keeps only that part of the supplied config.
    component: ClassVar[str | None] = None
    """Component to use from supplied config. Can be None. If specified the
    key is not required. Can be a full dot-separated path to a component.
    """

    # ``validate()`` raises `KeyError` if any of these keys is absent from
    # the internal data.
    requiredKeys: ClassVar[Sequence[str]] = ()
    """Keys that are required to be specified in the configuration.
    """

    # When not `None` and defaults are being merged, the constructor looks
    # this file name up in the search paths.
    defaultConfigFile: ClassVar[str | None] = None
    """Name of the file containing defaults for this config class.
    """
def __init__(
    self,
    other: Config | ResourcePathExpression | Mapping[str, Any] | None = None,
    validate: bool = True,
    mergeDefaults: bool = True,
    searchPaths: Sequence[ResourcePathExpression] | None = None,
):
    # Start out empty: defaults (if requested) are loaded first and the
    # externally supplied values are layered on top afterwards.
    super().__init__()

    # Read the supplied configuration as a plain Config, not a subset.
    externalConfig = Config(other)

    # Narrow the external config to the part relevant for this class.
    # To simplify the use of !include the component.component form is
    # also supported (an included file can itself contain the component
    # name), and it has to be tested before the single-depth form.
    if self.component is not None:
        nestedKey = (self.component, self.component)
        if nestedKey in externalConfig:
            externalConfig = externalConfig[nestedKey]
        elif self.component in externalConfig:
            externalConfig._data = externalConfig._data[self.component]

    # Record of the default files read while building this configuration.
    self.filesRead: list[ResourcePath | str] = []

    # Only set if the configured class declares child configurations.
    containerKey = None

    # Merging with defaults is optional.
    if mergeDefaults:
        # Explicitly supplied paths come first (highest priority),
        # followed by the paths derived from the environment.
        searchOrder: list[ResourcePath | str] = [
            ResourcePath(path, forceDirectory=True) for path in searchPaths or ()
        ]
        searchOrder.extend(self.defaultSearchPaths())

        # Defaults can come from two places:
        # - the "defaultConfigFile" defined in the subclass
        # - the class named by the "cls" element in the config.
        # "cls" is looked up after the first merge in case it changes.
        if self.defaultConfigFile is not None:
            self._updateWithConfigsFromPath(searchOrder, self.defaultConfigFile)

        # A class named in the external config takes priority over one
        # obtained from the defaults.
        typeName = None
        if "cls" in externalConfig:
            typeName = externalConfig["cls"]
        elif "cls" in self:
            typeName = self["cls"]

        if typeName is not None:
            try:
                cls = doImportType(typeName)
            except ImportError as e:
                raise RuntimeError(f"Failed to import cls '{typeName}' for config {type(self)}") from e

            # The referenced class does not have to declare a default
            # config file.
            classDefaults = getattr(cls, "defaultConfigFile", None)
            if classDefaults is not None:
                self._updateWithConfigsFromPath(searchOrder, classDefaults)

            # Remember the container key, if the class specifies one.
            containerKey = getattr(cls, "containerKey", None)

    # External values are applied last so they always win over defaults.
    self.update(externalConfig)
    if not self.configFile:
        self.configFile = externalConfig.configFile

    # Child configurations of the same config class need their own
    # defaults expanded as well.
    if mergeDefaults and containerKey is not None and containerKey in self:
        for idx, subConfig in enumerate(self[containerKey]):
            self[containerKey, idx] = type(self)(
                other=subConfig, validate=validate, mergeDefaults=mergeDefaults, searchPaths=searchPaths
            )

    if validate:
        self.validate()
@classmethod
def defaultSearchPaths(cls) -> list[ResourcePath | str]:
    """Determine the search paths to use for default configurations.

    Additional defaults can be defined using the environment variable
    named by ``CONFIG_PATH``, a PATH-like variable where entries at the
    front of the list have priority over later ones. The ``configs``
    resource directory of the package named by ``resourcesPackage``
    supplies the global defaults at lowest priority.

    Returns
    -------
    paths : `list`
        Paths to search, highest priority first. Entries from the
        environment variable (if it is set) come first, as `str`,
        followed by the package ``configs`` resource as a
        `lsst.resources.ResourcePath`, which is therefore always the
        last entry.

    Notes
    -----
    The environment variable is split on `os.pathsep`. This currently
    makes it incompatible with usage of URIs.
    """
    searchOrder: list[str | ResourcePath] = []

    # Entries from the environment outrank the packaged defaults.
    envValue = os.environ.get(CONFIG_PATH)
    if envValue is not None:
        searchOrder.extend(envValue.split(os.pathsep))

    # The package defaults are referenced as a resource and go last.
    packageDefaults = ResourcePath(f"resource://{cls.resourcesPackage}/configs", forceDirectory=True)
    searchOrder.append(packageDefaults)
    return searchOrder
def _updateWithConfigsFromPath(
    self, searchPaths: Sequence[str | ResourcePath], configFile: ResourcePath | str
) -> None:
    """Merge in configuration values found along the search paths.

    Values that are read override values currently stored in the object.
    Every matching file found in the search paths is read, with the
    paths processed such that earlier entries have higher priority.

    Parameters
    ----------
    searchPaths : `list` of `lsst.resources.ResourcePath`, `str`
        Paths to search for ``configFile``, in priority order: content
        read from the first entry wins over content read from later
        entries. Can contain `str` referring to the local file system
        or a URI string.
    configFile : `lsst.resources.ResourcePath`
        File to locate. If it is absolute and exists it is read directly
        and the search paths are not used. Otherwise it is joined to
        each search path in turn and read wherever it exists.

    Raises
    ------
    ValueError
        Raised if a search path entry is neither a `str` nor a
        `lsst.resources.ResourcePath`.
    """
    explicit = ResourcePath(configFile, forceDirectory=False)
    if explicit.isabs() and explicit.exists():
        self._updateWithOtherConfigFile(configFile)
        self.filesRead.append(configFile)
        return

    # Walk the paths from lowest to highest priority so that the
    # high priority entries are the last to update the object.
    for entry in reversed(searchPaths):
        if not isinstance(entry, str | ResourcePath):
            raise ValueError(f"Unexpected search path type encountered: {entry!r}")
        candidate = ResourcePath(entry, forceDirectory=True).join(configFile)
        if candidate.exists():
            self.filesRead.append(candidate)
            self._updateWithOtherConfigFile(candidate)
def _updateWithOtherConfigFile(self, file: Config | str | ResourcePath | Mapping[str, Any]) -> None:
    """Read a config of this class from ``file`` and merge it in.

    The supplied entity is read as a config of the same class as this
    object, without merging defaults and without validation, and its
    values then override the current values.

    Parameters
    ----------
    file : `Config`, `str`, `lsst.resources.ResourcePath`, or `dict`
        Entity that can be converted to a `ConfigSubset`.
    """
    # Construct with our own class so that component subsetting is
    # applied to the content being read.
    subsetType = type(self)
    self.update(subsetType(file, validate=False, mergeDefaults=False))
def validate(self) -> None:
    """Check that all mandatory keys are present in this configuration.

    Does nothing if ``requiredKeys`` is empty.

    Raises
    ------
    KeyError
        Raised if one or more of ``requiredKeys`` is absent from the
        internal data. All absent keys are listed in the message.
    """
    absent = []
    for required in self.requiredKeys:
        if required not in self._data:
            absent.append(required)
    if absent:
        raise KeyError(f"Mandatory keys ({absent}) missing from supplied configuration for {type(self)}")