Coverage for python/lsst/daf/butler/_config.py: 45%
485 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-16 10:44 +0000
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-16 10:44 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28"""Configuration control."""
30from __future__ import annotations
32__all__ = ("Config", "ConfigSubset")
34import copy
35import io
36import json
37import logging
38import os
39import pprint
40import sys
41from collections import defaultdict
42from collections.abc import Iterable, Iterator, Mapping, MutableMapping, Sequence
43from pathlib import Path
44from typing import IO, TYPE_CHECKING, Any, ClassVar, cast
46import yaml
47from lsst.resources import ResourcePath, ResourcePathExpression
48from lsst.utils import doImportType
49from yaml.representer import Representer
# Teach PyYAML to serialise defaultdict instances as ordinary mappings.
yaml.add_representer(defaultdict, Representer.represent_dict)

# Logger shared by everything in this module.
log = logging.getLogger(__name__)

# Name of the PATH-like environment variable consulted for default configs.
CONFIG_PATH = "DAF_BUTLER_CONFIG_PATH"

if TYPE_CHECKING:
    yamlLoader = yaml.SafeLoader
else:
    # Prefer the C-accelerated safe loader; not every installation ships the
    # C library, in which case the pure-Python safe loader is used instead
    # (for mypy's sake the two are assumed to be interchangeable).
    yamlLoader = getattr(yaml, "CSafeLoader", yaml.SafeLoader)
71def _doUpdate(d: Mapping[str, Any], u: Mapping[str, Any]) -> Mapping[str, Any]:
72 if not isinstance(u, Mapping) or not isinstance(d, MutableMapping): 72 ↛ 73line 72 didn't jump to line 73, because the condition on line 72 was never true
73 raise RuntimeError(f"Only call update with Mapping, not {type(d)}")
74 for k, v in u.items():
75 if isinstance(v, Mapping):
76 lhs = d.get(k, {})
77 if not isinstance(lhs, Mapping): 77 ↛ 78line 77 didn't jump to line 78, because the condition on line 77 was never true
78 lhs = {}
79 d[k] = _doUpdate(lhs, v)
80 else:
81 d[k] = v
82 return d
85def _checkNextItem(k: str | int, d: Any, create: bool, must_be_dict: bool) -> tuple[Any, bool]:
86 """See if k is in d and if it is return the new child."""
87 nextVal = None
88 isThere = False
89 if d is None: 89 ↛ 91line 89 didn't jump to line 91, because the condition on line 89 was never true
90 # We have gone past the end of the hierarchy
91 pass
92 elif not must_be_dict and isinstance(d, Sequence): 92 ↛ 97line 92 didn't jump to line 97, because the condition on line 92 was never true
93 # Check for Sequence first because for lists
94 # __contains__ checks whether value is found in list
95 # not whether the index exists in list. When we traverse
96 # the hierarchy we are interested in the index.
97 try:
98 nextVal = d[int(k)]
99 isThere = True
100 except IndexError:
101 pass
102 except ValueError:
103 isThere = k in d
104 elif k in d:
105 nextVal = d[k]
106 isThere = True
107 elif create: 107 ↛ 108line 107 didn't jump to line 108, because the condition on line 107 was never true
108 d[k] = {}
109 nextVal = d[k]
110 isThere = True
112 return nextVal, isThere
class Loader(yamlLoader):
    """YAML Loader that supports file include directives.

    Uses ``!include`` directive in a YAML file to point to another
    YAML file to be included. The path in the include directive is relative
    to the file containing that directive::

        storageClasses: !include storageClasses.yaml

    Parameters
    ----------
    stream : `str` or `io.IO`
        The stream to parse.

    Notes
    -----
    See https://davidchall.github.io/yaml-includes.html

    Examples
    --------
    >>> with open("document.yaml", "r") as f:
    ...     data = yaml.load(f, Loader=Loader)
    """

    def __init__(self, stream: str | IO):  # types-PyYAML annotates 'stream' with a private type
        super().__init__(stream)
        # if this is a string and not a stream we may well lack a name
        if hasattr(stream, "name"):
            self._root = ResourcePath(stream.name)
        else:
            # No choice but to assume a local filesystem
            self._root = ResourcePath("no-file.yaml")
        self.add_constructor("!include", Loader.include)

    def include(self, node: yaml.Node) -> list[Any] | dict[str, Any]:
        """Construct the content referenced by an ``!include`` node.

        Parameters
        ----------
        node : `yaml.Node`
            Node carrying the ``!include`` tag. A scalar names one file, a
            sequence names several files and a mapping names one file per
            key.

        Returns
        -------
        result : `list` or `dict` or `typing.Any`
            Parsed content of the included file(s), shaped like the node.

        Raises
        ------
        TypeError
            Raised if a mapping node has a key that is not a string.
        yaml.constructor.ConstructorError
            Raised if the node is not a scalar, sequence or mapping.
        """
        if isinstance(node, yaml.ScalarNode):
            return self.extractFile(self.construct_scalar(node))  # type: ignore[arg-type]

        if isinstance(node, yaml.SequenceNode):
            return [self.extractFile(filename) for filename in self.construct_sequence(node)]

        if isinstance(node, yaml.MappingNode):
            result: dict[str, Any] = {}
            for k, v in self.construct_mapping(node).items():
                if not isinstance(k, str):
                    raise TypeError(f"Expected only strings in YAML mapping; got {k!r} of type {type(k)}.")
                result[k] = self.extractFile(v)
            return result

        # Carry the explanation and location in the exception itself rather
        # than printing to stderr and raising an empty error.
        raise yaml.constructor.ConstructorError(
            None, None, "unrecognised node type in !include statement", node.start_mark
        )

    def extractFile(self, filename: str) -> Any:
        """Read and parse the YAML file named by an ``!include`` directive.

        Parameters
        ----------
        filename : `str`
            URI or path of the file to include. A value without a scheme is
            resolved relative to the file containing the directive.

        Returns
        -------
        content : `typing.Any`
            Parsed content of the included file.
        """
        # It is possible for the !include to point to an explicit URI
        # instead of a relative URI, therefore we first see if it is
        # scheme-less or not. If it has a scheme we use it directly
        # if it is scheme-less we use it relative to the file root.
        requesteduri = ResourcePath(filename, forceAbsolute=False)

        if requesteduri.scheme:
            fileuri = requesteduri
        else:
            fileuri = self._root.updatedFile(filename)

        log.debug("Opening YAML file via !include: %s", fileuri)

        # Read all the data from the resource
        data = fileuri.read()

        # Store the bytes into a BytesIO so we can attach a .name, which
        # lets nested includes resolve relative to this file.
        stream = io.BytesIO(data)
        stream.name = fileuri.geturl()
        return yaml.load(stream, Loader)
195# Type of the key used for accessing items in configuration object. It can be
196# a single string as described below or a sequence of strings and integer
197# indices. Indices are used to access items in sequences stored in config.
198_ConfigKey = str | Sequence[str | int]
201class Config(MutableMapping):
202 r"""Implements a datatype that is used by `Butler` for configuration.
204 It is essentially a `dict` with key/value pairs, including nested dicts
205 (as values). In fact, it can be initialized with a `dict`.
206 This is explained next:
208 Config extends the `dict` api so that hierarchical values may be accessed
209 with delimited notation or as a tuple. If a string is given the delimiter
210 is picked up from the first character in that string. For example,
211 ``foo.getValue(".a.b.c")``, ``foo["a"]["b"]["c"]``, ``foo["a", "b", "c"]``,
212 ``foo[".a.b.c"]``, and ``foo["/a/b/c"]`` all achieve the same outcome.
213 If the first character is alphanumeric, no delimiter will be used.
214 ``foo["a.b.c"]`` will be a single key ``a.b.c`` as will ``foo[":a.b.c"]``.
215 Unicode characters can be used as the delimiter for distinctiveness if
216 required.
218 If a key in the hierarchy starts with a non-alphanumeric character care
219 should be used to ensure that either the tuple interface is used or
220 a distinct delimiter is always given in string form.
222 Finally, the delimiter can be escaped if it is part of a key and also
223 has to be used as a delimiter. For example, ``foo[r".a.b\.c"]`` results in
224 a two element hierarchy of ``a`` and ``b.c``. For hard-coded strings it is
225 always better to use a different delimiter in these cases.
227 Note that adding a multi-level key implicitly creates any nesting levels
228 that do not exist, but removing multi-level keys does not automatically
229 remove empty nesting levels. As a result:
231 >>> c = Config()
232 >>> c[".a.b"] = 1
233 >>> del c[".a.b"]
234 >>> c
235 Config({'a': {}})
237 Storage formats supported:
239 - yaml: read and write is supported.
240 - json: read and write is supported but no ``!include`` directive.
242 Parameters
243 ----------
244 other : `lsst.resources.ResourcePath` or `Config` or `dict`
245 Other source of configuration, can be:
247 - (`lsst.resources.ResourcePathExpression`)
248 Treated as a URI to a config file. Must end with ".yaml" or ".json".
249 - (`Config`) Copies the other Config's values into this one.
250 - (`dict`) Copies the values from the dict into this Config.
252 If `None` is provided an empty `Config` will be created.
253 """
255 _D: str = "→"
256 """Default internal delimiter to use for components in the hierarchy when
257 constructing keys for external use (see `Config.names()`)."""
259 includeKey: ClassVar[str] = "includeConfigs"
260 """Key used to indicate that another config should be included at this
261 part of the hierarchy."""
263 resourcesPackage: str = "lsst.daf.butler"
264 """Package to search for default configuration data. The resources
265 themselves will be within a ``configs`` resource hierarchy."""
267 def __init__(self, other: ResourcePathExpression | Config | Mapping[str, Any] | None = None):
268 self._data: dict[str, Any] = {}
269 self.configFile: ResourcePath | None = None
271 if other is None:
272 return
274 if isinstance(other, Config):
275 # Deep copy might be more efficient but if someone has overridden
276 # a config entry to store a complex object then deep copy may
277 # fail. Safer to use update().
278 self.update(other._data)
279 self.configFile = other.configFile
280 elif isinstance(other, dict | Mapping):
281 # In most cases we have a dict, and it's more efficient
282 # to check for a dict instance before checking the generic mapping.
283 self.update(other)
284 elif isinstance(other, str | ResourcePath | Path): 284 ↛ 291line 284 didn't jump to line 291, because the condition on line 284 was never false
285 # if other is a string, assume it is a file path/URI
286 self.__initFromUri(other)
287 self._processExplicitIncludes()
288 else:
289 # if the config specified by other could not be recognized raise
290 # a runtime error.
291 raise RuntimeError(f"A Config could not be loaded from other: {other}")
def ppprint(self) -> str:
    """Format the configuration content as a human-readable string.

    Returns
    -------
    s : `str`
        Pretty-printed representation of the underlying data, using an
        indent of 2 and a width of 1 so that nested entries are placed on
        separate lines.

    Examples
    --------
    use: ``pdb> print(myConfigObject.ppprint())``
    """
    formatted = pprint.pformat(self._data, width=1, indent=2)
    return formatted
307 def __repr__(self) -> str:
308 return f"{type(self).__name__}({self._data!r})"
310 def __str__(self) -> str:
311 return self.ppprint()
313 def __len__(self) -> int:
314 return len(self._data)
316 def __iter__(self) -> Iterator[str]:
317 return iter(self._data)
def copy(self) -> Config:
    """Return a new object of the same class constructed from this one."""
    same_class = type(self)
    return same_class(self)
@classmethod
def fromString(cls, string: str, format: str = "yaml") -> Config:
    """Build a new Config from serialized text.

    Parameters
    ----------
    string : `str`
        Serialized configuration content in the given format.
    format : `str`, optional
        Format of ``string``; either ``yaml`` or ``json``.

    Returns
    -------
    c : `Config`
        Newly-constructed Config with any explicit includes processed.

    Raises
    ------
    ValueError
        Raised if ``format`` is not a recognised format.
    """
    if format not in ("yaml", "json"):
        raise ValueError(f"Unexpected format of string: {format}")

    # The private loaders return the instance they populated.
    if format == "yaml":
        new_config = cls().__initFromYaml(string)
    else:
        new_config = cls().__initFromJson(string)

    new_config._processExplicitIncludes()
    return new_config
@classmethod
def fromYaml(cls, string: str) -> Config:
    """Build a new Config from YAML text.

    Parameters
    ----------
    string : `str`
        Configuration content serialized as YAML.

    Returns
    -------
    c : `Config`
        Newly-constructed Config.
    """
    yaml_format = "yaml"
    return cls.fromString(string, format=yaml_format)
def __initFromUri(self, path: ResourcePathExpression) -> None:
    """Populate this config from a file located by a path or URI.

    Parameters
    ----------
    path : `lsst.resources.ResourcePathExpression`
        Path or a URI to a persisted config file. The extension must be
        ``.yaml`` or ``.json``.

    Raises
    ------
    FileNotFoundError
        Raised if the extension is not supported and the location does
        not exist.
    RuntimeError
        Raised if the location exists but has an unsupported extension.
    """
    uri = ResourcePath(path)
    ext = uri.getExtension()

    if ext not in (".yaml", ".json"):
        # The extension is not valid, possibly because this is a directory
        # rather than a file. Check existence before complaining about the
        # extension. The supported-extension paths skip this (possibly
        # expensive) check because reading the file will reveal a missing
        # file soon enough.
        if not uri.exists():
            raise FileNotFoundError(f"Config location {uri} does not exist.")
        raise RuntimeError(f"The Config URI does not have a supported extension: {uri}")

    if ext == ".yaml":
        log.debug("Opening YAML config file: %s", uri.geturl())
        content = uri.read()
        # Wrap the bytes in a stream so that it can carry a name.
        stream = io.BytesIO(content)
        stream.name = uri.geturl()
        self.__initFromYaml(stream)
    else:
        log.debug("Opening JSON config file: %s", uri.geturl())
        content = uri.read()
        self.__initFromJson(content)

    self.configFile = uri
def __initFromYaml(self, stream: IO | str | bytes) -> Config:
    """Replace the content of this config with parsed YAML.

    Parameters
    ----------
    stream : `IO` or `str`
        Anything `yaml.load` accepts: an IO stream or a string.

    Returns
    -------
    self : `Config`
        This object, to allow chaining.

    Raises
    ------
    yaml.YAMLError
        If there is an error loading the file.
    """
    parsed = yaml.load(stream, Loader=Loader)
    # An empty document parses to None; store an empty dict instead.
    self._data = {} if parsed is None else parsed
    return self
417 def __initFromJson(self, stream: IO | str | bytes) -> Config:
418 """Load a JSON config from any readable stream that contains one.
420 Parameters
421 ----------
422 stream : `IO` or `str`
423 Stream to pass to the JSON loader. This can include a string as
424 well as an IO stream.
426 Raises
427 ------
428 TypeError:
429 Raised if there is an error loading the content.
430 """
431 if isinstance(stream, bytes | str):
432 content = json.loads(stream)
433 else:
434 content = json.load(stream)
435 if content is None:
436 content = {}
437 self._data = content
438 return self
def _processExplicitIncludes(self) -> None:
    """Resolve every ``includeConfigs`` directive found in the config.

    Each directive is removed and replaced by the merged content of the
    files it names. Values already present at that level of the hierarchy
    take precedence over values read from the included files.

    Raises
    ------
    RuntimeError
        Raised if the config file attribute has an unexpected type or if
        a referenced include file can not be found.
    """
    # Relative includes are looked for in the current directory first and
    # then next to the file this config was read from (if any).
    searchPaths = [ResourcePath(os.path.curdir, forceDirectory=True)]
    if self.configFile is not None:
        if not isinstance(self.configFile, ResourcePath):
            raise RuntimeError(f"Unexpected type for config file: {self.configFile}")
        searchPaths.append(self.configFile.dirname())

    # The key tuples are gathered once, before any modification is made.
    for path in self.nameTuples():
        if path[-1] != self.includeKey:
            continue

        log.debug("Processing file include directive at %s", self._D + self._D.join(path))
        basePath = path[:-1]

        # Pull the directive out of the config before processing it.
        includes = self[path]
        del self[path]

        # A single entry is treated as a one-element list.
        if not isinstance(includes, list):
            includes = [includes]

        # Each entry is a reference to a file that is either absolute or
        # relative to the config file or cwd. ConfigSubset search paths
        # are not used.
        subConfigs = []
        for fileName in includes:
            # Expand any shell variables -- this could be URI
            fileName = ResourcePath(os.path.expandvars(fileName), forceAbsolute=False)
            if fileName.isabs():
                found = fileName
            else:
                found = None
                for directory in searchPaths:
                    candidate = directory.join(fileName.path)
                    # Remote resource check might be expensive
                    if candidate.exists():
                        found = candidate
                        break
            if not found:
                raise RuntimeError(f"Unable to find referenced include file: {fileName}")

            # Load the referenced file using the same class as this config.
            subConfigs.append(type(self)(found))

        # Later files override earlier ones.
        newConfig = subConfigs.pop(0)
        for later in subConfigs:
            newConfig.update(later)

        # Explicit values in this config override the included content.
        if basePath:
            newConfig.update(self[basePath])
            # Reattach the merged result to the base config.
            self[basePath] = newConfig
        else:
            # The include was at the root: merge and replace everything.
            newConfig.update(self)
            self._data = newConfig._data
510 @staticmethod
511 def _splitIntoKeys(key: _ConfigKey) -> list[str | int]:
512 r"""Split the argument for get/set/in into a hierarchical list.
514 Parameters
515 ----------
516 key : `str` or iterable
517 Argument given to get/set/in. If an iterable is provided it will
518 be converted to a list. If the first character of the string
519 is not an alphanumeric character then it will be used as the
520 delimiter for the purposes of splitting the remainder of the
521 string. If the delimiter is also in one of the keys then it
522 can be escaped using ``\``. There is no default delimiter.
524 Returns
525 -------
526 keys : `list`
527 Hierarchical keys as a `list`.
528 """
529 if isinstance(key, str):
530 if not key[0].isalnum(): 530 ↛ 531line 530 didn't jump to line 531, because the condition on line 530 was never true
531 d = key[0]
532 key = key[1:]
533 else:
534 return [
535 key,
536 ]
537 escaped = f"\\{d}"
538 temp = None
539 if escaped in key:
540 # Complain at the attempt to escape the escape
541 doubled = rf"\{escaped}"
542 if doubled in key:
543 raise ValueError(
544 f"Escaping an escaped delimiter ({doubled} in {key}) is not yet supported."
545 )
546 # Replace with a character that won't be in the string
547 temp = "\r"
548 if temp in key or d == temp:
549 raise ValueError(
550 f"Can not use character {temp!r} in hierarchical key or as"
551 " delimiter if escaping the delimiter"
552 )
553 key = key.replace(escaped, temp)
554 hierarchy = key.split(d)
555 if temp:
556 hierarchy = [h.replace(temp, d) for h in hierarchy]
557 # Copy the list to keep mypy quiet.
558 return list(hierarchy)
559 elif isinstance(key, Iterable): 559 ↛ 563line 559 didn't jump to line 563, because the condition on line 559 was never false
560 return list(key)
561 else:
562 # Do not try to guess.
563 raise TypeError(f"Provided key [{key}] neither str nor iterable.")
565 def _getKeyHierarchy(self, name: _ConfigKey) -> list[str | int]:
566 """Retrieve the key hierarchy for accessing the Config.
568 Parameters
569 ----------
570 name : `str` or `tuple`
571 Delimited string or `tuple` of hierarchical keys.
573 Returns
574 -------
575 hierarchy : `list` of `str`
576 Hierarchy to use as a `list`. If the name is available directly
577 as a key in the Config it will be used regardless of the presence
578 of any nominal delimiter.
579 """
580 keys: list[str | int]
581 if name in self._data:
582 keys = [cast(str, name)]
583 else:
584 keys = self._splitIntoKeys(name)
585 return keys
def _findInHierarchy(self, keys: Sequence[str | int], create: bool = False) -> tuple[list[Any], bool]:
    """Walk down the config following a sequence of keys.

    Parameters
    ----------
    keys : `list` or `tuple`
        Keys to follow, outermost first.
    create : `bool`, optional
        If `True`, an empty `dict` is inserted for any part of the
        hierarchy that does not exist.

    Returns
    -------
    hierarchy : `list`
        The value found for each key, in order. Only keys that exist in
        the hierarchy contribute a value.
    complete : `bool`
        `True` if every key was found, in which case the final element of
        ``hierarchy`` is the value for the full key.
    """
    node: Any = self._data
    found: list[Any] = []
    for depth, k in enumerate(keys):
        # The top level is always a dict, so the sequence check is only
        # worth doing from the second key onwards.
        node, isThere = _checkNextItem(k, node, create, depth == 0)
        if not isThere:
            return found, False
        found.append(node)
    return found, True
628 def __getitem__(self, name: _ConfigKey) -> Any:
629 # Override the split for the simple case where there is an exact
630 # match. This allows `Config.items()` to work via a simple
631 # __iter__ implementation that returns top level keys of
632 # self._data.
634 # If the name matches a key in the top-level hierarchy, bypass
635 # all further cleverness.
636 found_directly = False
637 try:
638 if isinstance(name, str): 638 ↛ 644line 638 didn't jump to line 644, because the condition on line 638 was never false
639 data = self._data[name]
640 found_directly = True
641 except KeyError:
642 pass
644 if not found_directly: 644 ↛ 645line 644 didn't jump to line 645, because the condition on line 644 was never true
645 keys = self._getKeyHierarchy(name)
647 hierarchy, complete = self._findInHierarchy(keys)
648 if not complete:
649 raise KeyError(f"{name} not found")
650 data = hierarchy[-1]
652 # In most cases we have a dict, and it's more efficient
653 # to check for a dict instance before checking the generic mapping.
654 if isinstance(data, dict | Mapping):
655 data = Config(data)
656 # Ensure that child configs inherit the parent internal delimiter
657 if self._D != Config._D: 657 ↛ 658line 657 didn't jump to line 658, because the condition on line 657 was never true
658 data._D = self._D
659 return data
def __setitem__(self, name: _ConfigKey, value: Any) -> None:
    keys = self._getKeyHierarchy(name)
    last = keys.pop()

    # Store plain data, never Config objects, inside the hierarchy.
    if isinstance(value, Config):
        value = copy.deepcopy(value._data)

    # Missing intermediate levels are created on the way down.
    hierarchy, _ = self._findInHierarchy(keys, create=True)
    target = hierarchy[-1] if hierarchy else self._data

    try:
        target[last] = value
    except TypeError:
        # Retry with the key converted to an integer index.
        target[int(last)] = value
678 def __contains__(self, key: Any) -> bool:
679 if not isinstance(key, str | Sequence): 679 ↛ 680line 679 didn't jump to line 680, because the condition on line 679 was never true
680 return False
681 keys = self._getKeyHierarchy(key)
682 hierarchy, complete = self._findInHierarchy(keys)
683 return complete
def __delitem__(self, key: str | Sequence[str]) -> None:
    keys = self._getKeyHierarchy(key)
    last = keys.pop()

    # Locate the container holding the final key.
    hierarchy, complete = self._findInHierarchy(keys)
    if not complete:
        raise KeyError(f"{key} not found in Config")

    # With no parent keys the final key lives at the top level.
    parent = hierarchy[-1] if hierarchy else self._data
    del parent[last]
def update(self, other: Mapping[str, Any]) -> None:  # type: ignore[override]
    """Merge the content of another `Config` or `dict` into this one.

    Unlike `dict.update()`, nested dicts are merged key by key instead of
    being replaced wholesale.

    Parameters
    ----------
    other : `dict` or `Config`
        Source of configuration.

    Examples
    --------
    >>> c = Config({"a": {"b": 1}})
    >>> c.update({"a": {"c": 2}})
    >>> print(c)
    {'a': {'b': 1, 'c': 2}}

    >>> foo = {"a": {"b": 1}}
    >>> foo.update({"a": {"c": 2}})
    >>> print(foo)
    {'a': {'c': 2}}
    """
    target = self._data
    _doUpdate(target, other)
def merge(self, other: Mapping) -> None:
    """Add keys and values from another mapping that are missing here.

    This is the counterpart of `Config.update()`: keys and values that
    already exist in self are NOT overwritten; only content that DOES NOT
    EXIST in self is taken from ``other``.

    Parameters
    ----------
    other : `dict` or `Config`
        Source of configuration.

    Raises
    ------
    TypeError
        Raised if ``other`` is not a mapping.
    """
    if not isinstance(other, Mapping):
        raise TypeError(f"Can only merge a Mapping into a Config, not {type(other)}")

    # Start from a Config built out of the supplied mapping, then lay the
    # current content over it so that existing values win.
    combined = Config(other)
    combined.update(self)
    self._data = combined._data
def nameTuples(self, topLevelOnly: bool = False) -> list[tuple[str, ...]]:
    """Return the key hierarchy of every entry as a tuple.

    The tuples returned from this method are guaranteed to be usable
    to access items in the configuration object.

    Parameters
    ----------
    topLevelOnly : `bool`, optional
        If False, the default, a full hierarchy of names is returned.
        If True, only the top level are returned.

    Returns
    -------
    names : `list` of `tuple` of `str`
        One tuple per entry in the `Config`, each holding the keys (and
        integer indices for sequences) that lead to that entry. A parent
        is listed before its children.
    """
    if topLevelOnly:
        return [(k,) for k in self]

    def walk(node: Mapping[str, Any] | Sequence[Any], prefix: tuple[Any, ...]) -> Iterator[tuple[Any, ...]]:
        # Sequences are addressed by position, mappings by key.
        if isinstance(node, Sequence):
            selectors: Iterable[Any] = range(len(node))
        else:
            selectors = node.keys()
        for selector in selectors:
            child = node[selector]
            here = prefix + (selector,)
            yield here
            # Strings are sequences too but are treated as leaf values.
            if isinstance(child, (Mapping, Sequence)) and not isinstance(child, str):
                yield from walk(child, here)

    return list(walk(self._data, ()))
def names(self, topLevelOnly: bool = False, delimiter: str | None = None) -> list[str]:
    """Return a delimited name for every key in the hierarchy.

    The values returned from this method are guaranteed to be usable
    to access items in the configuration object.

    Parameters
    ----------
    topLevelOnly : `bool`, optional
        If False, the default, a full hierarchy of names is returned.
        If True, only the top level are returned.
    delimiter : `str`, optional
        Delimiter to use when forming the keys. If the delimiter is
        present in any of the keys, it will be escaped in the returned
        names. If `None` given a delimiter will be automatically provided.
        The delimiter can not be alphanumeric.

    Returns
    -------
    names : `list` of `str`
        List of all names present in the `Config`.

    Raises
    ------
    ValueError:
        The supplied delimiter is alphanumeric, or no usable delimiter
        could be determined automatically.

    Notes
    -----
    This is different than the built-in method `dict.keys`, which will
    return only the first level keys.
    """
    if topLevelOnly:
        return list(self.keys())

    # Every hierarchical key, expressed as a tuple.
    hierarchy = self.nameTuples()

    if delimiter is None:
        # Pick a delimiter that never needs escaping, since unescaped
        # names are much easier to read. Begin with the default one.
        delimiter = self._D

        # One big string makes the clash test a single containment check.
        haystack = "".join(str(part) for path in hierarchy for part in path)

        attempts = 0
        while delimiter in haystack:
            log.debug("Delimiter '%s' could not be used. Trying another.", delimiter)
            attempts += 1

            if attempts > 100:
                raise ValueError(f"Unable to determine a delimiter for Config {self}")

            # Advance to the next non-alphanumeric code point.
            delimiter = chr(ord(delimiter) + 1)
            while delimiter.isalnum():
                delimiter = chr(ord(delimiter) + 1)
    elif delimiter.isalnum():
        raise ValueError(f"Supplied delimiter ({delimiter!r}) must not be alphanumeric.")

    log.debug("Using delimiter %r", delimiter)

    # Join each tuple, escaping the delimiter wherever a key contains it.
    escaped = f"\\{delimiter}"
    return [
        delimiter + delimiter.join(str(part).replace(delimiter, escaped) for part in path)
        for path in hierarchy
    ]
def asArray(self, name: str | Sequence[str]) -> Sequence[Any]:
    """Return the value for a key as a sequence.

    Parameters
    ----------
    name : `str`
        Key to use to retrieve value.

    Returns
    -------
    array : `collections.abc.Sequence`
        If the stored value is a `~collections.abc.Sequence` other than a
        `str` it is returned unchanged; any other value is returned as
        the only element of a new `list`.
    """
    value = self.get(name)
    if isinstance(value, Sequence) and not isinstance(value, str):
        return value
    return [value]
881 def __eq__(self, other: Any) -> bool:
882 if isinstance(other, Config):
883 other = other._data
884 return self._data == other
886 def __ne__(self, other: Any) -> bool:
887 if isinstance(other, Config):
888 other = other._data
889 return self._data != other
891 #######
892 # i/o #
def dump(self, output: IO | None = None, format: str = "yaml") -> str | None:
    """Serialize the config, optionally writing it to a stream.

    Parameters
    ----------
    output : `IO`, optional
        Stream to write to. If `None` the serialized content is returned
        instead.
    format : `str`, optional
        Serialization format; either "yaml" or "json".

    Returns
    -------
    serialized : `str` or `None`
        `None` if a stream was given (the content goes to the stream),
        otherwise the serialization as a string.

    Raises
    ------
    ValueError
        Raised if ``format`` is not a supported format.
    """
    if format == "json":
        if output is None:
            return json.dumps(self._data, ensure_ascii=False)
        json.dump(self._data, output, ensure_ascii=False)
        return None

    if format == "yaml":
        # safe_dump itself returns the text when no stream is supplied.
        return yaml.safe_dump(self._data, output, default_flow_style=False)

    raise ValueError(f"Unsupported format for Config serialization: {format}")
def dumpToUri(
    self,
    uri: ResourcePathExpression,
    updateFile: bool = True,
    defaultFileName: str = "butler.yaml",
    overwrite: bool = True,
) -> None:
    """Serialize the config and write it to the given URI.

    Currently supports 's3' and 'file' URI schemes.

    Parameters
    ----------
    uri : `lsst.resources.ResourcePathExpression`
        URI of location where the Config will be written.
    updateFile : `bool`, optional
        If True and uri does not end on a filename with extension, will
        append ``defaultFileName`` to the target uri. True by default.
    defaultFileName : `str`, optional
        The file name that will be appended to target uri if updateFile is
        True and uri does not end on a file with an extension.
    overwrite : `bool`, optional
        If True the configuration will be written even if it already
        exists at that location.
    """
    # Work on a ResourcePath regardless of what was passed in.
    target = ResourcePath(uri)

    if updateFile and not target.getExtension():
        target = target.updatedFile(defaultFileName)

    # The serialization format is the lower-cased extension without the
    # leading dot.
    extension = target.getExtension()
    serialized = self.dump(format=extension[1:].lower())
    assert serialized is not None, "Config.dump guarantees not-None return when output arg is None"

    target.write(serialized.encode(), overwrite=overwrite)
    self.configFile = target
@staticmethod
def updateParameters(
    configType: type[ConfigSubset],
    config: Config,
    full: Config,
    toUpdate: dict[str, Any] | None = None,
    toCopy: Sequence[str | Sequence[str]] | None = None,
    overwrite: bool = True,
    toMerge: Sequence[str | Sequence[str]] | None = None,
) -> None:
    """Update specific config parameters in bulk.

    Named parameters can be set to explicit new values, and other values
    can be copied or merged in from a reference config.

    The supplied config is assumed to be compatible with ``configType``;
    updated values are attached to it via the related component key. It
    is assumed that ``config`` and ``full`` come from the same part of
    the configuration hierarchy.

    Parameters
    ----------
    configType : `type` [`ConfigSubset`]
        Config type used to extract the relevant items from ``config``.
    config : `Config`
        The `Config` to update. Only the subset understood by the
        supplied `ConfigSubset` will be modified. Default values are not
        inserted and the content is not validated, since mandatory keys
        are allowed to be missing until populated later by merging.
    full : `Config`
        A complete config with all defaults expanded that can be
        converted to a ``configType``. Values are read from here when
        ``toCopy`` or ``toMerge`` is given.

        Repository-specific options that should not be obtained
        from defaults when Butler instances are constructed
        should be copied from ``full`` to ``config``.
    toUpdate : `dict`, optional
        Mapping of keys to update to the new value to use. The keys and
        values can be any supported by `Config` assignment.
    toCopy : sequence, optional
        Keys whose values should be copied from ``full`` into ``config``.
    overwrite : `bool`, optional
        If `False`, do not modify a value in ``config`` if the key
        already exists. Default is always to overwrite.
    toMerge : sequence, optional
        Keys whose content is merged from ``full`` into ``config``
        without overwriting pre-existing values. Only works if the key
        refers to a hierarchy. The ``overwrite`` flag is ignored.

    Raises
    ------
    ValueError
        Raised if none of ``toUpdate``, ``toCopy`` and ``toMerge`` were
        defined.
    """
    if all(arg is None for arg in (toUpdate, toCopy, toMerge)):
        raise ValueError("At least one of toUpdate, toCopy, or toMerge parameters must be set.")

    component = configType.component

    # For a parent configuration, make sure the component key exists in
    # the supplied config (as an empty stub) so that the ConfigSubset
    # constructor selects the right section. ``full`` is consulted because
    # it is expected to be complete.
    if component is not None and component in full and component not in config:
        config[component] = {}

    # The part of the config that is to be modified.
    target = configType(config, mergeDefaults=False, validate=False)

    key: str | Sequence[str]
    if toUpdate:
        for key, value in toUpdate.items():
            if key not in target or overwrite:
                target[key] = value
            else:
                log.debug(
                    "Not overriding key '%s' with value '%s' in config %s",
                    key,
                    value,
                    target.__class__.__name__,
                )

    if toCopy or toMerge:
        # Matching view of the reference config; only needed when values
        # are actually pulled from it.
        reference = configType(full, mergeDefaults=False)

        for key in toCopy or ():
            if key not in target or overwrite:
                target[key] = reference[key]
            else:
                log.debug(
                    "Not overriding key '%s' from defaults in config %s",
                    key,
                    target.__class__.__name__,
                )

        for key in toMerge or ():
            if key not in target:
                target[key] = reference[key]
                continue
            # Merge on the extracted node and then reattach the result
            # to the config.
            node = target[key]
            node.merge(reference[key])
            target[key] = node

    # Reattach to the parent if this is a child config.
    if component is not None and component in config:
        config[component] = target
    else:
        config.update(target)
def toDict(self) -> dict[str, Any]:
    """Convert a `Config` to a standalone hierarchical `dict`.

    Returns
    -------
    d : `dict`
        A deep copy of the internal data in which every `Config` found
        in the hierarchy, including those nested inside `dict` or `list`
        values, has been converted to a `dict`.

    Notes
    -----
    This can be useful when passing a Config to some code that
    expects native Python types.
    """

    def _unwrap(node: Any) -> Any:
        # Return ``node`` with Config instances replaced by plain dicts.
        # Containers are updated in place; that is safe because only the
        # private deep copy made below is ever passed in.
        if isinstance(node, Config):
            return node.toDict()
        if isinstance(node, dict):
            # Only existing keys are reassigned, so the dict does not
            # change size during iteration.
            for key, value in node.items():
                node[key] = _unwrap(value)
        elif isinstance(node, list):
            for idx, value in enumerate(node):
                node[idx] = _unwrap(value)
        return node

    return _unwrap(copy.deepcopy(self._data))
class ConfigSubset(Config):
    """Config representing a subset of a more general configuration.

    Subclasses declare their own component. When handed a configuration
    that contains that component, only that subset is retained. For
    example, a global config might contain ``dimensions`` and a subclass
    with that component will store just that section. If the component
    can not be found it is assumed that the entire contents of the
    supplied configuration should be used.

    Default values are read from the environment or supplied search paths
    using the default configuration file name specified in the subclass.
    This allows a configuration class to be instantiated without any
    additional arguments.

    Additional validation can be specified to check for keys that are
    mandatory in the configuration.

    Parameters
    ----------
    other : `Config` or `~lsst.resources.ResourcePathExpression` or `dict`
        Argument specifying the configuration information as understood
        by `Config`.
    validate : `bool`, optional
        If `True` required keys will be checked to ensure configuration
        consistency.
    mergeDefaults : `bool`, optional
        If `True` defaults will be read and the supplied config will
        be combined with the defaults, with the supplied values taking
        precedence.
    searchPaths : `list` or `tuple`, optional
        Explicit additional paths to search for defaults. They should
        be supplied in priority order. These paths have higher priority
        than those read from the environment in
        `ConfigSubset.defaultSearchPaths()`. Paths can be `str` referring to
        the local file system or URIs, `lsst.resources.ResourcePath`.
    """

    component: ClassVar[str | None] = None
    """Component to use from supplied config. Can be None. If specified the
    key is not required. Can be a full dot-separated path to a component.
    """

    requiredKeys: ClassVar[Sequence[str]] = ()
    """Keys that are required to be specified in the configuration.
    """

    defaultConfigFile: ClassVar[str | None] = None
    """Name of the file containing defaults for this config class.
    """

    def __init__(
        self,
        other: Config | ResourcePathExpression | Mapping[str, Any] | None = None,
        validate: bool = True,
        mergeDefaults: bool = True,
        searchPaths: Sequence[ResourcePathExpression] | None = None,
    ):
        # Start out empty: defaults are loaded into this object first and
        # the external values are applied on top afterwards.
        super().__init__()

        # Parse the input as a plain Config, not as a subset.
        externalConfig = Config(other)

        # Narrow the external config to our component. To simplify the use
        # of !include, ``component.component`` is also recognised (an
        # included file can itself include the component name), and that
        # doubled form has to be checked first.
        component = self.component
        if component is not None:
            nested = (component, component)
            if nested in externalConfig:
                externalConfig = externalConfig[nested]
            elif component in externalConfig:
                externalConfig._data = externalConfig._data[component]

        # Default files read to create this configuration.
        self.filesRead: list[ResourcePath | str] = []

        # Unless told otherwise there are no child configurations to expand.
        containerKey = None

        # Merging with defaults is optional.
        if mergeDefaults:
            # Explicitly supplied search paths come first (highest
            # priority), followed by the paths from the environment.
            fullSearchPath: list[ResourcePath | str] = (
                [ResourcePath(path) for path in searchPaths] if searchPaths else []
            )
            fullSearchPath.extend(self.defaultSearchPaths())

            # Defaults can come from two places:
            # - the "defaultConfigFile" defined in the subclass;
            # - the class named by the "cls" element in the config, which
            #   is looked up after the first merge in case that changes it.
            if self.defaultConfigFile is not None:
                self._updateWithConfigsFromPath(fullSearchPath, self.defaultConfigFile)

            # A class specification in the external config takes priority
            # over one coming from the defaults.
            pytype = None
            for source in (externalConfig, self):
                if "cls" in source:
                    pytype = source["cls"]
                    break

            if pytype is not None:
                try:
                    cls = doImportType(pytype)
                except ImportError as e:
                    raise RuntimeError(f"Failed to import cls '{pytype}' for config {type(self)}") from e

                # The referenced class is not required to specify a
                # default config file.
                defaultsFile = getattr(cls, "defaultConfigFile", None)
                if defaultsFile is not None:
                    self._updateWithConfigsFromPath(fullSearchPath, defaultsFile)

                # Remember the container key, if the class declares one.
                containerKey = getattr(cls, "containerKey", None)

        # External values are applied last so they always override defaults.
        self.update(externalConfig)
        if not self.configFile:
            self.configFile = externalConfig.configFile

        # Child configurations of the same config class need their defaults
        # expanded as well.
        if mergeDefaults and containerKey is not None and containerKey in self:
            for idx, subConfig in enumerate(self[containerKey]):
                self[containerKey, idx] = type(self)(
                    other=subConfig, validate=validate, mergeDefaults=mergeDefaults, searchPaths=searchPaths
                )

        if validate:
            self.validate()

    @classmethod
    def defaultSearchPaths(cls) -> list[ResourcePath | str]:
        """Read environment to determine search paths to use.

        Global defaults, at lowest priority, are found in the ``config``
        directory of the butler source tree. Additional defaults can be
        defined using the environment variable ``$DAF_BUTLER_CONFIG_PATHS``
        which is a PATH-like variable where paths at the front of the list
        have priority over those later.

        Returns
        -------
        paths : `list`
            Paths to search, highest priority first. Entries taken from
            the environment are `str`; the final entry is a
            `lsst.resources.ResourcePath` for the package's ``configs``
            resource directory.

        Notes
        -----
        The environment variable is split on `os.pathsep`. This currently
        makes it incompatible with usage of URIs.
        """
        paths: list[str | ResourcePath] = []

        # Entries from the environment variable, if it is set at all, go
        # ahead of the package defaults.
        fromEnv = os.environ.get(CONFIG_PATH)
        if fromEnv is not None:
            paths.extend(fromEnv.split(os.pathsep))

        # The package defaults are added as a resource, at the end.
        paths.append(ResourcePath(f"resource://{cls.resourcesPackage}/configs", forceDirectory=True))
        return paths

    def _updateWithConfigsFromPath(
        self, searchPaths: Sequence[str | ResourcePath], configFile: ResourcePath | str
    ) -> None:
        """Search the supplied paths, merging the configuration values.

        The values read will override values currently stored in the
        object. Every matching file found in the path is read, such that
        the earlier path entries have higher priority.

        Parameters
        ----------
        searchPaths : `list` of `lsst.resources.ResourcePath`, `str`
            Paths to search for the supplied configFile, in priority
            order, such that files read from the first path entry will be
            selected over those read from a later path. Can contain `str`
            referring to the local file system or a URI string.
        configFile : `lsst.resources.ResourcePath` or `str`
            File to locate in path. If it is absolute and exists it is
            read directly and the search path is not used.

        Raises
        ------
        ValueError
            Raised if an entry in ``searchPaths`` is neither a `str` nor a
            `lsst.resources.ResourcePath`.
        """
        uri = ResourcePath(configFile)
        if uri.isabs() and uri.exists():
            # Explicit existing resource: read it and skip the search path.
            self._updateWithOtherConfigFile(configFile)
            self.filesRead.append(configFile)
            return

        # Walk the paths from lowest to highest priority so that the high
        # priority entries update the object last.
        for entry in reversed(searchPaths):
            if not isinstance(entry, (str, ResourcePath)):
                raise ValueError(f"Unexpected search path type encountered: {entry!r}")
            directory = ResourcePath(entry, forceDirectory=True)
            candidate = directory.join(configFile)
            if candidate.exists():
                self.filesRead.append(candidate)
                self._updateWithOtherConfigFile(candidate)

    def _updateWithOtherConfigFile(self, file: Config | str | ResourcePath | Mapping[str, Any]) -> None:
        """Read in some defaults and update.

        The supplied entity is read as a config of this class, without
        validation and without merging defaults, and is then merged in
        such that its values override the current values.

        Parameters
        ----------
        file : `Config`, `str`, `lsst.resources.ResourcePath`, or `dict`
            Entity that can be converted to a `ConfigSubset`.
        """
        # Constructing via our own class lets component subsetting happen
        # correctly.
        self.update(type(self)(file, validate=False, mergeDefaults=False))

    def validate(self) -> None:
        """Check that mandatory keys are present in this configuration.

        Ignored if ``requiredKeys`` is empty.

        Raises
        ------
        KeyError
            Raised if any entry of ``requiredKeys`` is not a key of the
            internal data.
        """
        absent = []
        for required in self.requiredKeys:
            if required not in self._data:
                absent.append(required)
        if absent:
            raise KeyError(f"Mandatory keys ({absent}) missing from supplied configuration for {type(self)}")