Coverage for python/lsst/daf/butler/core/config.py: 45%
487 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-21 09:55 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-21 09:55 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Configuration control."""
24from __future__ import annotations
26__all__ = ("Config", "ConfigSubset")
28import copy
29import io
30import json
31import logging
32import os
33import pprint
34import sys
35from collections import defaultdict
36from collections.abc import Iterable, Iterator, Mapping, MutableMapping, Sequence
37from pathlib import Path
38from typing import IO, TYPE_CHECKING, Any, ClassVar, cast
40import yaml
41from lsst.resources import ResourcePath, ResourcePathExpression
42from lsst.utils import doImportType
43from yaml.representer import Representer
# Register a representer so ``defaultdict`` instances are dumped as plain
# YAML mappings.
yaml.add_representer(defaultdict, Representer.represent_dict)

# Config module logger
log = logging.getLogger(__name__)

# PATH-like environment variable to use for defaults.
CONFIG_PATH = "DAF_BUTLER_CONFIG_PATH"

if TYPE_CHECKING:
    yamlLoader = yaml.SafeLoader
else:
    try:
        yamlLoader = yaml.CSafeLoader
    except AttributeError:
        # Not all installations have the C library
        # (but assume for mypy's sake that they're the same)
        yamlLoader = yaml.SafeLoader
65def _doUpdate(d: Mapping[str, Any], u: Mapping[str, Any]) -> Mapping[str, Any]:
66 if not isinstance(u, Mapping) or not isinstance(d, MutableMapping): 66 ↛ 67line 66 didn't jump to line 67, because the condition on line 66 was never true
67 raise RuntimeError(f"Only call update with Mapping, not {type(d)}")
68 for k, v in u.items():
69 if isinstance(v, Mapping):
70 lhs = d.get(k, {})
71 if not isinstance(lhs, Mapping): 71 ↛ 72line 71 didn't jump to line 72, because the condition on line 71 was never true
72 lhs = {}
73 d[k] = _doUpdate(lhs, v)
74 else:
75 d[k] = v
76 return d
79def _checkNextItem(k: str | int, d: Any, create: bool, must_be_dict: bool) -> tuple[Any, bool]:
80 """See if k is in d and if it is return the new child."""
81 nextVal = None
82 isThere = False
83 if d is None: 83 ↛ 85line 83 didn't jump to line 85, because the condition on line 83 was never true
84 # We have gone past the end of the hierarchy
85 pass
86 elif not must_be_dict and isinstance(d, Sequence): 86 ↛ 91line 86 didn't jump to line 91, because the condition on line 86 was never true
87 # Check for Sequence first because for lists
88 # __contains__ checks whether value is found in list
89 # not whether the index exists in list. When we traverse
90 # the hierarchy we are interested in the index.
91 try:
92 nextVal = d[int(k)]
93 isThere = True
94 except IndexError:
95 pass
96 except ValueError:
97 isThere = k in d
98 elif k in d:
99 nextVal = d[k]
100 isThere = True
101 elif create: 101 ↛ 102line 101 didn't jump to line 102, because the condition on line 101 was never true
102 d[k] = {}
103 nextVal = d[k]
104 isThere = True
106 return nextVal, isThere
class Loader(yamlLoader):
    """YAML Loader that supports file include directives.

    Uses ``!include`` directive in a YAML file to point to another
    YAML file to be included. The path in the include directive is relative
    to the file containing that directive.

        storageClasses: !include storageClasses.yaml

    Examples
    --------
    >>> with open("document.yaml", "r") as f:
    ...     data = yaml.load(f, Loader=Loader)

    Notes
    -----
    See https://davidchall.github.io/yaml-includes.html
    """

    def __init__(self, stream: str | IO):  # types-PyYAML annotates 'stream' with a private type
        super().__init__(stream)
        # if this is a string and not a stream we may well lack a name
        if hasattr(stream, "name"):
            self._root = ResourcePath(stream.name)
        else:
            # No choice but to assume a local filesystem
            self._root = ResourcePath("no-file.yaml")
        self.add_constructor("!include", Loader.include)

    def include(self, node: yaml.Node) -> list[Any] | dict[str, Any]:
        """Construct the content referenced by an ``!include`` node.

        A scalar node yields the loaded file, a sequence node yields a list
        of loaded files and a mapping node yields a dict of loaded files.

        Raises
        ------
        TypeError
            Raised if a mapping node has a non-string key.
        yaml.constructor.ConstructorError
            Raised if the node is not a scalar, sequence or mapping node.
        """
        result: list[Any] | dict[str, Any]
        if isinstance(node, yaml.ScalarNode):
            return self.extractFile(self.construct_scalar(node))  # type: ignore[arg-type]

        elif isinstance(node, yaml.SequenceNode):
            result = []
            for filename in self.construct_sequence(node):
                result.append(self.extractFile(filename))
            return result

        elif isinstance(node, yaml.MappingNode):
            result = {}
            for k, v in self.construct_mapping(node).items():
                if not isinstance(k, str):
                    raise TypeError(f"Expected only strings in YAML mapping; got {k!r} of type {type(k)}.")
                result[k] = self.extractFile(v)
            return result

        else:
            problem = "unrecognised node type in !include statement"
            print(f"Error:: {problem}", file=sys.stderr)
            # Carry the reason and location in the exception itself so that
            # callers that catch it still see what went wrong.
            raise yaml.constructor.ConstructorError(None, None, problem, node.start_mark)

    def extractFile(self, filename: str) -> Any:
        """Read and parse the YAML file named by an include directive."""
        # It is possible for the !include to point to an explicit URI
        # instead of a relative URI, therefore we first see if it is
        # scheme-less or not. If it has a scheme we use it directly
        # if it is scheme-less we use it relative to the file root.
        requesteduri = ResourcePath(filename, forceAbsolute=False)

        if requesteduri.scheme:
            fileuri = requesteduri
        else:
            fileuri = self._root.updatedFile(filename)

        log.debug("Opening YAML file via !include: %s", fileuri)

        # Read all the data from the resource
        data = fileuri.read()

        # Store the bytes into a BytesIO so we can attach a .name
        stream = io.BytesIO(data)
        stream.name = fileuri.geturl()
        return yaml.load(stream, Loader)
184# Type of the key used for accessing items in configuration object. It can be
185# a single string as described below or a sequence of strings and integer
186# indices. Indices are used to access items in sequences stored in config.
187_ConfigKey = str | Sequence[str | int]
190class Config(MutableMapping):
191 r"""Implements a datatype that is used by `Butler` for configuration.
193 It is essentially a `dict` with key/value pairs, including nested dicts
194 (as values). In fact, it can be initialized with a `dict`.
195 This is explained next:
197 Config extends the `dict` api so that hierarchical values may be accessed
198 with delimited notation or as a tuple. If a string is given the delimiter
199 is picked up from the first character in that string. For example,
200 ``foo.getValue(".a.b.c")``, ``foo["a"]["b"]["c"]``, ``foo["a", "b", "c"]``,
201 ``foo[".a.b.c"]``, and ``foo["/a/b/c"]`` all achieve the same outcome.
202 If the first character is alphanumeric, no delimiter will be used.
203 ``foo["a.b.c"]`` will be a single key ``a.b.c`` as will ``foo[":a.b.c"]``.
204 Unicode characters can be used as the delimiter for distinctiveness if
205 required.
207 If a key in the hierarchy starts with a non-alphanumeric character care
208 should be used to ensure that either the tuple interface is used or
209 a distinct delimiter is always given in string form.
211 Finally, the delimiter can be escaped if it is part of a key and also
212 has to be used as a delimiter. For example, ``foo[r".a.b\.c"]`` results in
213 a two element hierarchy of ``a`` and ``b.c``. For hard-coded strings it is
214 always better to use a different delimiter in these cases.
216 Note that adding a multi-level key implicitly creates any nesting levels
217 that do not exist, but removing multi-level keys does not automatically
218 remove empty nesting levels. As a result:
220 >>> c = Config()
221 >>> c[".a.b"] = 1
222 >>> del c[".a.b"]
223 >>> c
224 Config({'a': {}})
226 Storage formats supported:
228 - yaml: read and write is supported.
229 - json: read and write is supported but no ``!include`` directive.
231 Parameters
232 ----------
233 other : `lsst.resources.ResourcePath` or `Config` or `dict`
234 Other source of configuration, can be:
236 - (`lsst.resources.ResourcePathExpression`)
237 Treated as a URI to a config file. Must end with ".yaml" or ".json".
238 - (`Config`) Copies the other Config's values into this one.
239 - (`dict`) Copies the values from the dict into this Config.
241 If `None` is provided an empty `Config` will be created.
242 """
244 _D: str = "→"
245 """Default internal delimiter to use for components in the hierarchy when
246 constructing keys for external use (see `Config.names()`)."""
248 includeKey: ClassVar[str] = "includeConfigs"
249 """Key used to indicate that another config should be included at this
250 part of the hierarchy."""
252 resourcesPackage: str = "lsst.daf.butler"
253 """Package to search for default configuration data. The resources
254 themselves will be within a ``configs`` resource hierarchy."""
256 def __init__(self, other: ResourcePathExpression | Config | Mapping[str, Any] | None = None):
257 self._data: dict[str, Any] = {}
258 self.configFile: ResourcePath | None = None
260 if other is None:
261 return
263 if isinstance(other, Config):
264 # Deep copy might be more efficient but if someone has overridden
265 # a config entry to store a complex object then deep copy may
266 # fail. Safer to use update().
267 self.update(other._data)
268 self.configFile = other.configFile
269 elif isinstance(other, (dict, Mapping)):
270 # In most cases we have a dict, and it's more efficient
271 # to check for a dict instance before checking the generic mapping.
272 self.update(other)
273 elif isinstance(other, (str, ResourcePath, Path)): 273 ↛ 280line 273 didn't jump to line 280, because the condition on line 273 was never false
274 # if other is a string, assume it is a file path/URI
275 self.__initFromUri(other)
276 self._processExplicitIncludes()
277 else:
278 # if the config specified by other could not be recognized raise
279 # a runtime error.
280 raise RuntimeError(f"A Config could not be loaded from other: {other}")
282 def ppprint(self) -> str:
283 """Return config as formatted readable string.
285 Examples
286 --------
287 use: ``pdb> print(myConfigObject.ppprint())``
289 Returns
290 -------
291 s : `str`
292 A prettyprint formatted string representing the config
293 """
294 return pprint.pformat(self._data, indent=2, width=1)
296 def __repr__(self) -> str:
297 return f"{type(self).__name__}({self._data!r})"
299 def __str__(self) -> str:
300 return self.ppprint()
302 def __len__(self) -> int:
303 return len(self._data)
305 def __iter__(self) -> Iterator[str]:
306 return iter(self._data)
def copy(self) -> Config:
    """Return a new instance of the same class constructed from this one."""
    return type(self)(self)
311 @classmethod
312 def fromString(cls, string: str, format: str = "yaml") -> Config:
313 """Create a new Config instance from a serialized string.
315 Parameters
316 ----------
317 string : `str`
318 String containing content in specified format
319 format : `str`, optional
320 Format of the supplied string. Can be ``json`` or ``yaml``.
322 Returns
323 -------
324 c : `Config`
325 Newly-constructed Config.
326 """
327 if format == "yaml":
328 new_config = cls().__initFromYaml(string)
329 elif format == "json":
330 new_config = cls().__initFromJson(string)
331 else:
332 raise ValueError(f"Unexpected format of string: {format}")
333 new_config._processExplicitIncludes()
334 return new_config
@classmethod
def fromYaml(cls, string: str) -> Config:
    """Create a new Config instance from a YAML string.

    Parameters
    ----------
    string : `str`
        String containing content in YAML format

    Returns
    -------
    c : `Config`
        Newly-constructed Config.
    """
    return cls.fromString(string, format="yaml")
def __initFromUri(self, path: ResourcePathExpression) -> None:
    """Load a file from a path or an URI.

    Parameters
    ----------
    path : `lsst.resources.ResourcePathExpression`
        Path or a URI to a persisted config file.

    Raises
    ------
    FileNotFoundError
        Raised if the extension is unsupported and the location does not
        exist.
    RuntimeError
        Raised if the location exists but has an unsupported extension.
    """
    uri = ResourcePath(path)
    ext = uri.getExtension()
    if ext == ".yaml":
        log.debug("Opening YAML config file: %s", uri.geturl())
        content = uri.read()
        # Use a stream so we can name it
        stream = io.BytesIO(content)
        stream.name = uri.geturl()
        self.__initFromYaml(stream)
    elif ext == ".json":
        log.debug("Opening JSON config file: %s", uri.geturl())
        content = uri.read()
        self.__initFromJson(content)
    else:
        # This URI does not have a valid extension. It might be because
        # we ended up with a directory and not a file. Before we complain
        # about an extension, do an existence check. No need to do
        # the (possibly expensive) existence check in the default code
        # path above because we will find out soon enough that the file
        # is not there.
        if not uri.exists():
            raise FileNotFoundError(f"Config location {uri} does not exist.")
        raise RuntimeError(f"The Config URI does not have a supported extension: {uri}")
    self.configFile = uri
def __initFromYaml(self, stream: IO | str | bytes) -> Config:
    """Load a YAML config from any readable stream that contains one.

    Parameters
    ----------
    stream : `IO` or `str` or `bytes`
        Stream to pass to the YAML loader. Accepts anything that
        `yaml.load` accepts. This can include a string as well as an
        IO stream.

    Returns
    -------
    self : `Config`
        This instance, with its content replaced by the loaded data.

    Raises
    ------
    yaml.YAMLError
        If there is an error loading the file.
    """
    content = yaml.load(stream, Loader=Loader)
    if content is None:
        # An empty document is treated as an empty config.
        content = {}
    self._data = content
    return self
406 def __initFromJson(self, stream: IO | str | bytes) -> Config:
407 """Load a JSON config from any readable stream that contains one.
409 Parameters
410 ----------
411 stream: `IO` or `str`
412 Stream to pass to the JSON loader. This can include a string as
413 well as an IO stream.
415 Raises
416 ------
417 TypeError:
418 Raised if there is an error loading the content.
419 """
420 if isinstance(stream, (bytes, str)):
421 content = json.loads(stream)
422 else:
423 content = json.load(stream)
424 if content is None:
425 content = {}
426 self._data = content
427 return self
def _processExplicitIncludes(self) -> None:
    """Scan through the configuration searching for the special includes.

    Looks for ``includeConfigs`` directive and processes the includes.

    Raises
    ------
    RuntimeError
        Raised if a referenced include file can not be found or if
        ``configFile`` is not a `ResourcePath`.
    """
    # Search paths for config files
    searchPaths = [ResourcePath(os.path.curdir, forceDirectory=True)]
    if self.configFile is not None:
        if isinstance(self.configFile, ResourcePath):
            configDir = self.configFile.dirname()
        else:
            raise RuntimeError(f"Unexpected type for config file: {self.configFile}")
        searchPaths.append(configDir)

    # Ensure we know what delimiter to use
    names = self.nameTuples()
    for path in names:
        if path[-1] == self.includeKey:
            log.debug("Processing file include directive at %s", self._D + self._D.join(path))
            basePath = path[:-1]

            # Extract the includes and then delete them from the config
            includes = self[path]
            del self[path]

            # Be consistent and convert to a list
            if not isinstance(includes, list):
                includes = [includes]

            # Read each file assuming it is a reference to a file
            # The file can be relative to config file or cwd
            # ConfigSubset search paths are not used
            subConfigs = []
            for fileName in includes:
                # Expand any shell variables -- this could be URI
                fileName = ResourcePath(os.path.expandvars(fileName), forceAbsolute=False)
                found = None
                if fileName.isabs():
                    found = fileName
                else:
                    for searchDir in searchPaths:
                        specific = searchDir.join(fileName.path)
                        # Remote resource check might be expensive
                        if specific.exists():
                            found = specific
                            break
                if not found:
                    raise RuntimeError(f"Unable to find referenced include file: {fileName}")

                # Read the referenced Config as a Config
                subConfigs.append(type(self)(found))

            # Now we need to merge these sub configs with the current
            # information that was present in this node in the config
            # tree with precedence given to the explicit values
            newConfig = subConfigs.pop(0)
            for sc in subConfigs:
                newConfig.update(sc)

            # Explicit values take precedence
            if not basePath:
                # This is an include at the root config
                newConfig.update(self)
                # Replace the current config
                self._data = newConfig._data
            else:
                newConfig.update(self[basePath])
                # And reattach to the base config
                self[basePath] = newConfig
499 @staticmethod
500 def _splitIntoKeys(key: _ConfigKey) -> list[str | int]:
501 r"""Split the argument for get/set/in into a hierarchical list.
503 Parameters
504 ----------
505 key : `str` or iterable
506 Argument given to get/set/in. If an iterable is provided it will
507 be converted to a list. If the first character of the string
508 is not an alphanumeric character then it will be used as the
509 delimiter for the purposes of splitting the remainder of the
510 string. If the delimiter is also in one of the keys then it
511 can be escaped using ``\``. There is no default delimiter.
513 Returns
514 -------
515 keys : `list`
516 Hierarchical keys as a `list`.
517 """
518 if isinstance(key, str):
519 if not key[0].isalnum(): 519 ↛ 520line 519 didn't jump to line 520, because the condition on line 519 was never true
520 d = key[0]
521 key = key[1:]
522 else:
523 return [
524 key,
525 ]
526 escaped = f"\\{d}"
527 temp = None
528 if escaped in key:
529 # Complain at the attempt to escape the escape
530 doubled = rf"\{escaped}"
531 if doubled in key:
532 raise ValueError(
533 f"Escaping an escaped delimiter ({doubled} in {key}) is not yet supported."
534 )
535 # Replace with a character that won't be in the string
536 temp = "\r"
537 if temp in key or d == temp:
538 raise ValueError(
539 f"Can not use character {temp!r} in hierarchical key or as"
540 " delimiter if escaping the delimiter"
541 )
542 key = key.replace(escaped, temp)
543 hierarchy = key.split(d)
544 if temp:
545 hierarchy = [h.replace(temp, d) for h in hierarchy]
546 # Copy the list to keep mypy quiet.
547 return list(hierarchy)
548 elif isinstance(key, Iterable): 548 ↛ 552line 548 didn't jump to line 552, because the condition on line 548 was never false
549 return list(key)
550 else:
551 # Do not try to guess.
552 raise TypeError(f"Provided key [{key}] neither str nor iterable.")
554 def _getKeyHierarchy(self, name: _ConfigKey) -> list[str | int]:
555 """Retrieve the key hierarchy for accessing the Config.
557 Parameters
558 ----------
559 name : `str` or `tuple`
560 Delimited string or `tuple` of hierarchical keys.
562 Returns
563 -------
564 hierarchy : `list` of `str`
565 Hierarchy to use as a `list`. If the name is available directly
566 as a key in the Config it will be used regardless of the presence
567 of any nominal delimiter.
568 """
569 keys: list[str | int]
570 if name in self._data:
571 keys = [cast(str, name)]
572 else:
573 keys = self._splitIntoKeys(name)
574 return keys
576 def _findInHierarchy(self, keys: Sequence[str | int], create: bool = False) -> tuple[list[Any], bool]:
577 """Look for hierarchy of keys in Config.
579 Parameters
580 ----------
581 keys : `list` or `tuple`
582 Keys to search in hierarchy.
583 create : `bool`, optional
584 If `True`, if a part of the hierarchy does not exist, insert an
585 empty `dict` into the hierarchy.
587 Returns
588 -------
589 hierarchy : `list`
590 List of the value corresponding to each key in the supplied
591 hierarchy. Only keys that exist in the hierarchy will have
592 a value.
593 complete : `bool`
594 `True` if the full hierarchy exists and the final element
595 in ``hierarchy`` is the value of relevant value.
596 """
597 d: Any = self._data
599 # For the first key, d must be a dict so it is a waste
600 # of time to check for a sequence.
601 must_be_dict = True
603 hierarchy = []
604 complete = True
605 for k in keys:
606 d, isThere = _checkNextItem(k, d, create, must_be_dict)
607 if isThere:
608 hierarchy.append(d)
609 else:
610 complete = False
611 break
612 # Second time round it might be a sequence.
613 must_be_dict = False
615 return hierarchy, complete
617 def __getitem__(self, name: _ConfigKey) -> Any:
618 # Override the split for the simple case where there is an exact
619 # match. This allows `Config.items()` to work via a simple
620 # __iter__ implementation that returns top level keys of
621 # self._data.
623 # If the name matches a key in the top-level hierarchy, bypass
624 # all further cleverness.
625 found_directly = False
626 try:
627 if isinstance(name, str): 627 ↛ 633line 627 didn't jump to line 633, because the condition on line 627 was never false
628 data = self._data[name]
629 found_directly = True
630 except KeyError:
631 pass
633 if not found_directly: 633 ↛ 634line 633 didn't jump to line 634, because the condition on line 633 was never true
634 keys = self._getKeyHierarchy(name)
636 hierarchy, complete = self._findInHierarchy(keys)
637 if not complete:
638 raise KeyError(f"{name} not found")
639 data = hierarchy[-1]
641 # In most cases we have a dict, and it's more efficient
642 # to check for a dict instance before checking the generic mapping.
643 if isinstance(data, (dict, Mapping)):
644 data = Config(data)
645 # Ensure that child configs inherit the parent internal delimiter
646 if self._D != Config._D: 646 ↛ 647line 646 didn't jump to line 647, because the condition on line 646 was never true
647 data._D = self._D
648 return data
650 def __setitem__(self, name: _ConfigKey, value: Any) -> None:
651 keys = self._getKeyHierarchy(name)
652 last = keys.pop()
653 if isinstance(value, Config):
654 value = copy.deepcopy(value._data)
656 hierarchy, complete = self._findInHierarchy(keys, create=True)
657 if hierarchy:
658 data = hierarchy[-1]
659 else:
660 data = self._data
662 try:
663 data[last] = value
664 except TypeError:
665 data[int(last)] = value
667 def __contains__(self, key: Any) -> bool:
668 if not isinstance(key, str | Sequence): 668 ↛ 669line 668 didn't jump to line 669, because the condition on line 668 was never true
669 return False
670 keys = self._getKeyHierarchy(key)
671 hierarchy, complete = self._findInHierarchy(keys)
672 return complete
674 def __delitem__(self, key: str | Sequence[str]) -> None:
675 keys = self._getKeyHierarchy(key)
676 last = keys.pop()
677 hierarchy, complete = self._findInHierarchy(keys)
678 if complete: 678 ↛ 685line 678 didn't jump to line 685, because the condition on line 678 was never false
679 if hierarchy: 679 ↛ 680line 679 didn't jump to line 680, because the condition on line 679 was never true
680 data = hierarchy[-1]
681 else:
682 data = self._data
683 del data[last]
684 else:
685 raise KeyError(f"{key} not found in Config")
def update(self, other: Mapping[str, Any]) -> None:  # type: ignore[override]
    """Update config from other `Config` or `dict`.

    Like `dict.update()`, but will add or modify keys in nested dicts,
    instead of overwriting the nested dict entirely.

    Parameters
    ----------
    other : `dict` or `Config`
        Source of configuration:

    Examples
    --------
    >>> c = Config({"a": {"b": 1}})
    >>> c.update({"a": {"c": 2}})
    >>> print(c)
    {'a': {'b': 1, 'c': 2}}

    >>> foo = {"a": {"b": 1}}
    >>> foo.update({"a": {"c": 2}})
    >>> print(foo)
    {'a': {'c': 2}}
    """
    _doUpdate(self._data, other)
def merge(self, other: Mapping) -> None:
    """Merge another Config into this one.

    Like `Config.update()`, but will add keys & values from other that
    DO NOT EXIST in self.

    Keys and values that already exist in self will NOT be overwritten.

    Parameters
    ----------
    other : `dict` or `Config`
        Source of configuration:

    Raises
    ------
    TypeError
        Raised if ``other`` is not a mapping.
    """
    if not isinstance(other, Mapping):
        raise TypeError(f"Can only merge a Mapping into a Config, not {type(other)}")

    # Convert the supplied mapping to a Config for consistency.
    # The constructor builds a new hierarchy of dicts via update(), so
    # the caller's mapping is not modified by the merge below.
    otherCopy = Config(other)
    otherCopy.update(self)
    self._data = otherCopy._data
734 def nameTuples(self, topLevelOnly: bool = False) -> list[tuple[str, ...]]:
735 """Get tuples representing the name hierarchies of all keys.
737 The tuples returned from this method are guaranteed to be usable
738 to access items in the configuration object.
740 Parameters
741 ----------
742 topLevelOnly : `bool`, optional
743 If False, the default, a full hierarchy of names is returned.
744 If True, only the top level are returned.
746 Returns
747 -------
748 names : `list` of `tuple` of `str`
749 List of all names present in the `Config` where each element
750 in the list is a `tuple` of strings representing the hierarchy.
751 """
752 if topLevelOnly: 752 ↛ 753line 752 didn't jump to line 753, because the condition on line 752 was never true
753 return list((k,) for k in self)
755 def getKeysAsTuples(
756 d: Mapping[str, Any] | Sequence[str], keys: list[tuple[str, ...]], base: tuple[str, ...] | None
757 ) -> None:
758 if isinstance(d, Sequence):
759 theseKeys: Iterable[Any] = range(len(d))
760 else:
761 theseKeys = d.keys()
762 for key in theseKeys:
763 val = d[key]
764 levelKey = base + (key,) if base is not None else (key,)
765 keys.append(levelKey)
766 if isinstance(val, (Mapping, Sequence)) and not isinstance(val, str):
767 getKeysAsTuples(val, keys, levelKey)
769 keys: list[tuple[str, ...]] = []
770 getKeysAsTuples(self._data, keys, None)
771 return keys
773 def names(self, topLevelOnly: bool = False, delimiter: str | None = None) -> list[str]:
774 """Get a delimited name of all the keys in the hierarchy.
776 The values returned from this method are guaranteed to be usable
777 to access items in the configuration object.
779 Parameters
780 ----------
781 topLevelOnly : `bool`, optional
782 If False, the default, a full hierarchy of names is returned.
783 If True, only the top level are returned.
784 delimiter : `str`, optional
785 Delimiter to use when forming the keys. If the delimiter is
786 present in any of the keys, it will be escaped in the returned
787 names. If `None` given a delimiter will be automatically provided.
788 The delimiter can not be alphanumeric.
790 Returns
791 -------
792 names : `list` of `str`
793 List of all names present in the `Config`.
795 Notes
796 -----
797 This is different than the built-in method `dict.keys`, which will
798 return only the first level keys.
800 Raises
801 ------
802 ValueError:
803 The supplied delimiter is alphanumeric.
804 """
805 if topLevelOnly:
806 return list(self.keys())
808 # Get all the tuples of hierarchical keys
809 nameTuples = self.nameTuples()
811 if delimiter is not None and delimiter.isalnum():
812 raise ValueError(f"Supplied delimiter ({delimiter!r}) must not be alphanumeric.")
814 if delimiter is None:
815 # Start with something, and ensure it does not need to be
816 # escaped (it is much easier to understand if not escaped)
817 delimiter = self._D
819 # Form big string for easy check of delimiter clash
820 combined = "".join("".join(str(s) for s in k) for k in nameTuples)
822 # Try a delimiter and keep trying until we get something that
823 # works.
824 ntries = 0
825 while delimiter in combined:
826 log.debug("Delimiter '%s' could not be used. Trying another.", delimiter)
827 ntries += 1
829 if ntries > 100:
830 raise ValueError(f"Unable to determine a delimiter for Config {self}")
832 # try another one
833 while True:
834 delimiter = chr(ord(delimiter) + 1)
835 if not delimiter.isalnum():
836 break
838 log.debug("Using delimiter %r", delimiter)
840 # Form the keys, escaping the delimiter if necessary
841 strings = [
842 delimiter + delimiter.join(str(s).replace(delimiter, f"\\{delimiter}") for s in k)
843 for k in nameTuples
844 ]
845 return strings
847 def asArray(self, name: str | Sequence[str]) -> Sequence[Any]:
848 """Get a value as an array.
850 May contain one or more elements.
852 Parameters
853 ----------
854 name : `str`
855 Key to use to retrieve value.
857 Returns
858 -------
859 array : `collections.abc.Sequence`
860 The value corresponding to name, but guaranteed to be returned
861 as a list with at least one element. If the value is a
862 `~collections.abc.Sequence` (and not a `str`) the value itself
863 will be returned, else the value will be the first element.
864 """
865 val = self.get(name)
866 if isinstance(val, str):
867 val = [val]
868 elif not isinstance(val, Sequence):
869 val = [val]
870 return val
872 def __eq__(self, other: Any) -> bool:
873 if isinstance(other, Config):
874 other = other._data
875 return self._data == other
877 def __ne__(self, other: Any) -> bool:
878 if isinstance(other, Config):
879 other = other._data
880 return self._data != other
882 #######
883 # i/o #
885 def dump(self, output: IO | None = None, format: str = "yaml") -> str | None:
886 """Write the config to an output stream.
888 Parameters
889 ----------
890 output : `IO`, optional
891 The stream to use for output. If `None` the serialized content
892 will be returned.
893 format : `str`, optional
894 The format to use for the output. Can be "yaml" or "json".
896 Returns
897 -------
898 serialized : `str` or `None`
899 If a stream was given the stream will be used and the return
900 value will be `None`. If the stream was `None` the
901 serialization will be returned as a string.
902 """
903 if format == "yaml":
904 return yaml.safe_dump(self._data, output, default_flow_style=False)
905 elif format == "json":
906 if output is not None:
907 json.dump(self._data, output, ensure_ascii=False)
908 return None
909 else:
910 return json.dumps(self._data, ensure_ascii=False)
911 raise ValueError(f"Unsupported format for Config serialization: {format}")
def dumpToUri(
    self,
    uri: ResourcePathExpression,
    updateFile: bool = True,
    defaultFileName: str = "butler.yaml",
    overwrite: bool = True,
) -> None:
    """Write the config to location pointed to by given URI.

    Currently supports 's3' and 'file' URI schemes.

    Parameters
    ----------
    uri : `lsst.resources.ResourcePathExpression`
        URI of location where the Config will be written.
    updateFile : `bool`, optional
        If True and uri does not end on a filename with extension, will
        append `defaultFileName` to the target uri. True by default.
    defaultFileName : `str`, optional
        The file name that will be appended to target uri if updateFile is
        True and uri does not end on a file with an extension.
    overwrite : `bool`, optional
        If True the configuration will be written even if it already
        exists at that location.
    """
    # Make local copy of URI or create new one
    uri = ResourcePath(uri)

    if updateFile and not uri.getExtension():
        uri = uri.updatedFile(defaultFileName)

    # Try to work out the format from the extension
    ext = uri.getExtension()
    format = ext[1:].lower()

    output = self.dump(format=format)
    assert output is not None, "Config.dump guarantees not-None return when output arg is None"
    uri.write(output.encode(), overwrite=overwrite)
    self.configFile = uri
953 @staticmethod
954 def updateParameters(
955 configType: type[ConfigSubset],
956 config: Config,
957 full: Config,
958 toUpdate: dict[str, Any] | None = None,
959 toCopy: Sequence[str | Sequence[str]] | None = None,
960 overwrite: bool = True,
961 toMerge: Sequence[str | Sequence[str]] | None = None,
962 ) -> None:
963 """Update specific config parameters.
965 Allows for named parameters to be set to new values in bulk, and
966 for other values to be set by copying from a reference config.
968 Assumes that the supplied config is compatible with ``configType``
969 and will attach the updated values to the supplied config by
970 looking for the related component key. It is assumed that
971 ``config`` and ``full`` are from the same part of the
972 configuration hierarchy.
974 Parameters
975 ----------
976 configType : `ConfigSubset`
977 Config type to use to extract relevant items from ``config``.
978 config : `Config`
979 A `Config` to update. Only the subset understood by
980 the supplied `ConfigSubset` will be modified. Default values
981 will not be inserted and the content will not be validated
982 since mandatory keys are allowed to be missing until
983 populated later by merging.
984 full : `Config`
985 A complete config with all defaults expanded that can be
986 converted to a ``configType``. Read-only and will not be
987 modified by this method. Values are read from here if
988 ``toCopy`` is defined.
990 Repository-specific options that should not be obtained
991 from defaults when Butler instances are constructed
992 should be copied from ``full`` to ``config``.
993 toUpdate : `dict`, optional
994 A `dict` defining the keys to update and the new value to use.
995 The keys and values can be any supported by `Config`
996 assignment.
997 toCopy : `tuple`, optional
998 `tuple` of keys whose values should be copied from ``full``
999 into ``config``.
1000 overwrite : `bool`, optional
1001 If `False`, do not modify a value in ``config`` if the key
1002 already exists. Default is always to overwrite.
1003 toMerge : `tuple`, optional
1004 Keys to merge content from full to config without overwriting
1005 pre-existing values. Only works if the key refers to a hierarchy.
1006 The ``overwrite`` flag is ignored.
1008 Raises
1009 ------
1010 ValueError
1011 Neither ``toUpdate``, ``toCopy`` nor ``toMerge`` were defined.
1012 """
1013 if toUpdate is None and toCopy is None and toMerge is None:
1014 raise ValueError("At least one of toUpdate, toCopy, or toMerge parameters must be set.")
1016 # If this is a parent configuration then we need to ensure that
1017 # the supplied config has the relevant component key in it.
1018 # If this is a parent configuration we add in the stub entry
1019 # so that the ConfigSubset constructor will do the right thing.
1020 # We check full for this since that is guaranteed to be complete.
1021 if (
1022 configType.component is not None
1023 and configType.component in full
1024 and configType.component not in config
1025 ):
1026 config[configType.component] = {}
1028 # Extract the part of the config we wish to update
1029 localConfig = configType(config, mergeDefaults=False, validate=False)
1031 key: str | Sequence[str]
1032 if toUpdate:
1033 for key, value in toUpdate.items():
1034 if key in localConfig and not overwrite:
1035 log.debug(
1036 "Not overriding key '%s' with value '%s' in config %s",
1037 key,
1038 value,
1039 localConfig.__class__.__name__,
1040 )
1041 else:
1042 localConfig[key] = value
1044 if toCopy or toMerge:
1045 localFullConfig = configType(full, mergeDefaults=False)
1047 if toCopy:
1048 for key in toCopy:
1049 if key in localConfig and not overwrite:
1050 log.debug(
1051 "Not overriding key '%s' from defaults in config %s",
1052 key,
1053 localConfig.__class__.__name__,
1054 )
1055 else:
1056 localConfig[key] = localFullConfig[key]
1057 if toMerge:
1058 for key in toMerge:
1059 if key in localConfig:
1060 # Get the node from the config to do the merge
1061 # but then have to reattach to the config.
1062 subset = localConfig[key]
1063 subset.merge(localFullConfig[key])
1064 localConfig[key] = subset
1065 else:
1066 localConfig[key] = localFullConfig[key]
1068 # Reattach to parent if this is a child config
1069 if configType.component is not None and configType.component in config:
1070 config[configType.component] = localConfig
1071 else:
1072 config.update(localConfig)
def toDict(self) -> dict[str, Any]:
    """Convert a `Config` to a standalone hierarchical `dict`.

    Returns
    -------
    d : `dict`
        A deep copy of the internal data in which every `Config`
        instance, at any depth inside nested mappings and lists, has
        been replaced by the result of its own ``toDict`` call.

    Notes
    -----
    This can be useful when passing a Config to some code that
    expects native Python types. Because the result is built from a deep
    copy, modifying it does not modify this object.
    """

    def _expand(node: Any) -> Any:
        # Config must be tested before the generic container checks so
        # that it is converted rather than walked.
        if isinstance(node, Config):
            return node.toDict()
        if isinstance(node, MutableMapping):
            # Snapshot the items so values can be replaced while looping.
            for key, value in list(node.items()):
                node[key] = _expand(value)
        elif isinstance(node, list):
            node[:] = [_expand(item) for item in node]
        return node

    return _expand(copy.deepcopy(self._data))
class ConfigSubset(Config):
    """Config holding one named component of a more general configuration.

    A subclass names its component through the ``component`` class
    attribute. When the supplied configuration contains that component,
    only that part is kept; when it does not, the whole supplied
    configuration is used as-is.

    Defaults are located by looking for the file named by
    ``defaultConfigFile`` in the caller-supplied search paths followed by
    the paths returned from `defaultSearchPaths`, which allows a subclass
    to be instantiated with no arguments. Keys listed in ``requiredKeys``
    can be checked for presence after construction.

    Parameters
    ----------
    other : `Config` or `~lsst.resources.ResourcePathExpression` or `dict`
        Argument specifying the configuration information as understood
        by `Config`.
    validate : `bool`, optional
        If `True`, `validate` is called at the end of construction to
        check for required keys.
    mergeDefaults : `bool`, optional
        If `True` defaults will be read and the supplied config will
        be applied on top of them, so the supplied values take
        precedence.
    searchPaths : `list` or `tuple`, optional
        Explicit additional paths to search for defaults, in priority
        order. They are placed ahead of the paths returned by
        `ConfigSubset.defaultSearchPaths()`. Each entry is converted with
        `lsst.resources.ResourcePath`.
    """

    component: ClassVar[str | None] = None
    """Component to use from supplied config. Can be None. If specified the
    key is not required. Can be a full dot-separated path to a component.
    """

    requiredKeys: ClassVar[Sequence[str]] = ()
    """Keys that are required to be specified in the configuration.
    """

    defaultConfigFile: ClassVar[str | None] = None
    """Name of the file containing defaults for this config class.
    """

    def __init__(
        self,
        other: Config | ResourcePathExpression | Mapping[str, Any] | None = None,
        validate: bool = True,
        mergeDefaults: bool = True,
        searchPaths: Sequence[ResourcePathExpression] | None = None,
    ):
        # Start empty; defaults are loaded first and the external values
        # are applied on top afterwards.
        super().__init__()

        # Read the supplied content as a plain Config, not as a subset.
        supplied = Config(other)

        # Narrow the supplied config to our component. The doubled form
        # (component, component) is looked for first because files pulled
        # in with !include can themselves contain the component name.
        if self.component is not None:
            nested = (self.component, self.component)
            if nested in supplied:
                supplied = supplied[nested]
            elif self.component in supplied:
                # NOTE(review): this raw ``_data`` lookup uses the component
                # as a single key -- confirm it is intended to work for
                # dot-separated component paths.
                supplied._data = supplied._data[self.component]

        # Default files read to create this configuration.
        self.filesRead: list[ResourcePath | str] = []

        # Only set if the "cls" type declares child configurations.
        containerKey = None

        if mergeDefaults:
            # Caller-supplied paths come first, then the default paths.
            fullSearchPath: list[ResourcePath | str] = (
                [ResourcePath(path) for path in searchPaths] if searchPaths else []
            )
            fullSearchPath.extend(self.defaultSearchPaths())

            # Defaults can come from two places:
            # - the "defaultConfigFile" declared on this class;
            # - the class named by the "cls" element of the config.
            # "cls" is read after the first merge in case that changes it.
            if self.defaultConfigFile is not None:
                self._updateWithConfigsFromPath(fullSearchPath, self.defaultConfigFile)

            # A "cls" in the supplied config wins over one from defaults.
            if "cls" in supplied:
                typeName = supplied["cls"]
            elif "cls" in self:
                typeName = self["cls"]
            else:
                typeName = None

            if typeName is not None:
                try:
                    cls = doImportType(typeName)
                except ImportError as e:
                    raise RuntimeError(f"Failed to import cls '{typeName}' for config {type(self)}") from e

                # The referenced class need not declare a defaults file.
                clsDefaults = getattr(cls, "defaultConfigFile", None)
                if clsDefaults is not None:
                    self._updateWithConfigsFromPath(fullSearchPath, clsDefaults)

                # Remember the container key, if the class declares one.
                containerKey = getattr(cls, "containerKey", None)

        # Apply the supplied values last so they override the defaults.
        self.update(supplied)
        if not self.configFile:
            self.configFile = supplied.configFile

        # Child configurations stored under the container key are expanded
        # with this same class so that their defaults are merged as well.
        if mergeDefaults and containerKey is not None and containerKey in self:
            for position, child in enumerate(self[containerKey]):
                self[containerKey, position] = type(self)(
                    other=child, validate=validate, mergeDefaults=mergeDefaults, searchPaths=searchPaths
                )

        if validate:
            self.validate()

    @classmethod
    def defaultSearchPaths(cls) -> list[ResourcePath | str]:
        """Read environment to determine search paths to use.

        Additional defaults can be defined using the environment variable
        named by the module-level ``CONFIG_PATH`` constant
        (``$DAF_BUTLER_CONFIG_PATH``), a PATH-like variable. The
        ``configs`` resource directory of ``cls.resourcesPackage`` is
        always added after those entries.

        Returns
        -------
        paths : `list`
            Paths to search, highest priority first: the entries from the
            environment variable (as `str`, in the order given) followed
            by the package ``configs`` resource as a
            `lsst.resources.ResourcePath`.

        Notes
        -----
        The environment variable is split on `os.pathsep`.
        This currently makes it incompatible with usage of URIs.
        """
        envValue = os.environ.get(CONFIG_PATH)
        paths: list[str | ResourcePath] = [] if envValue is None else list(envValue.split(os.pathsep))

        # The package defaults go last, i.e. at lowest priority.
        paths.append(ResourcePath(f"resource://{cls.resourcesPackage}/configs", forceDirectory=True))
        return paths

    def _updateWithConfigsFromPath(
        self, searchPaths: Sequence[str | ResourcePath], configFile: ResourcePath | str
    ) -> None:
        """Search the supplied paths, merging the configuration values.

        Values that are read override values currently stored in the
        object. Every matching file found along the search path is read,
        processed so that earlier path entries end up with higher
        priority. Each file that is read is recorded in ``filesRead``.

        Parameters
        ----------
        searchPaths : `list` of `lsst.resources.ResourcePath`, `str`
            Paths to search for ``configFile``, in priority order, such
            that files read from the first entry take precedence over
            those read from a later entry.
        configFile : `lsst.resources.ResourcePath` or `str`
            File to locate. If it is absolute and exists it is read
            directly and the search path is not used; otherwise it is
            joined to each search path entry in turn.

        Raises
        ------
        ValueError
            Raised if a search path entry is neither a `str` nor a
            `lsst.resources.ResourcePath`.
        """
        direct = ResourcePath(configFile)
        if direct.isabs() and direct.exists():
            self._updateWithOtherConfigFile(configFile)
            self.filesRead.append(configFile)
            return

        # Walk from lowest to highest priority so that high priority
        # entries update the object last.
        for entry in reversed(searchPaths):
            if not isinstance(entry, (str, ResourcePath)):
                raise ValueError(f"Unexpected search path type encountered: {entry!r}")
            directory = ResourcePath(entry, forceDirectory=True)
            located = directory.join(configFile)
            if located.exists():
                self.filesRead.append(located)
                self._updateWithOtherConfigFile(located)

    def _updateWithOtherConfigFile(self, file: Config | str | ResourcePath | Mapping[str, Any]) -> None:
        """Read in some defaults and update.

        The supplied entity is read as a config of this same class, with
        neither validation nor default merging, and its values are then
        applied over the current values.

        Parameters
        ----------
        file : `Config`, `str`, `lsst.resources.ResourcePath`, or `dict`
            Entity that can be converted to a `ConfigSubset`.
        """
        # Reading with our own class lets the component subsetting happen.
        self.update(type(self)(file, validate=False, mergeDefaults=False))

    def validate(self) -> None:
        """Check that mandatory keys are present in this configuration.

        Nothing is checked if ``requiredKeys`` is empty.

        Raises
        ------
        KeyError
            Raised if any entry of ``requiredKeys`` is not found in the
            internal ``_data`` mapping.
        """
        absent = [required for required in self.requiredKeys if required not in self._data]
        if not absent:
            return
        raise KeyError(f"Mandatory keys ({absent}) missing from supplied configuration for {type(self)}")