Coverage for python/lsst/daf/butler/core/config.py: 44%
487 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-03-31 02:41 -0700
« prev ^ index » next coverage.py v6.5.0, created at 2023-03-31 02:41 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24"""Configuration control."""
26__all__ = ("Config", "ConfigSubset")
28import copy
29import io
30import json
31import logging
32import os
33import pprint
34import sys
35from collections import defaultdict
36from collections.abc import Iterable, Mapping, MutableMapping, Sequence
37from pathlib import Path
38from typing import IO, TYPE_CHECKING, Any, ClassVar, Iterator, cast
40import yaml
41from lsst.resources import ResourcePath, ResourcePathExpression
42from lsst.utils import doImport
43from yaml.representer import Representer
# Allow defaultdict instances to be serialized as plain YAML mappings.
yaml.add_representer(defaultdict, Representer.represent_dict)


# Config module logger
log = logging.getLogger(__name__)

# PATH-like environment variable to use for defaults.
CONFIG_PATH = "DAF_BUTLER_CONFIG_PATH"

if TYPE_CHECKING:
    yamlLoader = yaml.SafeLoader
else:
    # Not all installations have the C library, so fall back to the
    # pure-Python loader when the accelerated one is missing
    # (but assume for mypy's sake that they're the same).
    yamlLoader = getattr(yaml, "CSafeLoader", yaml.SafeLoader)
65def _doUpdate(d: Mapping[str, Any], u: Mapping[str, Any]) -> Mapping[str, Any]:
66 if not isinstance(u, Mapping) or not isinstance(d, MutableMapping): 66 ↛ 67line 66 didn't jump to line 67, because the condition on line 66 was never true
67 raise RuntimeError("Only call update with Mapping, not {}".format(type(d)))
68 for k, v in u.items():
69 if isinstance(v, Mapping):
70 d[k] = _doUpdate(d.get(k, {}), v)
71 else:
72 d[k] = v
73 return d
76def _checkNextItem(k: str | int, d: Any, create: bool, must_be_dict: bool) -> tuple[Any, bool]:
77 """See if k is in d and if it is return the new child."""
78 nextVal = None
79 isThere = False
80 if d is None: 80 ↛ 82line 80 didn't jump to line 82, because the condition on line 80 was never true
81 # We have gone past the end of the hierarchy
82 pass
83 elif not must_be_dict and isinstance(d, Sequence): 83 ↛ 88line 83 didn't jump to line 88, because the condition on line 83 was never true
84 # Check for Sequence first because for lists
85 # __contains__ checks whether value is found in list
86 # not whether the index exists in list. When we traverse
87 # the hierarchy we are interested in the index.
88 try:
89 nextVal = d[int(k)]
90 isThere = True
91 except IndexError:
92 pass
93 except ValueError:
94 isThere = k in d
95 elif k in d:
96 nextVal = d[k]
97 isThere = True
98 elif create: 98 ↛ 99line 98 didn't jump to line 99, because the condition on line 98 was never true
99 d[k] = {}
100 nextVal = d[k]
101 isThere = True
103 return nextVal, isThere
class Loader(yamlLoader):
    """YAML Loader that supports file include directives.

    Uses ``!include`` directive in a YAML file to point to another
    YAML file to be included. The path in the include directive is relative
    to the file containing that directive.

        storageClasses: !include storageClasses.yaml

    Examples
    --------
    >>> with open("document.yaml", "r") as f:
    ...     data = yaml.load(f, Loader=Loader)

    Notes
    -----
    See https://davidchall.github.io/yaml-includes.html
    """

    def __init__(self, stream: str | IO):  # types-PyYAML annotates 'stream' with a private type
        super().__init__(stream)
        # if this is a string and not a stream we may well lack a name
        if hasattr(stream, "name"):
            self._root = ResourcePath(stream.name)
        else:
            # No choice but to assume a local filesystem
            self._root = ResourcePath("no-file.yaml")
        self.add_constructor("!include", Loader.include)

    def include(self, node: yaml.Node) -> list[Any] | dict[str, Any]:
        """Construct the content referenced by an ``!include`` node.

        Parameters
        ----------
        node : `yaml.Node`
            Node carrying the ``!include`` tag. A scalar names one file,
            a sequence names several files, and a mapping associates
            string keys with files.

        Returns
        -------
        result : `list` or `dict` or `typing.Any`
            For a scalar node, the loaded content of the file; for a
            sequence, a `list` of loaded contents; for a mapping, a `dict`
            from each key to the loaded content.

        Raises
        ------
        TypeError
            Raised if a mapping node has a key that is not a `str`.
        yaml.constructor.ConstructorError
            Raised if the node is not a scalar, sequence or mapping.
        """
        result: list[Any] | dict[str, Any]
        if isinstance(node, yaml.ScalarNode):
            return self.extractFile(self.construct_scalar(node))  # type: ignore[arg-type]

        elif isinstance(node, yaml.SequenceNode):
            result = []
            for filename in self.construct_sequence(node):
                result.append(self.extractFile(filename))
            return result

        elif isinstance(node, yaml.MappingNode):
            result = {}
            for k, v in self.construct_mapping(node).items():
                if not isinstance(k, str):
                    raise TypeError(f"Expected only strings in YAML mapping; got {k!r} of type {type(k)}.")
                result[k] = self.extractFile(v)
            return result

        else:
            # Carry the diagnostic in the exception itself (with the
            # location of the offending node) instead of printing to
            # stderr and raising an exception with no message.
            raise yaml.constructor.ConstructorError(
                None,
                None,
                "unrecognised node type in !include statement",
                node.start_mark,
            )

    def extractFile(self, filename: str) -> Any:
        """Load the YAML content of an included file.

        Parameters
        ----------
        filename : `str`
            URI or path given in the ``!include`` directive. If it has no
            scheme it is resolved relative to the file being parsed.

        Returns
        -------
        data : `typing.Any`
            Content loaded from the file using this same loader class, so
            nested ``!include`` directives are honored.
        """
        # It is possible for the !include to point to an explicit URI
        # instead of a relative URI, therefore we first see if it is
        # scheme-less or not. If it has a scheme we use it directly
        # if it is scheme-less we use it relative to the file root.
        requesteduri = ResourcePath(filename, forceAbsolute=False)

        if requesteduri.scheme:
            fileuri = requesteduri
        else:
            fileuri = self._root.updatedFile(filename)

        log.debug("Opening YAML file via !include: %s", fileuri)

        # Read all the data from the resource
        data = fileuri.read()

        # Store the bytes into a BytesIO so we can attach a .name
        stream = io.BytesIO(data)
        stream.name = fileuri.geturl()
        return yaml.load(stream, Loader)
181# Type of the key used for accessing items in configuration object. It can be
182# a single string as described below or a sequence of strings and integer
183# indices. Indices are used to access items in sequences stored in config.
184_ConfigKey = str | Sequence[str | int]
187class Config(MutableMapping):
188 r"""Implements a datatype that is used by `Butler` for configuration.
190 It is essentially a `dict` with key/value pairs, including nested dicts
191 (as values). In fact, it can be initialized with a `dict`.
192 This is explained next:
194 Config extends the `dict` api so that hierarchical values may be accessed
195 with delimited notation or as a tuple. If a string is given the delimiter
196 is picked up from the first character in that string. For example,
197 ``foo.getValue(".a.b.c")``, ``foo["a"]["b"]["c"]``, ``foo["a", "b", "c"]``,
198 ``foo[".a.b.c"]``, and ``foo["/a/b/c"]`` all achieve the same outcome.
199 If the first character is alphanumeric, no delimiter will be used.
200 ``foo["a.b.c"]`` will be a single key ``a.b.c`` as will ``foo[":a.b.c"]``.
201 Unicode characters can be used as the delimiter for distinctiveness if
202 required.
204 If a key in the hierarchy starts with a non-alphanumeric character care
205 should be used to ensure that either the tuple interface is used or
206 a distinct delimiter is always given in string form.
208 Finally, the delimiter can be escaped if it is part of a key and also
209 has to be used as a delimiter. For example, ``foo[r".a.b\.c"]`` results in
210 a two element hierarchy of ``a`` and ``b.c``. For hard-coded strings it is
211 always better to use a different delimiter in these cases.
213 Note that adding a multi-level key implicitly creates any nesting levels
214 that do not exist, but removing multi-level keys does not automatically
215 remove empty nesting levels. As a result:
217 >>> c = Config()
218 >>> c[".a.b"] = 1
219 >>> del c[".a.b"]
 220 >>> c
221 Config({'a': {}})
223 Storage formats supported:
225 - yaml: read and write is supported.
226 - json: read and write is supported but no ``!include`` directive.
228 Parameters
229 ----------
230 other : `lsst.resources.ResourcePath` or `Config` or `dict`
231 Other source of configuration, can be:
233 - (`lsst.resources.ResourcePathExpression`)
 234 Treated as a URI to a config file. Must end with ".yaml" or ".json".
235 - (`Config`) Copies the other Config's values into this one.
236 - (`dict`) Copies the values from the dict into this Config.
238 If `None` is provided an empty `Config` will be created.
239 """
241 _D: str = "→"
242 """Default internal delimiter to use for components in the hierarchy when
243 constructing keys for external use (see `Config.names()`)."""
245 includeKey: ClassVar[str] = "includeConfigs"
246 """Key used to indicate that another config should be included at this
247 part of the hierarchy."""
249 resourcesPackage: str = "lsst.daf.butler"
250 """Package to search for default configuration data. The resources
251 themselves will be within a ``configs`` resource hierarchy."""
def __init__(self, other: ResourcePathExpression | Config | Mapping[str, Any] | None = None):
    """Build a config from nothing, another config, a mapping or a URI.

    Parameters
    ----------
    other : `Config`, `~collections.abc.Mapping`, `str`, \
            `lsst.resources.ResourcePath`, `pathlib.Path` or `None`
        Source of the initial content. `None` yields an empty config.

    Raises
    ------
    RuntimeError
        Raised if ``other`` is of an unsupported type.
    """
    self._data: dict[str, Any] = {}
    self.configFile: ResourcePath | None = None

    if other is None:
        return

    if isinstance(other, Config):
        # Deep copy might be more efficient but if someone has overridden
        # a config entry to store a complex object then deep copy may
        # fail. Safer to use update().
        self.update(other._data)
        self.configFile = other.configFile
        return

    # In most cases we have a dict, and it's more efficient
    # to check for a dict instance before checking the generic mapping.
    if isinstance(other, (dict, Mapping)):
        self.update(other)
        return

    if not isinstance(other, (str, ResourcePath, Path)):
        # The source could not be recognized.
        raise RuntimeError(f"A Config could not be loaded from other: {other}")

    # Anything path-like is assumed to be a file path/URI.
    self.__initFromUri(other)
    self._processExplicitIncludes()
def ppprint(self) -> str:
    """Return the config content as a pretty-printed string.

    The underlying data is formatted with `pprint.pformat` using an
    indent of 2 and a width of 1.

    Examples
    --------
    use: ``pdb> print(myConfigObject.ppprint())``

    Returns
    -------
    s : `str`
        A prettyprint formatted string representing the config
    """
    formatted = pprint.pformat(self._data, indent=2, width=1)
    return formatted
293 def __repr__(self) -> str:
294 return f"{type(self).__name__}({self._data!r})"
296 def __str__(self) -> str:
297 return self.ppprint()
299 def __len__(self) -> int:
300 return len(self._data)
302 def __iter__(self) -> Iterator[str]:
303 return iter(self._data)
def copy(self) -> Config:
    """Return a new instance of the same class constructed from this one."""
    cls = type(self)
    return cls(self)
@classmethod
def fromString(cls, string: str, format: str = "yaml") -> Config:
    """Create a new Config instance from a serialized string.

    Parameters
    ----------
    string : `str`
        String containing content in specified format
    format : `str`, optional
        Format of the supplied string. Can be ``json`` or ``yaml``.

    Returns
    -------
    c : `Config`
        Newly-constructed Config, with any ``includeConfigs`` directives
        already processed.

    Raises
    ------
    ValueError
        Raised if ``format`` is neither ``yaml`` nor ``json``.
    """
    # Reject unknown formats before constructing anything.
    if format not in ("yaml", "json"):
        raise ValueError(f"Unexpected format of string: {format}")

    blank = cls()
    if format == "yaml":
        new_config = blank.__initFromYaml(string)
    else:
        new_config = blank.__initFromJson(string)
    new_config._processExplicitIncludes()
    return new_config
@classmethod
def fromYaml(cls, string: str) -> Config:
    """Create a new Config instance from a YAML string.

    This simply delegates to `fromString` with ``format="yaml"``.

    Parameters
    ----------
    string : `str`
        String containing content in YAML format

    Returns
    -------
    c : `Config`
        Newly-constructed Config.
    """
    new_config = cls.fromString(string, format="yaml")
    return new_config
def __initFromUri(self, path: ResourcePathExpression) -> None:
    """Load a file from a path or an URI.

    Parameters
    ----------
    path : `lsst.resources.ResourcePathExpression`
        Path or a URI to a persisted config file. The extension must be
        ``.yaml`` or ``.json``.

    Raises
    ------
    FileNotFoundError
        Raised if the extension is unsupported and the location does not
        exist.
    RuntimeError
        Raised if the extension is unsupported but the location exists.
    """
    uri = ResourcePath(path)
    ext = uri.getExtension()

    if ext not in (".yaml", ".json"):
        # This URI does not have a valid extension. It might be because
        # we ended up with a directory and not a file. Before we complain
        # about an extension, do an existence check. No need to do
        # the (possibly expensive) existence check for supported
        # extensions because we will find out soon enough that the file
        # is not there.
        if not uri.exists():
            raise FileNotFoundError(f"Config location {uri} does not exist.")
        raise RuntimeError(f"The Config URI does not have a supported extension: {uri}")

    if ext == ".yaml":
        log.debug("Opening YAML config file: %s", uri.geturl())
        raw = uri.read()
        # Use a stream so we can name it
        named = io.BytesIO(raw)
        named.name = uri.geturl()
        self.__initFromYaml(named)
    else:
        log.debug("Opening JSON config file: %s", uri.geturl())
        raw = uri.read()
        self.__initFromJson(raw)

    self.configFile = uri
def __initFromYaml(self, stream: IO | str | bytes) -> Config:
    """Load a YAML config from any readable stream that contains one.

    Parameters
    ----------
    stream: `IO` or `str`
        Stream to pass to the YAML loader. Accepts anything that
        `yaml.load` accepts. This can include a string as well as an
        IO stream.

    Returns
    -------
    self : `Config`
        This instance, with its data replaced by the loaded content (an
        empty `dict` if the loader returned `None`).

    Raises
    ------
    yaml.YAMLError
        If there is an error loading the file.
    """
    parsed = yaml.load(stream, Loader=Loader)
    self._data = {} if parsed is None else parsed
    return self
403 def __initFromJson(self, stream: IO | str | bytes) -> Config:
404 """Load a JSON config from any readable stream that contains one.
406 Parameters
407 ----------
408 stream: `IO` or `str`
409 Stream to pass to the JSON loader. This can include a string as
410 well as an IO stream.
412 Raises
413 ------
414 TypeError:
415 Raised if there is an error loading the content.
416 """
417 if isinstance(stream, (bytes, str)):
418 content = json.loads(stream)
419 else:
420 content = json.load(stream)
421 if content is None:
422 content = {}
423 self._data = content
424 return self
def _processExplicitIncludes(self) -> None:
    """Scan through the configuration searching for the special includes.

    Looks for ``includeConfigs`` directive and processes the includes.
    Each directive is removed from the config and replaced by the merged
    content of the files it names; values already present at that level
    of the config take precedence over included values.

    Raises
    ------
    RuntimeError
        Raised if ``configFile`` is set to something other than a
        `lsst.resources.ResourcePath`, or if an included file cannot be
        found.
    """
    # Search paths for config files: current directory first, then the
    # directory of the file this config was read from (if any).
    searchPaths = [ResourcePath(os.path.curdir, forceDirectory=True)]
    if self.configFile is not None:
        if not isinstance(self.configFile, ResourcePath):
            raise RuntimeError(f"Unexpected type for config file: {self.configFile}")
        searchPaths.append(self.configFile.dirname())

    # The list of names is computed once, before any modification.
    for path in self.nameTuples():
        if path[-1] != self.includeKey:
            continue

        log.debug("Processing file include directive at %s", self._D + self._D.join(path))
        basePath = path[:-1]

        # Extract the includes and then delete them from the config
        includes = self[path]
        del self[path]

        # Be consistent and convert to a list
        if not isinstance(includes, list):
            includes = [includes]

        # Read each file assuming it is a reference to a file
        # The file can be relative to config file or cwd
        # ConfigSubset search paths are not used
        subConfigs = []
        for entry in includes:
            # Expand any shell variables -- this could be URI
            target = ResourcePath(os.path.expandvars(entry), forceAbsolute=False)
            if target.isabs():
                found = target
            else:
                # Lazily test each search location in order, stopping at
                # the first hit (remote resource check might be expensive).
                candidates = (root.join(target.path) for root in searchPaths)
                found = next((c for c in candidates if c.exists()), None)
            if not found:
                raise RuntimeError(f"Unable to find referenced include file: {target}")

            # Read the referenced Config as a Config
            subConfigs.append(type(self)(found))

        # Now we need to merge these sub configs with the current
        # information that was present in this node in the config
        # tree with precedence given to the explicit values
        newConfig = subConfigs.pop(0)
        for extra in subConfigs:
            newConfig.update(extra)

        # Explicit values take precedence
        if basePath:
            newConfig.update(self[basePath])
            # And reattach to the base config
            self[basePath] = newConfig
        else:
            # This is an include at the root config
            newConfig.update(self)
            # Replace the current config
            self._data = newConfig._data
496 @staticmethod
497 def _splitIntoKeys(key: _ConfigKey) -> list[str | int]:
498 r"""Split the argument for get/set/in into a hierarchical list.
500 Parameters
501 ----------
502 key : `str` or iterable
503 Argument given to get/set/in. If an iterable is provided it will
504 be converted to a list. If the first character of the string
505 is not an alphanumeric character then it will be used as the
506 delimiter for the purposes of splitting the remainder of the
507 string. If the delimiter is also in one of the keys then it
508 can be escaped using ``\``. There is no default delimiter.
510 Returns
511 -------
512 keys : `list`
513 Hierarchical keys as a `list`.
514 """
515 if isinstance(key, str):
516 if not key[0].isalnum(): 516 ↛ 517line 516 didn't jump to line 517, because the condition on line 516 was never true
517 d = key[0]
518 key = key[1:]
519 else:
520 return [
521 key,
522 ]
523 escaped = f"\\{d}"
524 temp = None
525 if escaped in key:
526 # Complain at the attempt to escape the escape
527 doubled = rf"\{escaped}"
528 if doubled in key:
529 raise ValueError(
530 f"Escaping an escaped delimiter ({doubled} in {key}) is not yet supported."
531 )
532 # Replace with a character that won't be in the string
533 temp = "\r"
534 if temp in key or d == temp:
535 raise ValueError(
536 f"Can not use character {temp!r} in hierarchical key or as"
537 " delimiter if escaping the delimiter"
538 )
539 key = key.replace(escaped, temp)
540 hierarchy = key.split(d)
541 if temp:
542 hierarchy = [h.replace(temp, d) for h in hierarchy]
543 # Copy the list to keep mypy quiet.
544 return list(hierarchy)
545 elif isinstance(key, Iterable): 545 ↛ 549line 545 didn't jump to line 549, because the condition on line 545 was never false
546 return list(key)
547 else:
548 # Do not try to guess.
549 raise TypeError(f"Provided key [{key}] neither str nor iterable.")
551 def _getKeyHierarchy(self, name: _ConfigKey) -> list[str | int]:
552 """Retrieve the key hierarchy for accessing the Config.
554 Parameters
555 ----------
556 name : `str` or `tuple`
557 Delimited string or `tuple` of hierarchical keys.
559 Returns
560 -------
561 hierarchy : `list` of `str`
562 Hierarchy to use as a `list`. If the name is available directly
563 as a key in the Config it will be used regardless of the presence
564 of any nominal delimiter.
565 """
566 keys: list[str | int]
567 if name in self._data:
568 keys = [cast(str, name)]
569 else:
570 keys = self._splitIntoKeys(name)
571 return keys
def _findInHierarchy(self, keys: Sequence[str | int], create: bool = False) -> tuple[list[Any], bool]:
    """Look for hierarchy of keys in Config.

    Parameters
    ----------
    keys : `list` or `tuple`
        Keys to search in hierarchy.
    create : `bool`, optional
        If `True`, if a part of the hierarchy does not exist, insert an
        empty `dict` into the hierarchy.

    Returns
    -------
    hierarchy : `list`
        List of the value corresponding to each key in the supplied
        hierarchy. Only keys that exist in the hierarchy will have
        a value.
    complete : `bool`
        `True` if the full hierarchy exists and the final element
        in ``hierarchy`` is the value of the last key.
    """
    found: list[Any] = []
    node: Any = self._data
    for depth, key in enumerate(keys):
        # The top level must be a dict, so the sequence check is only
        # worth doing from the second level down.
        node, present = _checkNextItem(key, node, create, depth == 0)
        if not present:
            return found, False
        found.append(node)
    return found, True
def __getitem__(self, name: _ConfigKey) -> Any:
    """Return the value stored at ``name``.

    A string that exactly matches a top-level key is used as-is; this
    allows `Config.items()` to work via a simple ``__iter__``
    implementation that returns top level keys. Otherwise the name is
    interpreted as a hierarchy. Mapping values are returned wrapped in
    a new `Config`.

    Raises
    ------
    KeyError
        Raised if the hierarchy does not fully exist.
    """
    # If the name matches a key in the top-level hierarchy, bypass
    # all further cleverness.
    direct = isinstance(name, str)
    if direct:
        try:
            data = self._data[name]
        except KeyError:
            direct = False

    if not direct:
        keys = self._getKeyHierarchy(name)
        hierarchy, complete = self._findInHierarchy(keys)
        if not complete:
            raise KeyError(f"{name} not found")
        data = hierarchy[-1]

    # In most cases we have a dict, and it's more efficient
    # to check for a dict instance before checking the generic mapping.
    if not isinstance(data, (dict, Mapping)):
        return data

    child = Config(data)
    # Ensure that child configs inherit the parent internal delimiter
    if self._D != Config._D:
        child._D = self._D
    return child
def __setitem__(self, name: _ConfigKey, value: Any) -> None:
    """Store ``value`` at ``name``, creating missing levels as needed.

    A `Config` value is stored as a deep copy of its underlying data.
    """
    keys = self._getKeyHierarchy(name)
    last = keys.pop()
    if isinstance(value, Config):
        value = copy.deepcopy(value._data)

    # Only the parents are looked up (and created); the last key is
    # assigned on whatever container holds it.
    parents, _ = self._findInHierarchy(keys, create=True)
    target = parents[-1] if parents else self._data

    try:
        target[last] = value
    except TypeError:
        # Retry with the key converted to an integer index.
        target[int(last)] = value
664 def __contains__(self, key: Any) -> bool:
665 if not isinstance(key, str | Sequence): 665 ↛ 666line 665 didn't jump to line 666, because the condition on line 665 was never true
666 return False
667 keys = self._getKeyHierarchy(key)
668 hierarchy, complete = self._findInHierarchy(keys)
669 return complete
671 def __delitem__(self, key: str | Sequence[str]) -> None:
672 keys = self._getKeyHierarchy(key)
673 last = keys.pop()
674 hierarchy, complete = self._findInHierarchy(keys)
675 if complete: 675 ↛ 682line 675 didn't jump to line 682, because the condition on line 675 was never false
676 if hierarchy: 676 ↛ 677line 676 didn't jump to line 677, because the condition on line 676 was never true
677 data = hierarchy[-1]
678 else:
679 data = self._data
680 del data[last]
681 else:
682 raise KeyError(f"{key} not found in Config")
def update(self, other: Mapping[str, Any]) -> None:  # type: ignore[override]
    """Update config from other `Config` or `dict`.

    Unlike `dict.update()`, nested mappings are merged key by key
    rather than being replaced wholesale.

    Parameters
    ----------
    other : `dict` or `Config`
        Source of configuration.

    Examples
    --------
    >>> c = Config({"a": {"b": 1}})
    >>> c.update({"a": {"c": 2}})
    >>> c == {"a": {"b": 1, "c": 2}}
    True

    >>> foo = {"a": {"b": 1}}
    >>> foo.update({"a": {"c": 2}})
    >>> foo == {"a": {"c": 2}}
    True
    """
    # The merge is done in place on the underlying data.
    _doUpdate(self._data, other)
def merge(self, other: Mapping) -> None:
    """Merge another Config into this one.

    Like `Config.update()`, but only adds keys and values from ``other``
    that do not already exist in this config. Existing keys and values
    are not overwritten.

    Parameters
    ----------
    other : `dict` or `Config`
        Source of configuration.

    Raises
    ------
    TypeError
        Raised if ``other`` is not a mapping.
    """
    if not isinstance(other, Mapping):
        raise TypeError(f"Can only merge a Mapping into a Config, not {type(other)}")

    # Start from a fresh Config built from the supplied mapping, then
    # lay this config's values on top so that they win.
    combined = Config(other)
    combined.update(self)
    self._data = combined._data
def nameTuples(self, topLevelOnly: bool = False) -> list[tuple[str, ...]]:
    """Get tuples representing the name hierarchies of all keys.

    The tuples returned from this method are guaranteed to be usable
    to access items in the configuration object.

    Parameters
    ----------
    topLevelOnly : `bool`, optional
        If False, the default, a full hierarchy of names is returned.
        If True, only the top level are returned.

    Returns
    -------
    names : `list` of `tuple`
        List of all names present in the `Config` where each element
        in the list is a `tuple` representing the hierarchy. Entries
        inside sequences are addressed by their integer index.
    """
    if topLevelOnly:
        return [(k,) for k in self]

    def walk(node: Any, prefix: tuple[Any, ...]) -> Iterator[tuple[Any, ...]]:
        # Emit each entry before descending into it (pre-order).
        indices: Iterable[Any] = range(len(node)) if isinstance(node, Sequence) else node.keys()
        for index in indices:
            child = node[index]
            here = prefix + (index,)
            yield here
            # Strings are sequences too but are treated as leaves.
            if isinstance(child, (Mapping, Sequence)) and not isinstance(child, str):
                yield from walk(child, here)

    return list(walk(self._data, ()))
def names(self, topLevelOnly: bool = False, delimiter: str | None = None) -> list[str]:
    """Get a delimited name of all the keys in the hierarchy.

    The values returned from this method are guaranteed to be usable
    to access items in the configuration object.

    Parameters
    ----------
    topLevelOnly : `bool`, optional
        If False, the default, a full hierarchy of names is returned.
        If True, only the top level are returned.
    delimiter : `str`, optional
        Delimiter to use when forming the keys. If the delimiter is
        present in any of the keys, it will be escaped in the returned
        names. If `None` given a delimiter will be automatically provided.
        The delimiter can not be alphanumeric.

    Returns
    -------
    names : `list` of `str`
        List of all names present in the `Config`.

    Notes
    -----
    This is different than the built-in method `dict.keys`, which will
    return only the first level keys.

    Raises
    ------
    ValueError:
        The supplied delimiter is alphanumeric, or no usable delimiter
        could be determined automatically.
    """
    if topLevelOnly:
        return list(self.keys())

    # Get all the tuples of hierarchical keys
    nameTuples = self.nameTuples()

    if delimiter is not None and delimiter.isalnum():
        raise ValueError(f"Supplied delimiter ({delimiter!r}) must not be alphanumeric.")

    if delimiter is None:
        # Start with something, and ensure it does not need to be
        # escaped (it is much easier to understand if not escaped)
        delimiter = self._D

        # Form big string for easy check of delimiter clash
        combined = "".join("".join(str(part) for part in parts) for parts in nameTuples)

        # Try a delimiter and keep trying until we get something that
        # works.
        attempts = 0
        while delimiter in combined:
            log.debug("Delimiter '%s' could not be used. Trying another.", delimiter)
            attempts += 1

            if attempts > 100:
                raise ValueError(f"Unable to determine a delimiter for Config {self}")

            # Step through successive code points until one is not
            # alphanumeric.
            candidate = chr(ord(delimiter) + 1)
            while candidate.isalnum():
                candidate = chr(ord(candidate) + 1)
            delimiter = candidate

    log.debug("Using delimiter %r", delimiter)

    # Form the keys, escaping the delimiter if necessary
    escape = f"\\{delimiter}"
    result = []
    for parts in nameTuples:
        pieces = (str(part).replace(delimiter, escape) for part in parts)
        result.append(delimiter + delimiter.join(pieces))
    return result
def asArray(self, name: str | Sequence[str]) -> Sequence[Any]:
    """Get a value as an array.

    May contain one or more elements.

    Parameters
    ----------
    name : `str`
        Key to use to retrieve value.

    Returns
    -------
    array : `collections.abc.Sequence`
        The value corresponding to name. If the value is a
        `~collections.abc.Sequence` (and not a `str`) the value itself
        is returned, else a single-element `list` holding the value.
    """
    value = self.get(name)
    # Strings are sequences but are treated as scalars here.
    if isinstance(value, str) or not isinstance(value, Sequence):
        return [value]
    return value
def __eq__(self, other: Any) -> bool:
    """Compare the stored data with ``other``.

    If ``other`` is a `Config` its stored data is used for the
    comparison; anything else is compared directly.
    """
    rhs = other._data if isinstance(other, Config) else other
    return self._data == rhs
def __ne__(self, other: Any) -> bool:
    """Return the ``!=`` comparison of the stored data with ``other``.

    If ``other`` is a `Config` its stored data is used for the
    comparison; anything else is compared directly.
    """
    rhs = other._data if isinstance(other, Config) else other
    return self._data != rhs
879 #######
880 # i/o #
def dump(self, output: IO | None = None, format: str = "yaml") -> str | None:
    """Write the config to an output stream.

    Parameters
    ----------
    output : `IO`, optional
        The stream to use for output. If `None` the serialized content
        will be returned.
    format : `str`, optional
        The format to use for the output. Can be "yaml" or "json".

    Returns
    -------
    serialized : `str` or `None`
        If a stream was given the stream will be used and the return
        value will be `None`. If the stream was `None` the
        serialization will be returned as a string.

    Raises
    ------
    ValueError
        Raised if ``format`` is not a supported format.
    """
    if format == "json":
        if output is None:
            return json.dumps(self._data, ensure_ascii=False)
        json.dump(self._data, output, ensure_ascii=False)
        return None

    if format == "yaml":
        return yaml.safe_dump(self._data, output, default_flow_style=False)

    raise ValueError(f"Unsupported format for Config serialization: {format}")
def dumpToUri(
    self,
    uri: ResourcePathExpression,
    updateFile: bool = True,
    defaultFileName: str = "butler.yaml",
    overwrite: bool = True,
) -> None:
    """Write the config to location pointed to by given URI.

    Currently supports 's3' and 'file' URI schemes.

    Parameters
    ----------
    uri: `lsst.resources.ResourcePathExpression`
        URI of location where the Config will be written.
    updateFile : bool, optional
        If True and uri does not end on a filename with extension, will
        append `defaultFileName` to the target uri. True by default.
    defaultFileName : str, optional
        The file name that will be appended to target uri if updateFile is
        True and uri does not end on a file with an extension.
    overwrite : bool, optional
        If True the configuration will be written even if it already
        exists at that location.
    """
    # Make local copy of URI or create new one
    target = ResourcePath(uri)
    if updateFile and not target.getExtension():
        target = target.updatedFile(defaultFileName)

    # The serialization format is the lower-cased extension without its
    # leading dot.
    format = target.getExtension()[1:].lower()

    serialized = self.dump(format=format)
    assert serialized is not None, "Config.dump guarantees not-None return when output arg is None"
    target.write(serialized.encode(), overwrite=overwrite)
    self.configFile = target
@staticmethod
def updateParameters(
    configType: type[ConfigSubset],
    config: Config,
    full: Config,
    toUpdate: dict[str, Any] | None = None,
    toCopy: Sequence[str | Sequence[str]] | None = None,
    overwrite: bool = True,
    toMerge: Sequence[str | Sequence[str]] | None = None,
) -> None:
    """Update selected parameters of a config in bulk.

    Named parameters can be assigned new values directly, copied from a
    reference config, or merged from a reference config. The part of
    ``config`` that is modified is the subset selected by
    ``configType``; ``config`` and ``full`` are assumed to come from
    the same level of the configuration hierarchy.

    Parameters
    ----------
    configType : `ConfigSubset`
        Config type used to extract the relevant items from ``config``
        and ``full``.
    config : `Config`
        The `Config` to update in place. The subset is extracted
        without merging defaults and without validation, so mandatory
        keys may still be missing.
    full : `Config`
        A complete config with all defaults expanded that can be
        converted to a ``configType``. It is only read from, and only
        when ``toCopy`` or ``toMerge`` is given.
    toUpdate : `dict`, optional
        Mapping of key to new value. Keys and values can be anything
        supported by `Config` assignment.
    toCopy : `tuple`, optional
        Keys whose values should be copied from ``full`` into
        ``config``.
    overwrite : `bool`, optional
        If `False`, keys already present in ``config`` are left alone
        by ``toUpdate`` and ``toCopy`` (a debug message is logged
        instead). Defaults to `True`.
    toMerge : `tuple`, optional
        Keys whose content should be merged from ``full`` into
        ``config``. A key already present in ``config`` must refer to
        something with a ``merge`` method; a missing key is simply
        copied. The ``overwrite`` flag is not consulted here.

    Raises
    ------
    ValueError
        Raised if ``toUpdate``, ``toCopy`` and ``toMerge`` are all
        `None`.
    """
    if toUpdate is None and toCopy is None and toMerge is None:
        raise ValueError("At least one of toUpdate, toCopy, or toMerge parameters must be set.")

    component = configType.component

    # When the complete config carries the component key but the config
    # being updated does not, insert an empty stub so that the subset
    # constructor selects (and we later reattach to) the right node.
    if component is not None and component in full and component not in config:
        config[component] = {}

    # The slice of the supplied config that will actually be modified.
    target = configType(config, mergeDefaults=False, validate=False)

    for name, newValue in (toUpdate or {}).items():
        if name not in target or overwrite:
            target[name] = newValue
        else:
            log.debug(
                "Not overriding key '%s' with value '%s' in config %s",
                name,
                newValue,
                target.__class__.__name__,
            )

    if toCopy or toMerge:
        # Only build the reference subset when something will read it.
        reference = configType(full, mergeDefaults=False)

        for copyKey in toCopy or ():
            if copyKey not in target or overwrite:
                target[copyKey] = reference[copyKey]
            else:
                log.debug(
                    "Not overriding key '%s' from defaults in config %s",
                    copyKey,
                    target.__class__.__name__,
                )

        for mergeKey in toMerge or ():
            if mergeKey not in target:
                target[mergeKey] = reference[mergeKey]
                continue
            # Pull the node out, merge into it, then reattach it so the
            # merged content ends up stored in the target.
            node = target[mergeKey]
            node.merge(reference[mergeKey])
            target[mergeKey] = node

    # Push the modified subset back into the config we were given.
    if component is not None and component in config:
        config[component] = target
    else:
        config.update(target)
def toDict(self) -> dict[str, Any]:
    """Convert a `Config` to a standalone hierarchical `dict`.

    Returns
    -------
    d : `dict`
        A deep copy of the internal data in which every `Config`
        instance, at any depth of nested `dict` or `list` containers,
        has been replaced by the result of its own ``toDict`` call.

    Notes
    -----
    This can be useful when passing a Config to some code that
    expects native Python types. The returned structure shares no
    containers with this object.
    """

    def _plain(node: Any, seen: set[int]) -> Any:
        # A Config knows how to flatten itself.
        if isinstance(node, Config):
            return node.toDict()
        if isinstance(node, (dict, list)):
            # Guard against shared or self-referential containers so
            # each one is only walked once.
            if id(node) in seen:
                return node
            seen.add(id(node))
            entries = node.items() if isinstance(node, dict) else enumerate(node)
            # Snapshot the entries; values are replaced in place but
            # the set of keys/indices never changes.
            for key, value in list(entries):
                node[key] = _plain(value, seen)
        return node

    return _plain(copy.deepcopy(self._data), set())
class ConfigSubset(Config):
    """Config representing a subset of a more general configuration.

    Subclasses define their own component and when given a configuration
    that includes that component, the resulting configuration only includes
    the subset. For example, your config might contain ``dimensions`` if it's
    part of a global config and that subset will be stored. If ``dimensions``
    can not be found it is assumed that the entire contents of the
    configuration should be used.

    Default values are read from the environment or supplied search paths
    using the default configuration file name specified in the subclass.
    This allows a configuration class to be instantiated without any
    additional arguments.

    Additional validation can be specified to check for keys that are mandatory
    in the configuration.

    Parameters
    ----------
    other : `Config` or `str` or `dict`
        Argument specifying the configuration information as understood
        by `Config`.
    validate : `bool`, optional
        If `True` required keys will be checked to ensure configuration
        consistency.
    mergeDefaults : `bool`, optional
        If `True` defaults will be read and the supplied config will
        be combined with the defaults, with the supplied values taking
        precedence.
    searchPaths : `list` or `tuple`, optional
        Explicit additional paths to search for defaults. They should
        be supplied in priority order. These paths have higher priority
        than those read from the environment in
        `ConfigSubset.defaultSearchPaths()`. Paths can be `str` referring to
        the local file system or URIs, `lsst.resources.ResourcePath`.

    Raises
    ------
    RuntimeError
        Raised if defaults are being merged and the class named by the
        ``cls`` entry of the configuration can not be imported.
    """

    component: ClassVar[str | None] = None
    """Component to use from supplied config. Can be None. If specified the
    key is not required. Can be a full dot-separated path to a component.
    """

    requiredKeys: ClassVar[Sequence[str]] = ()
    """Keys that are required to be specified in the configuration.
    """

    defaultConfigFile: ClassVar[str | None] = None
    """Name of the file containing defaults for this config class.
    """

    def __init__(
        self,
        other: Config | ResourcePathExpression | Mapping[str, Any] | None = None,
        validate: bool = True,
        mergeDefaults: bool = True,
        searchPaths: Sequence[ResourcePathExpression] | None = None,
    ):
        # Create a blank object to receive the defaults
        # Once we have the defaults we then update with the external values
        super().__init__()

        # Create a standard Config rather than subset
        externalConfig = Config(other)

        # Select the part we need from it
        # To simplify the use of !include we also check for the existence of
        # component.component (since the included files can themselves
        # include the component name)
        if self.component is not None:
            doubled = (self.component, self.component)
            # Must check for double depth first
            if doubled in externalConfig:
                externalConfig = externalConfig[doubled]
            elif self.component in externalConfig:
                externalConfig._data = externalConfig._data[self.component]

        # Default files read to create this configuration
        self.filesRead: list[ResourcePath | str] = []

        # Assume we are not looking up child configurations
        containerKey = None

        # Sometimes we do not want to merge with defaults.
        if mergeDefaults:
            # Supplied search paths have highest priority
            fullSearchPath: list[ResourcePath | str] = []
            if searchPaths:
                fullSearchPath = [ResourcePath(path) for path in searchPaths]

            # Read default paths from environment
            fullSearchPath.extend(self.defaultSearchPaths())

            # There are two places to find defaults for this particular config
            # - The "defaultConfigFile" defined in the subclass
            # - The class specified in the "cls" element in the config.
            # Read cls after merging in case it changes.
            if self.defaultConfigFile is not None:
                self._updateWithConfigsFromPath(fullSearchPath, self.defaultConfigFile)

            # Can have a class specification in the external config (priority)
            # or from the defaults.
            pytype = None
            if "cls" in externalConfig:
                pytype = externalConfig["cls"]
            elif "cls" in self:
                pytype = self["cls"]

            if pytype is not None:
                try:
                    cls = doImport(pytype)
                except ImportError as e:
                    raise RuntimeError(f"Failed to import cls '{pytype}' for config {type(self)}") from e

                # The imported class is not required to declare a defaults
                # file, so a missing attribute means "no extra defaults"
                # rather than an error.
                defaultsFile = getattr(cls, "defaultConfigFile", None)
                if defaultsFile is not None:
                    self._updateWithConfigsFromPath(fullSearchPath, defaultsFile)

                # Get the container key in case we need it; it is optional.
                containerKey = getattr(cls, "containerKey", None)

        # Now update this object with the external values so that the external
        # values always override the defaults
        self.update(externalConfig)
        if not self.configFile:
            self.configFile = externalConfig.configFile

        # If this configuration has child configurations of the same
        # config class, we need to expand those defaults as well.
        if mergeDefaults and containerKey is not None and containerKey in self:
            for idx, subConfig in enumerate(self[containerKey]):
                self[containerKey, idx] = type(self)(
                    other=subConfig, validate=validate, mergeDefaults=mergeDefaults, searchPaths=searchPaths
                )

        if validate:
            self.validate()

    @classmethod
    def defaultSearchPaths(cls) -> list[ResourcePath | str]:
        """Read environment to determine search paths to use.

        Global defaults, at lowest priority, are found in the ``configs``
        resource directory of the package named by ``resourcesPackage``.
        Additional defaults can be defined using the environment variable
        ``$DAF_BUTLER_CONFIG_PATH`` which is a PATH-like variable where
        paths at the front of the list have priority over those later.

        Returns
        -------
        paths : `list`
            Returns a list of paths to search. The returned order is in
            priority with the highest priority paths first. Entries from
            the environment variable are returned as `str`; the package
            defaults are always the final entry, as a
            `lsst.resources.ResourcePath`, so they are searched last.

        Notes
        -----
        The environment variable is split on `os.pathsep` (``:`` on POSIX
        systems). This currently makes it incompatible with usage of URIs.
        """
        # We can pick up defaults from multiple search paths
        # We fill defaults by using the butler config path and then
        # the config path environment variable in reverse order.
        defaultsPaths: list[str | ResourcePath] = []

        if CONFIG_PATH in os.environ:
            externalPaths = os.environ[CONFIG_PATH].split(os.pathsep)
            defaultsPaths.extend(externalPaths)

        # Add the package defaults as a resource
        defaultsPaths.append(ResourcePath(f"resource://{cls.resourcesPackage}/configs", forceDirectory=True))
        return defaultsPaths

    def _updateWithConfigsFromPath(
        self, searchPaths: Sequence[str | ResourcePath], configFile: ResourcePath | str
    ) -> None:
        """Search the supplied paths, merging the configuration values.

        The values read will override values currently stored in the object.
        Every file found in the path will be read, such that the earlier
        path entries have higher priority.

        Parameters
        ----------
        searchPaths : `list` of `lsst.resources.ResourcePath`, `str`
            Paths to search for the supplied configFile. This path
            is the priority order, such that files read from the
            first path entry will be selected over those read from
            a later path. Can contain `str` referring to the local file
            system or a URI string.
        configFile : `lsst.resources.ResourcePath` or `str`
            File to locate in path. If it is absolute and exists it is
            read directly and the search path is not used. Otherwise it
            is looked for in each entry of the search path.

        Raises
        ------
        ValueError
            Raised if an entry in ``searchPaths`` is neither a `str` nor a
            `lsst.resources.ResourcePath`.

        Notes
        -----
        Every file that is read is also recorded in ``self.filesRead``.
        """
        uri = ResourcePath(configFile)
        if uri.isabs() and uri.exists():
            # An explicit, existing resource bypasses the search path.
            self._updateWithOtherConfigFile(configFile)
            self.filesRead.append(configFile)
        else:
            # Reverse order so that high priority entries
            # update the object last.
            for pathDir in reversed(searchPaths):
                if isinstance(pathDir, (str, ResourcePath)):
                    pathDir = ResourcePath(pathDir, forceDirectory=True)
                    file = pathDir.join(configFile)
                    if file.exists():
                        self.filesRead.append(file)
                        self._updateWithOtherConfigFile(file)
                else:
                    raise ValueError(f"Unexpected search path type encountered: {pathDir!r}")

    def _updateWithOtherConfigFile(self, file: Config | str | ResourcePath | Mapping[str, Any]) -> None:
        """Read in some defaults and update.

        Update the configuration by reading the supplied file as a config
        of this class, and merging such that these values override the
        current values. Contents of the external config are not validated.

        Parameters
        ----------
        file : `Config`, `str`, `lsst.resources.ResourcePath`, or `dict`
            Entity that can be converted to a `ConfigSubset`.
        """
        # Use this class to read the defaults so that subsetting can happen
        # correctly. Defaults are not merged here since this call is itself
        # part of assembling the defaults.
        externalConfig = type(self)(file, validate=False, mergeDefaults=False)
        self.update(externalConfig)

    def validate(self) -> None:
        """Check that mandatory keys are present in this configuration.

        Ignored if ``requiredKeys`` is empty.

        Raises
        ------
        KeyError
            Raised if any entry of ``requiredKeys`` is not a top-level key
            of this configuration.
        """
        # Validation
        missing = [k for k in self.requiredKeys if k not in self._data]
        if missing:
            raise KeyError(f"Mandatory keys ({missing}) missing from supplied configuration for {type(self)}")