Coverage for python/lsst/daf/butler/core/config.py: 44%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24"""Configuration control."""
26__all__ = ("Config", "ConfigSubset")
28import collections
29import copy
30import io
31import json
32import logging
33import os
34import pprint
35import sys
36from pathlib import Path
37from typing import IO, Any, ClassVar, Dict, List, Optional, Sequence, Tuple, Union
39import yaml
40from lsst.utils import doImport
41from yaml.representer import Representer
43from ._butlerUri import ButlerURI
# Register a representer so that defaultdict instances are dumped using
# PyYAML's ordinary dict representation.
yaml.add_representer(collections.defaultdict, Representer.represent_dict)


# Config module logger
log = logging.getLogger(__name__)

# PATH-like environment variable to use for defaults.
CONFIG_PATH = "DAF_BUTLER_CONFIG_PATH"

# Select the YAML loader base class: use the C implementation of the safe
# loader when this PyYAML build exposes it, otherwise the pure-Python one.
try:
    yamlLoader = yaml.CSafeLoader
except AttributeError:
    # Not all installations have the C library
    # (but assume for mypy's sake that they're the same)
    yamlLoader = yaml.SafeLoader  # type: ignore
62def _doUpdate(d, u):
63 if not isinstance(u, collections.abc.Mapping) or not isinstance(d, collections.abc.MutableMapping): 63 ↛ 64line 63 didn't jump to line 64, because the condition on line 63 was never true
64 raise RuntimeError("Only call update with Mapping, not {}".format(type(d)))
65 for k, v in u.items():
66 if isinstance(v, collections.abc.Mapping):
67 d[k] = _doUpdate(d.get(k, {}), v)
68 else:
69 d[k] = v
70 return d
73def _checkNextItem(k, d, create, must_be_dict):
74 """See if k is in d and if it is return the new child."""
75 nextVal = None
76 isThere = False
77 if d is None: 77 ↛ 79line 77 didn't jump to line 79, because the condition on line 77 was never true
78 # We have gone past the end of the hierarchy
79 pass
80 elif not must_be_dict and isinstance(d, collections.abc.Sequence): 80 ↛ 85line 80 didn't jump to line 85, because the condition on line 80 was never true
81 # Check for Sequence first because for lists
82 # __contains__ checks whether value is found in list
83 # not whether the index exists in list. When we traverse
84 # the hierarchy we are interested in the index.
85 try:
86 nextVal = d[int(k)]
87 isThere = True
88 except IndexError:
89 pass
90 except ValueError:
91 isThere = k in d
92 elif k in d:
93 nextVal = d[k]
94 isThere = True
95 elif create: 95 ↛ 96line 95 didn't jump to line 96, because the condition on line 95 was never true
96 d[k] = {}
97 nextVal = d[k]
98 isThere = True
100 return nextVal, isThere
class Loader(yamlLoader):
    """YAML Loader that supports file include directives.

    Uses ``!include`` directive in a YAML file to point to another
    YAML file to be included. The path in the include directive is relative
    to the file containing that directive.

        storageClasses: !include storageClasses.yaml

    Examples
    --------
    >>> with open("document.yaml", "r") as f:
    ...     data = yaml.load(f, Loader=Loader)

    Notes
    -----
    See https://davidchall.github.io/yaml-includes.html
    """

    def __init__(self, stream):
        super().__init__(stream)
        # if this is a string and not a stream we may well lack a name
        try:
            self._root = ButlerURI(stream.name)
        except AttributeError:
            # No choice but to assume a local filesystem
            self._root = ButlerURI("no-file.yaml")
        # Registered on every instantiation; each call re-registers the
        # same tag with the same constructor.
        Loader.add_constructor("!include", Loader.include)

    def include(self, node):
        """Construct the value of an ``!include`` tagged node.

        Parameters
        ----------
        node : `yaml.Node`
            Scalar (single file name), sequence (list of file names) or
            mapping (key to file name) node.

        Returns
        -------
        result : `object`, `list` or `dict`
            Loaded content of the referenced file(s), in the same shape as
            the node.

        Raises
        ------
        yaml.constructor.ConstructorError
            Raised if the node is not a scalar, sequence or mapping.
        """
        result: Union[List[Any], Dict[str, Any]]
        if isinstance(node, yaml.ScalarNode):
            return self.extractFile(self.construct_scalar(node))

        elif isinstance(node, yaml.SequenceNode):
            result = []
            for filename in self.construct_sequence(node):
                result.append(self.extractFile(filename))
            return result

        elif isinstance(node, yaml.MappingNode):
            result = {}
            for k, v in self.construct_mapping(node).items():
                result[k] = self.extractFile(v)
            return result

        # Carry the diagnostic and the node position in the exception itself
        # rather than printing to stderr and raising an empty error.
        raise yaml.constructor.ConstructorError(
            None, None, "unrecognised node type in !include statement", node.start_mark
        )

    def extractFile(self, filename):
        """Load the YAML file referenced by an ``!include`` directive.

        Parameters
        ----------
        filename : `str`
            URI or path of the file to include. A value without a scheme is
            resolved relative to the file currently being parsed.

        Returns
        -------
        data : `object`
            Parsed content of the referenced file.
        """
        # It is possible for the !include to point to an explicit URI
        # instead of a relative URI, therefore we first see if it is
        # scheme-less or not. If it has a scheme we use it directly
        # if it is scheme-less we use it relative to the file root.
        requesteduri = ButlerURI(filename, forceAbsolute=False)

        if requesteduri.scheme:
            fileuri = requesteduri
        else:
            fileuri = self._root.updatedFile(filename)

        log.debug("Opening YAML file via !include: %s", fileuri)

        # Read all the data from the resource
        data = fileuri.read()

        # Store the bytes into a BytesIO so we can attach a .name
        stream = io.BytesIO(data)
        stream.name = fileuri.geturl()
        return yaml.load(stream, Loader)
176class Config(collections.abc.MutableMapping):
177 r"""Implements a datatype that is used by `Butler` for configuration.
179 It is essentially a `dict` with key/value pairs, including nested dicts
180 (as values). In fact, it can be initialized with a `dict`.
181 This is explained next:
183 Config extends the `dict` api so that hierarchical values may be accessed
184 with delimited notation or as a tuple. If a string is given the delimiter
185 is picked up from the first character in that string. For example,
186 ``foo.getValue(".a.b.c")``, ``foo["a"]["b"]["c"]``, ``foo["a", "b", "c"]``,
187 ``foo[".a.b.c"]``, and ``foo["/a/b/c"]`` all achieve the same outcome.
188 If the first character is alphanumeric, no delimiter will be used.
189 ``foo["a.b.c"]`` will be a single key ``a.b.c`` as will ``foo[":a.b.c"]``.
190 Unicode characters can be used as the delimiter for distinctiveness if
191 required.
193 If a key in the hierarchy starts with a non-alphanumeric character care
194 should be used to ensure that either the tuple interface is used or
195 a distinct delimiter is always given in string form.
197 Finally, the delimiter can be escaped if it is part of a key and also
198 has to be used as a delimiter. For example, ``foo[r".a.b\.c"]`` results in
199 a two element hierarchy of ``a`` and ``b.c``. For hard-coded strings it is
200 always better to use a different delimiter in these cases.
202 Note that adding a multi-level key implicitly creates any nesting levels
203 that do not exist, but removing multi-level keys does not automatically
204 remove empty nesting levels. As a result:
206 >>> c = Config()
207 >>> c[".a.b"] = 1
208 >>> del c[".a.b"]
209 >>> c["a"]
210 Config({'a': {}})
212 Storage formats supported:
214 - yaml: read and write is supported.
215 - json: read and write is supported but no ``!include`` directive.
217 Parameters
218 ----------
219 other : `str` or `Config` or `dict` or `ButlerURI` or `pathlib.Path`
220 Other source of configuration, can be:
222 - (`str` or `ButlerURI`) Treated as a URI to a config file. Must end
223 with ".yaml".
224 - (`Config`) Copies the other Config's values into this one.
225 - (`dict`) Copies the values from the dict into this Config.
227 If `None` is provided an empty `Config` will be created.
228 """
230 _D: str = "→"
231 """Default internal delimiter to use for components in the hierarchy when
232 constructing keys for external use (see `Config.names()`)."""
234 includeKey: ClassVar[str] = "includeConfigs"
235 """Key used to indicate that another config should be included at this
236 part of the hierarchy."""
238 resourcesPackage: str = "lsst.daf.butler"
239 """Package to search for default configuration data. The resources
240 themselves will be within a ``configs`` resource hierarchy."""
242 def __init__(self, other=None):
243 self._data: Dict[str, Any] = {}
244 self.configFile = None
246 if other is None:
247 return
249 if isinstance(other, Config):
250 self._data = copy.deepcopy(other._data)
251 self.configFile = other.configFile
252 elif isinstance(other, (dict, collections.abc.Mapping)):
253 # In most cases we have a dict, and it's more efficient
254 # to check for a dict instance before checking the generic mapping.
255 self.update(other)
256 elif isinstance(other, (str, ButlerURI, Path)): 256 ↛ 263line 256 didn't jump to line 263, because the condition on line 256 was never false
257 # if other is a string, assume it is a file path/URI
258 self.__initFromUri(other)
259 self._processExplicitIncludes()
260 else:
261 # if the config specified by other could not be recognized raise
262 # a runtime error.
263 raise RuntimeError(f"A Config could not be loaded from other: {other}")
265 def ppprint(self):
266 """Return config as formatted readable string.
268 Examples
269 --------
270 use: ``pdb> print(myConfigObject.ppprint())``
272 Returns
273 -------
274 s : `str`
275 A prettyprint formatted string representing the config
276 """
277 return pprint.pformat(self._data, indent=2, width=1)
279 def __repr__(self):
280 return f"{type(self).__name__}({self._data!r})"
282 def __str__(self):
283 return self.ppprint()
285 def __len__(self):
286 return len(self._data)
288 def __iter__(self):
289 return iter(self._data)
291 def copy(self):
292 return type(self)(self)
294 @classmethod
295 def fromString(cls, string: str, format: str = "yaml") -> Config:
296 """Create a new Config instance from a serialized string.
298 Parameters
299 ----------
300 string : `str`
301 String containing content in specified format
302 format : `str`, optional
303 Format of the supplied string. Can be ``json`` or ``yaml``.
305 Returns
306 -------
307 c : `Config`
308 Newly-constructed Config.
309 """
310 if format == "yaml":
311 new_config = cls().__initFromYaml(string)
312 elif format == "json":
313 new_config = cls().__initFromJson(string)
314 else:
315 raise ValueError(f"Unexpected format of string: {format}")
316 new_config._processExplicitIncludes()
317 return new_config
319 @classmethod
320 def fromYaml(cls, string: str) -> Config:
321 """Create a new Config instance from a YAML string.
323 Parameters
324 ----------
325 string : `str`
326 String containing content in YAML format
328 Returns
329 -------
330 c : `Config`
331 Newly-constructed Config.
332 """
333 return cls.fromString(string, format="yaml")
335 def __initFromUri(self, path: Union[str, ButlerURI, Path]) -> None:
336 """Load a file from a path or an URI.
338 Parameters
339 ----------
340 path : `str`
341 Path or a URI to a persisted config file.
342 """
343 uri = ButlerURI(path)
344 ext = uri.getExtension()
345 if ext == ".yaml": 345 ↛ 352line 345 didn't jump to line 352, because the condition on line 345 was never false
346 log.debug("Opening YAML config file: %s", uri.geturl())
347 content = uri.read()
348 # Use a stream so we can name it
349 stream = io.BytesIO(content)
350 stream.name = uri.geturl()
351 self.__initFromYaml(stream)
352 elif ext == ".json":
353 log.debug("Opening JSON config file: %s", uri.geturl())
354 content = uri.read()
355 self.__initFromJson(content)
356 else:
357 # This URI does not have a valid extension. It might be because
358 # we ended up with a directory and not a file. Before we complain
359 # about an extension, do an existence check. No need to do
360 # the (possibly expensive) existence check in the default code
361 # path above because we will find out soon enough that the file
362 # is not there.
363 if not uri.exists():
364 raise FileNotFoundError(f"Config location {uri} does not exist.")
365 raise RuntimeError(f"The Config URI does not have a supported extension: {uri}")
366 self.configFile = uri
368 def __initFromYaml(self, stream):
369 """Load a YAML config from any readable stream that contains one.
371 Parameters
372 ----------
373 stream: `IO` or `str`
374 Stream to pass to the YAML loader. Accepts anything that
375 `yaml.load` accepts. This can include a string as well as an
376 IO stream.
378 Raises
379 ------
380 yaml.YAMLError
381 If there is an error loading the file.
382 """
383 content = yaml.load(stream, Loader=Loader)
384 if content is None: 384 ↛ 385line 384 didn't jump to line 385, because the condition on line 384 was never true
385 content = {}
386 self._data = content
387 return self
389 def __initFromJson(self, stream):
390 """Load a JSON config from any readable stream that contains one.
392 Parameters
393 ----------
394 stream: `IO` or `str`
395 Stream to pass to the JSON loader. This can include a string as
396 well as an IO stream.
398 Raises
399 ------
400 TypeError:
401 Raised if there is an error loading the content.
402 """
403 if isinstance(stream, (bytes, str)):
404 content = json.loads(stream)
405 else:
406 content = json.load(stream)
407 if content is None:
408 content = {}
409 self._data = content
410 return self
412 def _processExplicitIncludes(self):
413 """Scan through the configuration searching for the special includes.
415 Looks for ``includeConfigs`` directive and processes the includes.
416 """
417 # Search paths for config files
418 searchPaths = [ButlerURI(os.path.curdir, forceDirectory=True)]
419 if self.configFile is not None: 419 ↛ 427line 419 didn't jump to line 427, because the condition on line 419 was never false
420 if isinstance(self.configFile, ButlerURI): 420 ↛ 423line 420 didn't jump to line 423, because the condition on line 420 was never false
421 configDir = self.configFile.dirname()
422 else:
423 raise RuntimeError(f"Unexpected type for config file: {self.configFile}")
424 searchPaths.append(configDir)
426 # Ensure we know what delimiter to use
427 names = self.nameTuples()
428 for path in names:
429 if path[-1] == self.includeKey: 429 ↛ 431line 429 didn't jump to line 431, because the condition on line 429 was never true
431 log.debug("Processing file include directive at %s", self._D + self._D.join(path))
432 basePath = path[:-1]
434 # Extract the includes and then delete them from the config
435 includes = self[path]
436 del self[path]
438 # Be consistent and convert to a list
439 if not isinstance(includes, list):
440 includes = [includes]
442 # Read each file assuming it is a reference to a file
443 # The file can be relative to config file or cwd
444 # ConfigSubset search paths are not used
445 subConfigs = []
446 for fileName in includes:
447 # Expand any shell variables -- this could be URI
448 fileName = ButlerURI(os.path.expandvars(fileName), forceAbsolute=False)
449 found = None
450 if fileName.isabs():
451 found = fileName
452 else:
453 for dir in searchPaths:
454 if isinstance(dir, ButlerURI):
455 specific = dir.join(fileName.path)
456 # Remote resource check might be expensive
457 if specific.exists():
458 found = specific
459 else:
460 log.warning(
461 "Do not understand search path entry '%s' of type %s",
462 dir,
463 type(dir).__name__,
464 )
465 if not found:
466 raise RuntimeError(f"Unable to find referenced include file: {fileName}")
468 # Read the referenced Config as a Config
469 subConfigs.append(type(self)(found))
471 # Now we need to merge these sub configs with the current
472 # information that was present in this node in the config
473 # tree with precedence given to the explicit values
474 newConfig = subConfigs.pop(0)
475 for sc in subConfigs:
476 newConfig.update(sc)
478 # Explicit values take precedence
479 if not basePath:
480 # This is an include at the root config
481 newConfig.update(self)
482 # Replace the current config
483 self._data = newConfig._data
484 else:
485 newConfig.update(self[basePath])
486 # And reattach to the base config
487 self[basePath] = newConfig
489 @staticmethod
490 def _splitIntoKeys(key):
491 r"""Split the argument for get/set/in into a hierarchical list.
493 Parameters
494 ----------
495 key : `str` or iterable
496 Argument given to get/set/in. If an iterable is provided it will
497 be converted to a list. If the first character of the string
498 is not an alphanumeric character then it will be used as the
499 delimiter for the purposes of splitting the remainder of the
500 string. If the delimiter is also in one of the keys then it
501 can be escaped using ``\``. There is no default delimiter.
503 Returns
504 -------
505 keys : `list`
506 Hierarchical keys as a `list`.
507 """
508 if isinstance(key, str):
509 if not key[0].isalnum(): 509 ↛ 510line 509 didn't jump to line 510, because the condition on line 509 was never true
510 d = key[0]
511 key = key[1:]
512 else:
513 return [
514 key,
515 ]
516 escaped = f"\\{d}"
517 temp = None
518 if escaped in key:
519 # Complain at the attempt to escape the escape
520 doubled = fr"\{escaped}"
521 if doubled in key:
522 raise ValueError(
523 f"Escaping an escaped delimiter ({doubled} in {key}) is not yet supported."
524 )
525 # Replace with a character that won't be in the string
526 temp = "\r"
527 if temp in key or d == temp:
528 raise ValueError(
529 f"Can not use character {temp!r} in hierarchical key or as"
530 " delimiter if escaping the delimiter"
531 )
532 key = key.replace(escaped, temp)
533 hierarchy = key.split(d)
534 if temp:
535 hierarchy = [h.replace(temp, d) for h in hierarchy]
536 return hierarchy
537 elif isinstance(key, collections.abc.Iterable): 537 ↛ 541line 537 didn't jump to line 541, because the condition on line 537 was never false
538 return list(key)
539 else:
540 # Not sure what this is so try it anyway
541 return [
542 key,
543 ]
545 def _getKeyHierarchy(self, name):
546 """Retrieve the key hierarchy for accessing the Config.
548 Parameters
549 ----------
550 name : `str` or `tuple`
551 Delimited string or `tuple` of hierarchical keys.
553 Returns
554 -------
555 hierarchy : `list` of `str`
556 Hierarchy to use as a `list`. If the name is available directly
557 as a key in the Config it will be used regardless of the presence
558 of any nominal delimiter.
559 """
560 if name in self._data:
561 keys = [
562 name,
563 ]
564 else:
565 keys = self._splitIntoKeys(name)
566 return keys
568 def _findInHierarchy(self, keys, create=False):
569 """Look for hierarchy of keys in Config.
571 Parameters
572 ----------
573 keys : `list` or `tuple`
574 Keys to search in hierarchy.
575 create : `bool`, optional
576 If `True`, if a part of the hierarchy does not exist, insert an
577 empty `dict` into the hierarchy.
579 Returns
580 -------
581 hierarchy : `list`
582 List of the value corresponding to each key in the supplied
583 hierarchy. Only keys that exist in the hierarchy will have
584 a value.
585 complete : `bool`
586 `True` if the full hierarchy exists and the final element
587 in ``hierarchy`` is the value of relevant value.
588 """
589 d = self._data
591 # For the first key, d must be a dict so it is a waste
592 # of time to check for a sequence.
593 must_be_dict = True
595 hierarchy = []
596 complete = True
597 for k in keys:
598 d, isThere = _checkNextItem(k, d, create, must_be_dict)
599 if isThere:
600 hierarchy.append(d)
601 else:
602 complete = False
603 break
604 # Second time round it might be a sequence.
605 must_be_dict = False
607 return hierarchy, complete
609 def __getitem__(self, name):
610 # Override the split for the simple case where there is an exact
611 # match. This allows `Config.items()` to work via a simple
612 # __iter__ implementation that returns top level keys of
613 # self._data.
615 # If the name matches a key in the top-level hierarchy, bypass
616 # all further cleverness.
617 found_directly = False
618 try:
619 data = self._data[name]
620 found_directly = True
621 except KeyError:
622 pass
624 if not found_directly: 624 ↛ 625line 624 didn't jump to line 625, because the condition on line 624 was never true
625 keys = self._getKeyHierarchy(name)
627 hierarchy, complete = self._findInHierarchy(keys)
628 if not complete:
629 raise KeyError(f"{name} not found")
630 data = hierarchy[-1]
632 # In most cases we have a dict, and it's more efficient
633 # to check for a dict instance before checking the generic mapping.
634 if isinstance(data, (dict, collections.abc.Mapping)):
635 data = Config(data)
636 # Ensure that child configs inherit the parent internal delimiter
637 if self._D != Config._D: 637 ↛ 638line 637 didn't jump to line 638, because the condition on line 637 was never true
638 data._D = self._D
639 return data
641 def __setitem__(self, name, value):
642 keys = self._getKeyHierarchy(name)
643 last = keys.pop()
644 if isinstance(value, Config):
645 value = copy.deepcopy(value._data)
647 hierarchy, complete = self._findInHierarchy(keys, create=True)
648 if hierarchy:
649 data = hierarchy[-1]
650 else:
651 data = self._data
653 try:
654 data[last] = value
655 except TypeError:
656 data[int(last)] = value
658 def __contains__(self, key):
659 keys = self._getKeyHierarchy(key)
660 hierarchy, complete = self._findInHierarchy(keys)
661 return complete
663 def __delitem__(self, key):
664 keys = self._getKeyHierarchy(key)
665 last = keys.pop()
666 hierarchy, complete = self._findInHierarchy(keys)
667 if complete: 667 ↛ 674line 667 didn't jump to line 674, because the condition on line 667 was never false
668 if hierarchy: 668 ↛ 669line 668 didn't jump to line 669, because the condition on line 668 was never true
669 data = hierarchy[-1]
670 else:
671 data = self._data
672 del data[last]
673 else:
674 raise KeyError(f"{key} not found in Config")
676 def update(self, other):
677 """Update config from other `Config` or `dict`.
679 Like `dict.update()`, but will add or modify keys in nested dicts,
680 instead of overwriting the nested dict entirely.
682 Parameters
683 ----------
684 other : `dict` or `Config`
685 Source of configuration:
687 Examples
688 --------
689 >>> c = Config({"a": {"b": 1}})
690 >>> c.update({"a": {"c": 2}})
691 >>> print(c)
692 {'a': {'b': 1, 'c': 2}}
694 >>> foo = {"a": {"b": 1}}
695 >>> foo.update({"a": {"c": 2}})
696 >>> print(foo)
697 {'a': {'c': 2}}
698 """
699 _doUpdate(self._data, other)
701 def merge(self, other):
702 """Merge another Config into this one.
704 Like `Config.update()`, but will add keys & values from other that
705 DO NOT EXIST in self.
707 Keys and values that already exist in self will NOT be overwritten.
709 Parameters
710 ----------
711 other : `dict` or `Config`
712 Source of configuration:
713 """
714 if not isinstance(other, collections.abc.Mapping):
715 raise TypeError(f"Can only merge a Mapping into a Config, not {type(other)}")
717 # Convert the supplied mapping to a Config for consistency
718 # This will do a deepcopy if it is already a Config
719 otherCopy = Config(other)
720 otherCopy.update(self)
721 self._data = otherCopy._data
723 def nameTuples(self, topLevelOnly=False):
724 """Get tuples representing the name hierarchies of all keys.
726 The tuples returned from this method are guaranteed to be usable
727 to access items in the configuration object.
729 Parameters
730 ----------
731 topLevelOnly : `bool`, optional
732 If False, the default, a full hierarchy of names is returned.
733 If True, only the top level are returned.
735 Returns
736 -------
737 names : `list` of `tuple` of `str`
738 List of all names present in the `Config` where each element
739 in the list is a `tuple` of strings representing the hierarchy.
740 """
741 if topLevelOnly: 741 ↛ 742line 741 didn't jump to line 742, because the condition on line 741 was never true
742 return list((k,) for k in self)
744 def getKeysAsTuples(d, keys, base):
745 if isinstance(d, collections.abc.Sequence):
746 theseKeys = range(len(d))
747 else:
748 theseKeys = d.keys()
749 for key in theseKeys:
750 val = d[key]
751 levelKey = base + (key,) if base is not None else (key,)
752 keys.append(levelKey)
753 if isinstance(val, (collections.abc.Mapping, collections.abc.Sequence)) and not isinstance(
754 val, str
755 ):
756 getKeysAsTuples(val, keys, levelKey)
758 keys: List[Tuple[str, ...]] = []
759 getKeysAsTuples(self._data, keys, None)
760 return keys
762 def names(self, topLevelOnly=False, delimiter=None):
763 """Get a delimited name of all the keys in the hierarchy.
765 The values returned from this method are guaranteed to be usable
766 to access items in the configuration object.
768 Parameters
769 ----------
770 topLevelOnly : `bool`, optional
771 If False, the default, a full hierarchy of names is returned.
772 If True, only the top level are returned.
773 delimiter : `str`, optional
774 Delimiter to use when forming the keys. If the delimiter is
775 present in any of the keys, it will be escaped in the returned
776 names. If `None` given a delimiter will be automatically provided.
777 The delimiter can not be alphanumeric.
779 Returns
780 -------
781 names : `list` of `str`
782 List of all names present in the `Config`.
784 Notes
785 -----
786 This is different than the built-in method `dict.keys`, which will
787 return only the first level keys.
789 Raises
790 ------
791 ValueError:
792 The supplied delimiter is alphanumeric.
793 """
794 if topLevelOnly:
795 return list(self.keys())
797 # Get all the tuples of hierarchical keys
798 nameTuples = self.nameTuples()
800 if delimiter is not None and delimiter.isalnum():
801 raise ValueError(f"Supplied delimiter ({delimiter!r}) must not be alphanumeric.")
803 if delimiter is None:
804 # Start with something, and ensure it does not need to be
805 # escaped (it is much easier to understand if not escaped)
806 delimiter = self._D
808 # Form big string for easy check of delimiter clash
809 combined = "".join("".join(str(s) for s in k) for k in nameTuples)
811 # Try a delimiter and keep trying until we get something that
812 # works.
813 ntries = 0
814 while delimiter in combined:
815 log.debug("Delimiter '%s' could not be used. Trying another.", delimiter)
816 ntries += 1
818 if ntries > 100:
819 raise ValueError(f"Unable to determine a delimiter for Config {self}")
821 # try another one
822 while True:
823 delimiter = chr(ord(delimiter) + 1)
824 if not delimiter.isalnum():
825 break
827 log.debug("Using delimiter %r", delimiter)
829 # Form the keys, escaping the delimiter if necessary
830 strings = [
831 delimiter + delimiter.join(str(s).replace(delimiter, f"\\{delimiter}") for s in k)
832 for k in nameTuples
833 ]
834 return strings
836 def asArray(self, name):
837 """Get a value as an array.
839 May contain one or more elements.
841 Parameters
842 ----------
843 name : `str`
844 Key to use to retrieve value.
846 Returns
847 -------
848 array : `collections.abc.Sequence`
849 The value corresponding to name, but guaranteed to be returned
850 as a list with at least one element. If the value is a
851 `~collections.abc.Sequence` (and not a `str`) the value itself
852 will be returned, else the value will be the first element.
853 """
854 val = self.get(name)
855 if isinstance(val, str):
856 val = [val]
857 elif not isinstance(val, collections.abc.Sequence):
858 val = [val]
859 return val
861 def __eq__(self, other):
862 if isinstance(other, Config):
863 other = other._data
864 return self._data == other
866 def __ne__(self, other):
867 if isinstance(other, Config):
868 other = other._data
869 return self._data != other
871 #######
872 # i/o #
874 def dump(self, output: Optional[IO] = None, format: str = "yaml") -> Optional[str]:
875 """Write the config to an output stream.
877 Parameters
878 ----------
879 output : `IO`, optional
880 The stream to use for output. If `None` the serialized content
881 will be returned.
882 format : `str`, optional
883 The format to use for the output. Can be "yaml" or "json".
885 Returns
886 -------
887 serialized : `str` or `None`
888 If a stream was given the stream will be used and the return
889 value will be `None`. If the stream was `None` the
890 serialization will be returned as a string.
891 """
892 if format == "yaml":
893 return yaml.safe_dump(self._data, output, default_flow_style=False)
894 elif format == "json":
895 if output is not None:
896 json.dump(self._data, output, ensure_ascii=False)
897 return None
898 else:
899 return json.dumps(self._data, ensure_ascii=False)
900 raise ValueError(f"Unsupported format for Config serialization: {format}")
902 def dumpToUri(
903 self,
904 uri: Union[ButlerURI, str],
905 updateFile: bool = True,
906 defaultFileName: str = "butler.yaml",
907 overwrite: bool = True,
908 ) -> None:
909 """Write the config to location pointed to by given URI.
911 Currently supports 's3' and 'file' URI schemes.
913 Parameters
914 ----------
915 uri: `str` or `ButlerURI`
916 URI of location where the Config will be written.
917 updateFile : bool, optional
918 If True and uri does not end on a filename with extension, will
919 append `defaultFileName` to the target uri. True by default.
920 defaultFileName : bool, optional
921 The file name that will be appended to target uri if updateFile is
922 True and uri does not end on a file with an extension.
923 overwrite : bool, optional
924 If True the configuration will be written even if it already
925 exists at that location.
926 """
927 # Make local copy of URI or create new one
928 uri = ButlerURI(uri)
930 if updateFile and not uri.getExtension():
931 uri = uri.updatedFile(defaultFileName)
933 # Try to work out the format from the extension
934 ext = uri.getExtension()
935 format = ext[1:].lower()
937 output = self.dump(format=format)
938 assert output is not None, "Config.dump guarantees not-None return when output arg is None"
939 uri.write(output.encode(), overwrite=overwrite)
940 self.configFile = uri
942 @staticmethod
943 def updateParameters(configType, config, full, toUpdate=None, toCopy=None, overwrite=True, toMerge=None):
944 """Update specific config parameters.
946 Allows for named parameters to be set to new values in bulk, and
947 for other values to be set by copying from a reference config.
949 Assumes that the supplied config is compatible with ``configType``
950 and will attach the updated values to the supplied config by
951 looking for the related component key. It is assumed that
952 ``config`` and ``full`` are from the same part of the
953 configuration hierarchy.
955 Parameters
956 ----------
957 configType : `ConfigSubset`
958 Config type to use to extract relevant items from ``config``.
959 config : `Config`
960 A `Config` to update. Only the subset understood by
961 the supplied `ConfigSubset` will be modified. Default values
962 will not be inserted and the content will not be validated
963 since mandatory keys are allowed to be missing until
964 populated later by merging.
965 full : `Config`
966 A complete config with all defaults expanded that can be
967 converted to a ``configType``. Read-only and will not be
968 modified by this method. Values are read from here if
969 ``toCopy`` is defined.
971 Repository-specific options that should not be obtained
972 from defaults when Butler instances are constructed
973 should be copied from ``full`` to ``config``.
974 toUpdate : `dict`, optional
975 A `dict` defining the keys to update and the new value to use.
976 The keys and values can be any supported by `Config`
977 assignment.
978 toCopy : `tuple`, optional
979 `tuple` of keys whose values should be copied from ``full``
980 into ``config``.
981 overwrite : `bool`, optional
982 If `False`, do not modify a value in ``config`` if the key
983 already exists. Default is always to overwrite.
984 toMerge : `tuple`, optional
985 Keys to merge content from full to config without overwriting
986 pre-existing values. Only works if the key refers to a hierarchy.
987 The ``overwrite`` flag is ignored.
989 Raises
990 ------
991 ValueError
992 Neither ``toUpdate``, ``toCopy`` nor ``toMerge`` were defined.
993 """
994 if toUpdate is None and toCopy is None and toMerge is None:
995 raise ValueError("At least one of toUpdate, toCopy, or toMerge parameters must be set.")
997 # If this is a parent configuration then we need to ensure that
998 # the supplied config has the relevant component key in it.
999 # If this is a parent configuration we add in the stub entry
1000 # so that the ConfigSubset constructor will do the right thing.
1001 # We check full for this since that is guaranteed to be complete.
1002 if configType.component in full and configType.component not in config:
1003 config[configType.component] = {}
1005 # Extract the part of the config we wish to update
1006 localConfig = configType(config, mergeDefaults=False, validate=False)
1008 if toUpdate:
1009 for key, value in toUpdate.items():
1010 if key in localConfig and not overwrite:
1011 log.debug(
1012 "Not overriding key '%s' with value '%s' in config %s",
1013 key,
1014 value,
1015 localConfig.__class__.__name__,
1016 )
1017 else:
1018 localConfig[key] = value
1020 if toCopy or toMerge:
1021 localFullConfig = configType(full, mergeDefaults=False)
1023 if toCopy:
1024 for key in toCopy:
1025 if key in localConfig and not overwrite:
1026 log.debug(
1027 "Not overriding key '%s' from defaults in config %s",
1028 key,
1029 localConfig.__class__.__name__,
1030 )
1031 else:
1032 localConfig[key] = localFullConfig[key]
1033 if toMerge:
1034 for key in toMerge:
1035 if key in localConfig:
1036 # Get the node from the config to do the merge
1037 # but then have to reattach to the config.
1038 subset = localConfig[key]
1039 subset.merge(localFullConfig[key])
1040 localConfig[key] = subset
1041 else:
1042 localConfig[key] = localFullConfig[key]
1044 # Reattach to parent if this is a child config
1045 if configType.component in config:
1046 config[configType.component] = localConfig
1047 else:
1048 config.update(localConfig)
def toDict(self):
    """Return the content as a standalone hierarchical `dict`.

    Returns
    -------
    d : `dict`
        Deep copy of the internal data in which every top-level value
        that is a `Config` has been replaced by the result of its own
        ``toDict`` call.

    Notes
    -----
    Useful when handing the configuration to code that expects native
    Python types.
    """
    standalone = copy.deepcopy(self._data)
    # Replacing the value of an existing key does not change the size of
    # the mapping, so it is safe to do while iterating over it.
    for key, value in standalone.items():
        if isinstance(value, Config):
            standalone[key] = value.toDict()
    return standalone
class ConfigSubset(Config):
    """A `Config` restricted to one component of a larger configuration.

    Each subclass names its component. If the supplied configuration
    contains that component, only the content below it is kept; for
    example a global config may contain ``dimensions`` and only that
    section is stored. If the component is not present, the whole of the
    supplied configuration is used.

    Defaults are read from the subclass's default configuration file,
    located through the supplied search paths and those returned by
    `ConfigSubset.defaultSearchPaths()`, so an instance can be created
    without any arguments.

    Subclasses can also list keys that must be present; these are checked
    by `validate`.

    Parameters
    ----------
    other : `Config` or `str` or `dict`
        Configuration information in any form understood by `Config`.
    validate : `bool`, optional
        If `True` the presence of the required keys is checked once the
        configuration has been assembled.
    mergeDefaults : `bool`, optional
        If `True` defaults are read first and the supplied configuration
        is then applied on top of them, so that supplied values take
        precedence.
    searchPaths : `list` or `tuple`, optional
        Extra locations to search for defaults, in priority order. They
        rank above the paths from `ConfigSubset.defaultSearchPaths()`.
        Entries can be `str` or `ButlerURI`.
    """

    component: ClassVar[Optional[str]] = None
    """Name of the component to extract from the supplied config. Can be
    `None`. The key does not have to be present in the supplied config.
    Can be a full dot-separated path to a component.
    """

    requiredKeys: ClassVar[Sequence[str]] = ()
    """Keys that must be present in the configuration.
    """

    defaultConfigFile: ClassVar[Optional[str]] = None
    """Name of the file holding the defaults for this config class.
    """

    def __init__(self, other=None, validate=True, mergeDefaults=True, searchPaths=None):

        # Start from an empty object: defaults are loaded into it first
        # and the externally supplied values are applied afterwards.
        super().__init__()

        # Read the supplied content as a plain Config, not as a subset.
        supplied = Config(other)

        # Narrow the supplied content down to our component. Files pulled
        # in with !include may themselves repeat the component name, so
        # a doubled (component, component) key is looked for first.
        name = self.component
        if name is not None:
            nested = (name, name)
            if nested in supplied:
                supplied = supplied[nested]
            elif name in supplied:
                supplied._data = supplied._data[name]

        # Record of the default files read while building this object.
        self.filesRead = []

        # Stays None unless the class named by "cls" defines a
        # containerKey for child configurations.
        containerKey = None

        # Defaults are optional.
        if mergeDefaults:

            # Caller-supplied paths rank above the standard ones.
            candidates = list(searchPaths) if searchPaths else []
            candidates.extend(self.defaultSearchPaths())

            # Defaults come from two places:
            # - the "defaultConfigFile" of this subclass
            # - the class named by the "cls" entry of the configuration
            # "cls" is looked up after the first merge because the
            # defaults may provide it.
            if self.defaultConfigFile is not None:
                self._updateWithConfigsFromPath(candidates, self.defaultConfigFile)

            # A "cls" in the supplied config wins over one from defaults.
            if "cls" in supplied:
                pytype = supplied["cls"]
            elif "cls" in self:
                pytype = self["cls"]
            else:
                pytype = None

            if pytype is not None:
                try:
                    cls = doImport(pytype)
                except ImportError as e:
                    raise RuntimeError(f"Failed to import cls '{pytype}' for config {type(self)}") from e
                clsDefaults = cls.defaultConfigFile
                if clsDefaults is not None:
                    self._updateWithConfigsFromPath(candidates, clsDefaults)

                # Not every class defines a container key.
                containerKey = getattr(cls, "containerKey", None)

        # Apply the supplied values last so they override the defaults.
        self.update(supplied)

        # Child configurations stored under the container key are rebuilt
        # as instances of this same class so their defaults are expanded
        # as well.
        if mergeDefaults and containerKey is not None and containerKey in self:
            subsetType = type(self)
            for idx, subConfig in enumerate(self[containerKey]):
                self[containerKey, idx] = subsetType(
                    other=subConfig, validate=validate, mergeDefaults=mergeDefaults, searchPaths=searchPaths
                )

        if validate:
            self.validate()

    @classmethod
    def defaultSearchPaths(cls):
        """Build the default list of locations to search for defaults.

        If the environment variable named by the module constant
        ``CONFIG_PATH`` is set, it is treated as a PATH-like value and
        its entries are used first, in the order given. The ``configs``
        resource directory of ``cls.resourcesPackage`` is then appended
        as the final, lowest priority, entry.

        Returns
        -------
        paths : `list`
            Locations to search, highest priority first. Entries taken
            from the environment are `str`; the final entry is a
            `ButlerURI`.

        Notes
        -----
        The environment variable is split on `os.pathsep`, which
        currently makes it unsuitable for holding URIs.
        """
        paths: List[Union[str, ButlerURI]] = []

        fromEnvironment = os.environ.get(CONFIG_PATH)
        if fromEnvironment is not None:
            paths.extend(fromEnvironment.split(os.pathsep))

        # Package defaults are provided as a resource.
        paths.append(ButlerURI(f"resource://{cls.resourcesPackage}/configs", forceDirectory=True))
        return paths

    def _updateWithConfigsFromPath(self, searchPaths, configFile):
        """Read a config file from every search path and merge the results.

        Each matching file is applied to this object with
        `_updateWithOtherConfigFile` and recorded in ``filesRead``. The
        search paths are visited in reverse so that entries nearer the
        front of the list are applied last.

        Parameters
        ----------
        searchPaths : `list` of `ButlerURI`, `str`
            Locations to search for ``configFile``, highest priority
            first.
        configFile : `ButlerURI`
            File to look for. If it is reported as absolute and existing
            it is read directly and ``searchPaths`` is not used.

        Raises
        ------
        ValueError
            Raised if an entry of ``searchPaths`` is neither a `str` nor
            a `ButlerURI`.
        """
        uri = ButlerURI(configFile)
        if uri.isabs() and uri.exists():
            # An explicit, existing location bypasses the search path.
            self._updateWithOtherConfigFile(configFile)
            self.filesRead.append(configFile)
            return

        # Lowest priority first, so higher priority entries update this
        # object last.
        for entry in reversed(searchPaths):
            if not isinstance(entry, (str, ButlerURI)):
                raise ValueError(f"Unexpected search path type encountered: {entry!r}")
            directory = ButlerURI(entry, forceDirectory=True)
            candidate = directory.join(configFile)
            if candidate.exists():
                self.filesRead.append(candidate)
                self._updateWithOtherConfigFile(candidate)

    def _updateWithOtherConfigFile(self, file):
        """Read another configuration and apply it to this one.

        The input is read as an instance of this object's own class,
        without validation and without merging defaults, and the result
        is passed to ``update``.

        Parameters
        ----------
        file : `Config`, `str`, `ButlerURI`, or `dict`
            Entity that can be converted to a `ConfigSubset`.
        """
        # Reading with our own class lets the component subsetting happen
        # in the same way as for the main configuration.
        self.update(type(self)(file, validate=False, mergeDefaults=False))

    def validate(self):
        """Check that every key in ``requiredKeys`` is present.

        Nothing is checked if ``requiredKeys`` is empty.

        Raises
        ------
        KeyError
            Raised if one or more required keys are missing.
        """
        missing = [key for key in self.requiredKeys if key not in self._data]
        if not missing:
            return
        raise KeyError(f"Mandatory keys ({missing}) missing from supplied configuration for {type(self)}")