Coverage for python/lsst/daf/butler/core/config.py : 43%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24"""Configuration control."""
26__all__ = ("Config", "ConfigSubset")
28import collections
29import copy
30import json
31import logging
32import pprint
33import os
34import yaml
35import sys
36from pathlib import Path
37from yaml.representer import Representer
38import io
39from typing import Any, Dict, List, Sequence, Optional, ClassVar, IO, Tuple, Union
41from lsst.utils import doImport
42from ._butlerUri import ButlerURI
# Serialize defaultdict instances exactly like ordinary dicts when dumping.
yaml.add_representer(collections.defaultdict, Representer.represent_dict)

# Config module logger
log = logging.getLogger(__name__)

# PATH-like environment variable to use for defaults.
CONFIG_PATH = "DAF_BUTLER_CONFIG_PATH"

# Prefer the C-accelerated safe loader. Not all installations have the C
# library, in which case the pure-Python safe loader is used instead.
yamlLoader = getattr(yaml, "CSafeLoader", yaml.SafeLoader)
class Loader(yamlLoader):
    """YAML Loader that supports file include directives.

    Uses ``!include`` directive in a YAML file to point to another
    YAML file to be included. The path in the include directive is relative
    to the file containing that directive.

        storageClasses: !include storageClasses.yaml

    Examples
    --------
    >>> with open("document.yaml", "r") as f:
    ...     data = yaml.load(f, Loader=Loader)

    Notes
    -----
    See https://davidchall.github.io/yaml-includes.html
    """

    def __init__(self, stream):
        super().__init__(stream)
        # if this is a string and not a stream we may well lack a name
        try:
            self._root = ButlerURI(stream.name)
        except AttributeError:
            # No choice but to assume a local filesystem
            self._root = ButlerURI("no-file.yaml")
        # Registration happens on every instantiation; it simply rebinds
        # the same tag to the same constructor on the class.
        Loader.add_constructor("!include", Loader.include)

    def include(self, node):
        """Construct the value for an ``!include`` tagged node.

        Parameters
        ----------
        node : `yaml.Node`
            Node carrying the ``!include`` tag. A scalar names one file,
            a sequence names several files and a mapping associates keys
            with files.

        Returns
        -------
        result : `object`, `list` or `dict`
            The loaded content of the referenced file for a scalar node,
            a `list` of loaded contents for a sequence node, or a `dict`
            mapping the original keys to loaded contents for a mapping
            node.

        Raises
        ------
        yaml.constructor.ConstructorError
            Raised if the node is not a scalar, sequence or mapping node.
        """
        result: Union[List[Any], Dict[str, Any]]
        if isinstance(node, yaml.ScalarNode):
            return self.extractFile(self.construct_scalar(node))

        if isinstance(node, yaml.SequenceNode):
            result = [self.extractFile(filename) for filename in self.construct_sequence(node)]
            return result

        if isinstance(node, yaml.MappingNode):
            result = {k: self.extractFile(v) for k, v in self.construct_mapping(node).items()}
            return result

        # Report the problem through the exception itself (with the
        # position in the document when available) rather than printing
        # to stderr and raising an exception with no message.
        raise yaml.constructor.ConstructorError(
            None, None,
            f"unrecognised node type ({type(node).__name__}) in !include statement",
            getattr(node, "start_mark", None),
        )

    def extractFile(self, filename):
        """Load the YAML content referenced by an ``!include`` directive.

        Parameters
        ----------
        filename : `str`
            Location given in the directive. If it has a URI scheme it is
            used directly, otherwise it is taken to be relative to the
            file that is currently being loaded.

        Returns
        -------
        content : `object`
            Result of loading the referenced resource with this loader
            class, so nested ``!include`` directives are processed too.
        """
        # It is possible for the !include to point to an explicit URI
        # instead of a relative URI, therefore we first see if it is
        # scheme-less or not. If it has a scheme we use it directly
        # if it is scheme-less we use it relative to the file root.
        requesteduri = ButlerURI(filename, forceAbsolute=False)

        if requesteduri.scheme:
            fileuri = requesteduri
        else:
            fileuri = self._root.updatedFile(filename)

        log.debug("Opening YAML file via !include: %s", fileuri)

        # Read all the data from the resource
        data = fileuri.read()

        # Store the bytes into a BytesIO so we can attach a .name
        # (the name lets a nested Loader resolve its own relative includes)
        stream = io.BytesIO(data)
        stream.name = fileuri.geturl()
        return yaml.load(stream, Loader)
134class Config(collections.abc.MutableMapping):
135 r"""Implements a datatype that is used by `Butler` for configuration.
137 It is essentially a `dict` with key/value pairs, including nested dicts
138 (as values). In fact, it can be initialized with a `dict`.
139 This is explained next:
141 Config extends the `dict` api so that hierarchical values may be accessed
142 with delimited notation or as a tuple. If a string is given the delimiter
143 is picked up from the first character in that string. For example,
144 ``foo.getValue(".a.b.c")``, ``foo["a"]["b"]["c"]``, ``foo["a", "b", "c"]``,
145 ``foo[".a.b.c"]``, and ``foo["/a/b/c"]`` all achieve the same outcome.
146 If the first character is alphanumeric, no delimiter will be used.
147 ``foo["a.b.c"]`` will be a single key ``a.b.c`` as will ``foo[":a.b.c"]``.
148 Unicode characters can be used as the delimiter for distinctiveness if
149 required.
151 If a key in the hierarchy starts with a non-alphanumeric character care
152 should be used to ensure that either the tuple interface is used or
153 a distinct delimiter is always given in string form.
155 Finally, the delimiter can be escaped if it is part of a key and also
156 has to be used as a delimiter. For example, ``foo[r".a.b\.c"]`` results in
157 a two element hierarchy of ``a`` and ``b.c``. For hard-coded strings it is
158 always better to use a different delimiter in these cases.
160 Note that adding a multi-level key implicitly creates any nesting levels
161 that do not exist, but removing multi-level keys does not automatically
162 remove empty nesting levels. As a result:
164 >>> c = Config()
165 >>> c[".a.b"] = 1
166 >>> del c[".a.b"]
167 >>> c["a"]
168 Config({'a': {}})
170 Storage formats supported:
172 - yaml: read and write is supported.
173 - json: read and write is supported but no ``!include`` directive.
175 Parameters
176 ----------
177 other : `str` or `Config` or `dict` or `ButlerURI` or `pathlib.Path`
178 Other source of configuration, can be:
180 - (`str` or `ButlerURI`) Treated as a URI to a config file. Must end
181 with ".yaml".
182 - (`Config`) Copies the other Config's values into this one.
183 - (`dict`) Copies the values from the dict into this Config.
185 If `None` is provided an empty `Config` will be created.
186 """
188 _D: str = "→"
189 """Default internal delimiter to use for components in the hierarchy when
190 constructing keys for external use (see `Config.names()`)."""
192 includeKey: ClassVar[str] = "includeConfigs"
193 """Key used to indicate that another config should be included at this
194 part of the hierarchy."""
196 resourcesPackage: str = "lsst.daf.butler"
197 """Package to search for default configuration data. The resources
198 themselves will be within a ``configs`` resource hierarchy."""
200 def __init__(self, other=None):
201 self._data: Dict[str, Any] = {}
202 self.configFile = None
204 if other is None:
205 return
207 if isinstance(other, Config):
208 self._data = copy.deepcopy(other._data)
209 self.configFile = other.configFile
210 elif isinstance(other, collections.abc.Mapping):
211 self.update(other)
212 elif isinstance(other, (str, ButlerURI, Path)): 212 ↛ 219line 212 didn't jump to line 219, because the condition on line 212 was never false
213 # if other is a string, assume it is a file path/URI
214 self.__initFromUri(other)
215 self._processExplicitIncludes()
216 else:
217 # if the config specified by other could not be recognized raise
218 # a runtime error.
219 raise RuntimeError(f"A Config could not be loaded from other: {other}")
221 def ppprint(self):
222 """Return config as formatted readable string.
224 Examples
225 --------
226 use: ``pdb> print(myConfigObject.ppprint())``
228 Returns
229 -------
230 s : `str`
231 A prettyprint formatted string representing the config
232 """
233 return pprint.pformat(self._data, indent=2, width=1)
235 def __repr__(self):
236 return f"{type(self).__name__}({self._data!r})"
238 def __str__(self):
239 return self.ppprint()
241 def __len__(self):
242 return len(self._data)
244 def __iter__(self):
245 return iter(self._data)
247 def copy(self):
248 return type(self)(self)
250 @classmethod
251 def fromString(cls, string: str, format: str = "yaml") -> Config:
252 """Create a new Config instance from a serialized string.
254 Parameters
255 ----------
256 string : `str`
257 String containing content in specified format
258 format : `str`, optional
259 Format of the supplied string. Can be ``json`` or ``yaml``.
261 Returns
262 -------
263 c : `Config`
264 Newly-constructed Config.
265 """
266 if format == "yaml":
267 new_config = cls().__initFromYaml(string)
268 elif format == "json":
269 new_config = cls().__initFromJson(string)
270 else:
271 raise ValueError(f"Unexpected format of string: {format}")
272 new_config._processExplicitIncludes()
273 return new_config
275 @classmethod
276 def fromYaml(cls, string: str) -> Config:
277 """Create a new Config instance from a YAML string.
279 Parameters
280 ----------
281 string : `str`
282 String containing content in YAML format
284 Returns
285 -------
286 c : `Config`
287 Newly-constructed Config.
288 """
289 return cls.fromString(string, format="yaml")
291 def __initFromUri(self, path: Union[str, ButlerURI, Path]) -> None:
292 """Load a file from a path or an URI.
294 Parameters
295 ----------
296 path : `str`
297 Path or a URI to a persisted config file.
298 """
299 uri = ButlerURI(path)
300 ext = uri.getExtension()
301 if ext == ".yaml": 301 ↛ 308line 301 didn't jump to line 308, because the condition on line 301 was never false
302 log.debug("Opening YAML config file: %s", uri.geturl())
303 content = uri.read()
304 # Use a stream so we can name it
305 stream = io.BytesIO(content)
306 stream.name = uri.geturl()
307 self.__initFromYaml(stream)
308 elif ext == ".json":
309 log.debug("Opening JSON config file: %s", uri.geturl())
310 content = uri.read()
311 self.__initFromJson(content)
312 else:
313 # This URI does not have a valid extension. It might be because
314 # we ended up with a directory and not a file. Before we complain
315 # about an extension, do an existence check. No need to do
316 # the (possibly expensive) existence check in the default code
317 # path above because we will find out soon enough that the file
318 # is not there.
319 if not uri.exists():
320 raise FileNotFoundError(f"Config location {uri} does not exist.")
321 raise RuntimeError(f"The Config URI does not have a supported extension: {uri}")
322 self.configFile = uri
324 def __initFromYaml(self, stream):
325 """Load a YAML config from any readable stream that contains one.
327 Parameters
328 ----------
329 stream: `IO` or `str`
330 Stream to pass to the YAML loader. Accepts anything that
331 `yaml.load` accepts. This can include a string as well as an
332 IO stream.
334 Raises
335 ------
336 yaml.YAMLError
337 If there is an error loading the file.
338 """
339 content = yaml.load(stream, Loader=Loader)
340 if content is None: 340 ↛ 341line 340 didn't jump to line 341, because the condition on line 340 was never true
341 content = {}
342 self._data = content
343 return self
345 def __initFromJson(self, stream):
346 """Load a JSON config from any readable stream that contains one.
348 Parameters
349 ----------
350 stream: `IO` or `str`
351 Stream to pass to the JSON loader. This can include a string as
352 well as an IO stream.
354 Raises
355 ------
356 TypeError:
357 Raised if there is an error loading the content.
358 """
359 if isinstance(stream, (bytes, str)):
360 content = json.loads(stream)
361 else:
362 content = json.load(stream)
363 if content is None:
364 content = {}
365 self._data = content
366 return self
368 def _processExplicitIncludes(self):
369 """Scan through the configuration searching for the special includes.
371 Looks for ``includeConfigs`` directive and processes the includes.
372 """
373 # Search paths for config files
374 searchPaths = [ButlerURI(os.path.curdir, forceDirectory=True)]
375 if self.configFile is not None: 375 ↛ 383line 375 didn't jump to line 383, because the condition on line 375 was never false
376 if isinstance(self.configFile, ButlerURI): 376 ↛ 379line 376 didn't jump to line 379, because the condition on line 376 was never false
377 configDir = self.configFile.dirname()
378 else:
379 raise RuntimeError(f"Unexpected type for config file: {self.configFile}")
380 searchPaths.append(configDir)
382 # Ensure we know what delimiter to use
383 names = self.nameTuples()
384 for path in names:
385 if path[-1] == self.includeKey: 385 ↛ 387line 385 didn't jump to line 387, because the condition on line 385 was never true
387 log.debug("Processing file include directive at %s", self._D + self._D.join(path))
388 basePath = path[:-1]
390 # Extract the includes and then delete them from the config
391 includes = self[path]
392 del self[path]
394 # Be consistent and convert to a list
395 if not isinstance(includes, list):
396 includes = [includes]
398 # Read each file assuming it is a reference to a file
399 # The file can be relative to config file or cwd
400 # ConfigSubset search paths are not used
401 subConfigs = []
402 for fileName in includes:
403 # Expand any shell variables -- this could be URI
404 fileName = ButlerURI(os.path.expandvars(fileName), forceAbsolute=False)
405 found = None
406 if fileName.isabs():
407 found = fileName
408 else:
409 for dir in searchPaths:
410 if isinstance(dir, ButlerURI):
411 specific = dir.join(fileName.path)
412 # Remote resource check might be expensive
413 if specific.exists():
414 found = specific
415 else:
416 log.warning("Do not understand search path entry '%s' of type %s",
417 dir, type(dir).__name__)
418 if not found:
419 raise RuntimeError(f"Unable to find referenced include file: {fileName}")
421 # Read the referenced Config as a Config
422 subConfigs.append(type(self)(found))
424 # Now we need to merge these sub configs with the current
425 # information that was present in this node in the config
426 # tree with precedence given to the explicit values
427 newConfig = subConfigs.pop(0)
428 for sc in subConfigs:
429 newConfig.update(sc)
431 # Explicit values take precedence
432 if not basePath:
433 # This is an include at the root config
434 newConfig.update(self)
435 # Replace the current config
436 self._data = newConfig._data
437 else:
438 newConfig.update(self[basePath])
439 # And reattach to the base config
440 self[basePath] = newConfig
442 @staticmethod
443 def _splitIntoKeys(key):
444 r"""Split the argument for get/set/in into a hierarchical list.
446 Parameters
447 ----------
448 key : `str` or iterable
449 Argument given to get/set/in. If an iterable is provided it will
450 be converted to a list. If the first character of the string
451 is not an alphanumeric character then it will be used as the
452 delimiter for the purposes of splitting the remainder of the
453 string. If the delimiter is also in one of the keys then it
454 can be escaped using ``\``. There is no default delimiter.
456 Returns
457 -------
458 keys : `list`
459 Hierarchical keys as a `list`.
460 """
461 if isinstance(key, str):
462 if not key[0].isalnum(): 462 ↛ 463line 462 didn't jump to line 463, because the condition on line 462 was never true
463 d = key[0]
464 key = key[1:]
465 else:
466 return [key, ]
467 escaped = f"\\{d}"
468 temp = None
469 if escaped in key:
470 # Complain at the attempt to escape the escape
471 doubled = fr"\{escaped}"
472 if doubled in key:
473 raise ValueError(f"Escaping an escaped delimiter ({doubled} in {key})"
474 " is not yet supported.")
475 # Replace with a character that won't be in the string
476 temp = "\r"
477 if temp in key or d == temp:
478 raise ValueError(f"Can not use character {temp!r} in hierarchical key or as"
479 " delimiter if escaping the delimiter")
480 key = key.replace(escaped, temp)
481 hierarchy = key.split(d)
482 if temp:
483 hierarchy = [h.replace(temp, d) for h in hierarchy]
484 return hierarchy
485 elif isinstance(key, collections.abc.Iterable): 485 ↛ 489line 485 didn't jump to line 489, because the condition on line 485 was never false
486 return list(key)
487 else:
488 # Not sure what this is so try it anyway
489 return [key, ]
491 def _getKeyHierarchy(self, name):
492 """Retrieve the key hierarchy for accessing the Config.
494 Parameters
495 ----------
496 name : `str` or `tuple`
497 Delimited string or `tuple` of hierarchical keys.
499 Returns
500 -------
501 hierarchy : `list` of `str`
502 Hierarchy to use as a `list`. If the name is available directly
503 as a key in the Config it will be used regardless of the presence
504 of any nominal delimiter.
505 """
506 if name in self._data:
507 keys = [name, ]
508 else:
509 keys = self._splitIntoKeys(name)
510 return keys
512 def _findInHierarchy(self, keys, create=False):
513 """Look for hierarchy of keys in Config.
515 Parameters
516 ----------
517 keys : `list` or `tuple`
518 Keys to search in hierarchy.
519 create : `bool`, optional
520 If `True`, if a part of the hierarchy does not exist, insert an
521 empty `dict` into the hierarchy.
523 Returns
524 -------
525 hierarchy : `list`
526 List of the value corresponding to each key in the supplied
527 hierarchy. Only keys that exist in the hierarchy will have
528 a value.
529 complete : `bool`
530 `True` if the full hierarchy exists and the final element
531 in ``hierarchy`` is the value of relevant value.
532 """
533 d = self._data
535 def checkNextItem(k, d, create):
536 """See if k is in d and if it is return the new child."""
537 nextVal = None
538 isThere = False
539 if d is None: 539 ↛ 541line 539 didn't jump to line 541, because the condition on line 539 was never true
540 # We have gone past the end of the hierarchy
541 pass
542 elif isinstance(d, collections.abc.Sequence): 542 ↛ 547line 542 didn't jump to line 547, because the condition on line 542 was never true
543 # Check sequence first because for lists
544 # __contains__ checks whether value is found in list
545 # not whether the index exists in list. When we traverse
546 # the hierarchy we are interested in the index.
547 try:
548 nextVal = d[int(k)]
549 isThere = True
550 except IndexError:
551 pass
552 except ValueError:
553 isThere = k in d
554 elif k in d:
555 nextVal = d[k]
556 isThere = True
557 elif create: 557 ↛ 558line 557 didn't jump to line 558, because the condition on line 557 was never true
558 d[k] = {}
559 nextVal = d[k]
560 isThere = True
561 return nextVal, isThere
563 hierarchy = []
564 complete = True
565 for k in keys:
566 d, isThere = checkNextItem(k, d, create)
567 if isThere:
568 hierarchy.append(d)
569 else:
570 complete = False
571 break
573 return hierarchy, complete
575 def __getitem__(self, name):
576 # Override the split for the simple case where there is an exact
577 # match. This allows `Config.items()` to work via a simple
578 # __iter__ implementation that returns top level keys of
579 # self._data.
580 keys = self._getKeyHierarchy(name)
582 hierarchy, complete = self._findInHierarchy(keys)
583 if not complete: 583 ↛ 584line 583 didn't jump to line 584, because the condition on line 583 was never true
584 raise KeyError(f"{name} not found")
585 data = hierarchy[-1]
587 if isinstance(data, collections.abc.Mapping):
588 data = Config(data)
589 # Ensure that child configs inherit the parent internal delimiter
590 if self._D != Config._D: 590 ↛ 591line 590 didn't jump to line 591, because the condition on line 590 was never true
591 data._D = self._D
592 return data
594 def __setitem__(self, name, value):
595 keys = self._getKeyHierarchy(name)
596 last = keys.pop()
597 if isinstance(value, Config):
598 value = copy.deepcopy(value._data)
600 hierarchy, complete = self._findInHierarchy(keys, create=True)
601 if hierarchy:
602 data = hierarchy[-1]
603 else:
604 data = self._data
606 try:
607 data[last] = value
608 except TypeError:
609 data[int(last)] = value
611 def __contains__(self, key):
612 keys = self._getKeyHierarchy(key)
613 hierarchy, complete = self._findInHierarchy(keys)
614 return complete
616 def __delitem__(self, key):
617 keys = self._getKeyHierarchy(key)
618 last = keys.pop()
619 hierarchy, complete = self._findInHierarchy(keys)
620 if complete: 620 ↛ 627line 620 didn't jump to line 627, because the condition on line 620 was never false
621 if hierarchy: 621 ↛ 622line 621 didn't jump to line 622, because the condition on line 621 was never true
622 data = hierarchy[-1]
623 else:
624 data = self._data
625 del data[last]
626 else:
627 raise KeyError(f"{key} not found in Config")
629 def update(self, other):
630 """Update config from other `Config` or `dict`.
632 Like `dict.update()`, but will add or modify keys in nested dicts,
633 instead of overwriting the nested dict entirely.
635 Parameters
636 ----------
637 other : `dict` or `Config`
638 Source of configuration:
640 Examples
641 --------
642 >>> c = Config({"a": {"b": 1}})
643 >>> c.update({"a": {"c": 2}})
644 >>> print(c)
645 {'a': {'b': 1, 'c': 2}}
647 >>> foo = {"a": {"b": 1}}
648 >>> foo.update({"a": {"c": 2}})
649 >>> print(foo)
650 {'a': {'c': 2}}
651 """
652 def doUpdate(d, u):
653 if not isinstance(u, collections.abc.Mapping) or \ 653 ↛ 655line 653 didn't jump to line 655, because the condition on line 653 was never true
654 not isinstance(d, collections.abc.MutableMapping):
655 raise RuntimeError("Only call update with Mapping, not {}".format(type(d)))
656 for k, v in u.items():
657 if isinstance(v, collections.abc.Mapping):
658 d[k] = doUpdate(d.get(k, {}), v)
659 else:
660 d[k] = v
661 return d
662 doUpdate(self._data, other)
664 def merge(self, other):
665 """Merge another Config into this one.
667 Like `Config.update()`, but will add keys & values from other that
668 DO NOT EXIST in self.
670 Keys and values that already exist in self will NOT be overwritten.
672 Parameters
673 ----------
674 other : `dict` or `Config`
675 Source of configuration:
676 """
677 if not isinstance(other, collections.abc.Mapping):
678 raise TypeError(f"Can only merge a Mapping into a Config, not {type(other)}")
680 # Convert the supplied mapping to a Config for consistency
681 # This will do a deepcopy if it is already a Config
682 otherCopy = Config(other)
683 otherCopy.update(self)
684 self._data = otherCopy._data
686 def nameTuples(self, topLevelOnly=False):
687 """Get tuples representing the name hierarchies of all keys.
689 The tuples returned from this method are guaranteed to be usable
690 to access items in the configuration object.
692 Parameters
693 ----------
694 topLevelOnly : `bool`, optional
695 If False, the default, a full hierarchy of names is returned.
696 If True, only the top level are returned.
698 Returns
699 -------
700 names : `list` of `tuple` of `str`
701 List of all names present in the `Config` where each element
702 in the list is a `tuple` of strings representing the hierarchy.
703 """
704 if topLevelOnly: 704 ↛ 705line 704 didn't jump to line 705, because the condition on line 704 was never true
705 return list((k,) for k in self)
707 def getKeysAsTuples(d, keys, base):
708 if isinstance(d, collections.abc.Sequence):
709 theseKeys = range(len(d))
710 else:
711 theseKeys = d.keys()
712 for key in theseKeys:
713 val = d[key]
714 levelKey = base + (key,) if base is not None else (key,)
715 keys.append(levelKey)
716 if isinstance(val, (collections.abc.Mapping, collections.abc.Sequence)) \
717 and not isinstance(val, str):
718 getKeysAsTuples(val, keys, levelKey)
719 keys: List[Tuple[str, ...]] = []
720 getKeysAsTuples(self._data, keys, None)
721 return keys
723 def names(self, topLevelOnly=False, delimiter=None):
724 """Get a delimited name of all the keys in the hierarchy.
726 The values returned from this method are guaranteed to be usable
727 to access items in the configuration object.
729 Parameters
730 ----------
731 topLevelOnly : `bool`, optional
732 If False, the default, a full hierarchy of names is returned.
733 If True, only the top level are returned.
734 delimiter : `str`, optional
735 Delimiter to use when forming the keys. If the delimiter is
736 present in any of the keys, it will be escaped in the returned
737 names. If `None` given a delimiter will be automatically provided.
738 The delimiter can not be alphanumeric.
740 Returns
741 -------
742 names : `list` of `str`
743 List of all names present in the `Config`.
745 Notes
746 -----
747 This is different than the built-in method `dict.keys`, which will
748 return only the first level keys.
750 Raises
751 ------
752 ValueError:
753 The supplied delimiter is alphanumeric.
754 """
755 if topLevelOnly:
756 return list(self.keys())
758 # Get all the tuples of hierarchical keys
759 nameTuples = self.nameTuples()
761 if delimiter is not None and delimiter.isalnum():
762 raise ValueError(f"Supplied delimiter ({delimiter!r}) must not be alphanumeric.")
764 if delimiter is None:
765 # Start with something, and ensure it does not need to be
766 # escaped (it is much easier to understand if not escaped)
767 delimiter = self._D
769 # Form big string for easy check of delimiter clash
770 combined = "".join("".join(str(s) for s in k) for k in nameTuples)
772 # Try a delimiter and keep trying until we get something that
773 # works.
774 ntries = 0
775 while delimiter in combined:
776 log.debug(f"Delimiter '{delimiter}' could not be used. Trying another.")
777 ntries += 1
779 if ntries > 100:
780 raise ValueError(f"Unable to determine a delimiter for Config {self}")
782 # try another one
783 while True:
784 delimiter = chr(ord(delimiter)+1)
785 if not delimiter.isalnum():
786 break
788 log.debug(f"Using delimiter {delimiter!r}")
790 # Form the keys, escaping the delimiter if necessary
791 strings = [delimiter + delimiter.join(str(s).replace(delimiter, f"\\{delimiter}") for s in k)
792 for k in nameTuples]
793 return strings
795 def asArray(self, name):
796 """Get a value as an array.
798 May contain one or more elements.
800 Parameters
801 ----------
802 name : `str`
803 Key to use to retrieve value.
805 Returns
806 -------
807 array : `collections.abc.Sequence`
808 The value corresponding to name, but guaranteed to be returned
809 as a list with at least one element. If the value is a
810 `~collections.abc.Sequence` (and not a `str`) the value itself
811 will be returned, else the value will be the first element.
812 """
813 val = self.get(name)
814 if isinstance(val, str):
815 val = [val]
816 elif not isinstance(val, collections.abc.Sequence):
817 val = [val]
818 return val
820 def __eq__(self, other):
821 if isinstance(other, Config):
822 other = other._data
823 return self._data == other
825 def __ne__(self, other):
826 if isinstance(other, Config):
827 other = other._data
828 return self._data != other
830 #######
831 # i/o #
833 def dump(self, output: Optional[IO] = None, format: str = "yaml") -> Optional[str]:
834 """Write the config to an output stream.
836 Parameters
837 ----------
838 output : `IO`, optional
839 The stream to use for output. If `None` the serialized content
840 will be returned.
841 format : `str`, optional
842 The format to use for the output. Can be "yaml" or "json".
844 Returns
845 -------
846 serialized : `str` or `None`
847 If a stream was given the stream will be used and the return
848 value will be `None`. If the stream was `None` the
849 serialization will be returned as a string.
850 """
851 if format == "yaml":
852 return yaml.safe_dump(self._data, output, default_flow_style=False)
853 elif format == "json":
854 if output is not None:
855 json.dump(self._data, output, ensure_ascii=False)
856 return None
857 else:
858 return json.dumps(self._data, ensure_ascii=False)
859 raise ValueError(f"Unsupported format for Config serialization: {format}")
861 def dumpToUri(self, uri: Union[ButlerURI, str], updateFile: bool = True,
862 defaultFileName: str = "butler.yaml",
863 overwrite: bool = True) -> None:
864 """Write the config to location pointed to by given URI.
866 Currently supports 's3' and 'file' URI schemes.
868 Parameters
869 ----------
870 uri: `str` or `ButlerURI`
871 URI of location where the Config will be written.
872 updateFile : bool, optional
873 If True and uri does not end on a filename with extension, will
874 append `defaultFileName` to the target uri. True by default.
875 defaultFileName : bool, optional
876 The file name that will be appended to target uri if updateFile is
877 True and uri does not end on a file with an extension.
878 overwrite : bool, optional
879 If True the configuration will be written even if it already
880 exists at that location.
881 """
882 # Make local copy of URI or create new one
883 uri = ButlerURI(uri)
885 if updateFile and not uri.getExtension():
886 uri = uri.updatedFile(defaultFileName)
888 # Try to work out the format from the extension
889 ext = uri.getExtension()
890 format = ext[1:].lower()
892 output = self.dump(format=format)
893 assert output is not None, "Config.dump guarantees not-None return when output arg is None"
894 uri.write(output.encode(), overwrite=overwrite)
895 self.configFile = uri
897 @staticmethod
898 def updateParameters(configType, config, full, toUpdate=None, toCopy=None, overwrite=True):
899 """Update specific config parameters.
901 Allows for named parameters to be set to new values in bulk, and
902 for other values to be set by copying from a reference config.
904 Assumes that the supplied config is compatible with ``configType``
905 and will attach the updated values to the supplied config by
906 looking for the related component key. It is assumed that
907 ``config`` and ``full`` are from the same part of the
908 configuration hierarchy.
910 Parameters
911 ----------
912 configType : `ConfigSubset`
913 Config type to use to extract relevant items from ``config``.
914 config : `Config`
915 A `Config` to update. Only the subset understood by
916 the supplied `ConfigSubset` will be modified. Default values
917 will not be inserted and the content will not be validated
918 since mandatory keys are allowed to be missing until
919 populated later by merging.
920 full : `Config`
921 A complete config with all defaults expanded that can be
922 converted to a ``configType``. Read-only and will not be
923 modified by this method. Values are read from here if
924 ``toCopy`` is defined.
926 Repository-specific options that should not be obtained
927 from defaults when Butler instances are constructed
928 should be copied from ``full`` to ``config``.
929 toUpdate : `dict`, optional
930 A `dict` defining the keys to update and the new value to use.
931 The keys and values can be any supported by `Config`
932 assignment.
933 toCopy : `tuple`, optional
934 `tuple` of keys whose values should be copied from ``full``
935 into ``config``.
936 overwrite : `bool`, optional
937 If `False`, do not modify a value in ``config`` if the key
938 already exists. Default is always to overwrite.
940 Raises
941 ------
942 ValueError
943 Neither ``toUpdate`` not ``toCopy`` were defined.
944 """
945 if toUpdate is None and toCopy is None:
946 raise ValueError("One of toUpdate or toCopy parameters must be set.")
948 # If this is a parent configuration then we need to ensure that
949 # the supplied config has the relevant component key in it.
950 # If this is a parent configuration we add in the stub entry
951 # so that the ConfigSubset constructor will do the right thing.
952 # We check full for this since that is guaranteed to be complete.
953 if configType.component in full and configType.component not in config:
954 config[configType.component] = {}
956 # Extract the part of the config we wish to update
957 localConfig = configType(config, mergeDefaults=False, validate=False)
959 if toUpdate:
960 for key, value in toUpdate.items():
961 if key in localConfig and not overwrite:
962 log.debug("Not overriding key '%s' with value '%s' in config %s",
963 key, value, localConfig.__class__.__name__)
964 else:
965 localConfig[key] = value
967 if toCopy:
968 localFullConfig = configType(full, mergeDefaults=False)
969 for key in toCopy:
970 if key in localConfig and not overwrite:
971 log.debug("Not overriding key '%s' from defaults in config %s",
972 key, localConfig.__class__.__name__)
973 else:
974 localConfig[key] = localFullConfig[key]
976 # Reattach to parent if this is a child config
977 if configType.component in config:
978 config[configType.component] = localConfig
979 else:
980 config.update(localConfig)
def toDict(self):
    """Return the content of this `Config` as an independent `dict`.

    Returns
    -------
    d : `dict`
        A deep copy of the internal data of this object. Any top-level
        value in the copy that is itself a `Config` is replaced by the
        result of calling ``toDict()`` on it.

    Notes
    -----
    Intended for handing configuration content to code that only
    understands native Python containers. Only the values stored
    directly under the top-level keys are checked for being `Config`
    instances; everything else is returned as produced by the deep copy.
    """
    # Work on a deep copy so the caller can never modify our own state.
    # The copy is updated in place so that its container type is retained.
    snapshot = copy.deepcopy(self._data)
    for name, entry in snapshot.items():
        if isinstance(entry, Config):
            # Replacing the value of an existing key does not resize the
            # mapping, so this is safe while iterating over it.
            snapshot[name] = entry.toDict()
    return snapshot
class ConfigSubset(Config):
    """A `Config` restricted to one component of a larger configuration.

    Every subclass names the component it represents through the
    ``component`` class attribute. When the supplied configuration contains
    that component only the content stored under it is retained; when the
    component is absent the supplied configuration is used as a whole.
    A global configuration containing a ``dimensions`` entry, for example,
    yields just that entry for a subclass whose component is ``dimensions``.

    Defaults are located by looking for the file named by
    ``defaultConfigFile`` in the caller-supplied search paths and in the
    paths returned by `ConfigSubset.defaultSearchPaths()`, so a subclass can
    be instantiated without any arguments at all.

    Subclasses may list keys in ``requiredKeys`` that have to be present
    for the configuration to be considered valid.

    Parameters
    ----------
    other : `Config` or `str` or `dict`
        Configuration content, in any form accepted by the `Config`
        constructor.
    validate : `bool`, optional
        If `True` the presence of every key listed in ``requiredKeys`` is
        checked once the configuration has been assembled.
    mergeDefaults : `bool`, optional
        If `True` the default configuration files are read first and the
        supplied values are then applied on top of them, so that supplied
        values take precedence over defaults.
    searchPaths : `list` or `tuple`, optional
        Extra locations to search for defaults, given in priority order.
        They are placed ahead of (and therefore have priority over) the
        entries returned by `ConfigSubset.defaultSearchPaths()`. Each entry
        can be a `str` or a `ButlerURI`.
    """

    component: ClassVar[Optional[str]] = None
    """Name of the component to extract from a supplied configuration.
    May be `None`. The key does not have to exist in the supplied
    configuration. Can be a full dot-separated path to a component.
    """

    requiredKeys: ClassVar[Sequence[str]] = ()
    """Keys that must be present for the configuration to validate.
    """

    defaultConfigFile: ClassVar[Optional[str]] = None
    """Name of the file holding the default values for this config class.
    """

    def __init__(self, other=None, validate=True, mergeDefaults=True, searchPaths=None):

        # Begin with an empty object. Defaults are merged into it first and
        # the caller-supplied values are applied on top at the end.
        super().__init__()

        # Parse the supplied content with the generic class, not the subset.
        supplied = Config(other)

        # Narrow the supplied content to our component. Files pulled in via
        # !include may repeat the component name, so the doubled form
        # (component.component) has to be looked for before the single one.
        component = self.component
        if component is not None:
            nested = (component, component)
            if nested in supplied:
                supplied = supplied[nested]
            elif component in supplied:
                supplied._data = supplied._data[component]

        # Record of the default files that contributed to this object.
        self.filesRead = []

        # Only set if the class named by "cls" declares child configs.
        containerKey = None

        # Merging of defaults can be disabled by the caller.
        if mergeDefaults:

            # Explicit search paths come first because they win over the
            # paths derived from the environment.
            fullSearchPath = []
            if searchPaths:
                fullSearchPath.extend(searchPaths)
            fullSearchPath.extend(self.defaultSearchPaths())

            # Defaults come from two sources:
            # - the "defaultConfigFile" declared by this subclass, and
            # - the defaults of the class named by the "cls" entry.
            # "cls" is consulted only after the first merge since that
            # merge may be what defines it.
            if self.defaultConfigFile is not None:
                self._updateWithConfigsFromPath(fullSearchPath, self.defaultConfigFile)

            # A "cls" in the supplied content has priority over one that
            # arrived with the defaults.
            if "cls" in supplied:
                pytype = supplied["cls"]
            elif "cls" in self:
                pytype = self["cls"]
            else:
                pytype = None

            if pytype is not None:
                try:
                    configuredClass = doImport(pytype)
                except ImportError as e:
                    raise RuntimeError(f"Failed to import cls '{pytype}' for config {type(self)}") from e

                classDefaults = configuredClass.defaultConfigFile
                if classDefaults is not None:
                    self._updateWithConfigsFromPath(fullSearchPath, classDefaults)

                # Not every class declares a container key; keep the
                # current value (None) when the attribute is missing.
                containerKey = getattr(configuredClass, "containerKey", containerKey)

        # Supplied values are applied last so they always beat defaults.
        self.update(supplied)

        # Child configurations of this same config class stored under the
        # container key need their own defaults expanded as well.
        if mergeDefaults and containerKey is not None and containerKey in self:
            for idx, subConfig in enumerate(self[containerKey]):
                self[containerKey, idx] = type(self)(other=subConfig, validate=validate,
                                                     mergeDefaults=mergeDefaults,
                                                     searchPaths=searchPaths)

        if validate:
            self.validate()

    @classmethod
    def defaultSearchPaths(cls):
        """Determine the default locations to search for configurations.

        Locations can be supplied through the ``DAF_BUTLER_CONFIG_PATH``
        environment variable, a PATH-like value in which earlier entries
        have priority over later ones. The ``configs`` resource directory
        of the package named by ``cls.resourcesPackage`` is always added
        after them, at lowest priority.

        Returns
        -------
        paths : `list`
            Locations to search, highest priority first. Entries taken from
            the environment are `str`; the final entry is always the
            `ButlerURI` of the package ``configs`` resource directory.

        Notes
        -----
        The environment variable is split on the platform path separator
        (`os.pathsep`), which currently makes it incompatible with URIs.
        """
        # Entries from the environment (if any) keep their given order and
        # therefore their priority over the package defaults added below.
        fromEnvironment = os.environ.get(CONFIG_PATH)
        defaultsPaths: List[Union[str, ButlerURI]] = (
            [] if fromEnvironment is None else fromEnvironment.split(os.pathsep)
        )

        # The package defaults are referenced as a resource and go last.
        packageDefaults = ButlerURI(f"resource://{cls.resourcesPackage}/configs",
                                    forceDirectory=True)
        defaultsPaths.append(packageDefaults)
        return defaultsPaths

    def _updateWithConfigsFromPath(self, searchPaths, configFile):
        """Merge in every copy of a configuration file found on a path.

        Values that are read replace values already held by this object.
        All matching files are read, arranged so that files from earlier
        search path entries end up with the higher priority.

        Parameters
        ----------
        searchPaths : `list` of `ButlerURI`, `str`
            Locations to search for ``configFile``, in priority order:
            content found via an earlier entry overrides content found via
            a later one. Entries may be `str` or `ButlerURI`.
        configFile : `ButlerURI`
            File to look for. If it is absolute and exists it is read
            directly and the search path is not consulted.

        Raises
        ------
        ValueError
            Raised if a search path entry is neither `str` nor `ButlerURI`.
        """
        uri = ButlerURI(configFile)
        if uri.isabs() and uri.exists():
            # Explicit existing resource: read it and ignore the search path.
            self._updateWithOtherConfigFile(configFile)
            self.filesRead.append(configFile)
            return

        # Walk the path backwards so that the highest priority entries are
        # merged last and therefore win.
        for pathDir in reversed(searchPaths):
            if not isinstance(pathDir, (str, ButlerURI)):
                raise ValueError(f"Unexpected search path type encountered: {pathDir!r}")
            directory = ButlerURI(pathDir, forceDirectory=True)
            candidate = directory.join(configFile)
            if candidate.exists():
                self.filesRead.append(candidate)
                self._updateWithOtherConfigFile(candidate)

    def _updateWithOtherConfigFile(self, file):
        """Read another configuration and merge it into this one.

        The supplied entity is loaded as an instance of this same class,
        without validation and without merging defaults, and its values
        then override those currently held by this object.

        Parameters
        ----------
        file : `Config`, `str`, `ButlerURI`, or `dict`
            Entity that can be converted to a `ConfigSubset`.
        """
        # Loading through our own class means the component subsetting is
        # applied to the incoming content as well.
        loaded = type(self)(file, validate=False, mergeDefaults=False)
        self.update(loaded)

    def validate(self):
        """Verify that all mandatory keys exist in this configuration.

        Does nothing when ``requiredKeys`` is empty.

        Raises
        ------
        KeyError
            Raised if one or more keys listed in ``requiredKeys`` are not
            present in the top level of the stored data.
        """
        missing = [required for required in self.requiredKeys if required not in self._data]
        if not missing:
            return
        raise KeyError(f"Mandatory keys ({missing}) missing from supplied configuration for {type(self)}")