Coverage for python/lsst/daf/butler/core/config.py : 12%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24"""Configuration control."""
26__all__ = ("Config", "ConfigSubset")
28from dataclasses import dataclass
29import collections
30import copy
31import logging
32import pprint
33import os
34import pkg_resources
35import posixpath
36import yaml
37import sys
38from yaml.representer import Representer
39import io
40from typing import Sequence, Optional, ClassVar
42try:
43 import boto3
44except ImportError:
45 boto3 = None
47from lsst.utils import doImport
48from .location import ButlerURI
49from .s3utils import getS3Client
# Let defaultdict instances be represented as ordinary YAML mappings.
yaml.add_representer(collections.defaultdict, Representer.represent_dict)

# Logger shared by everything in this module.
log = logging.getLogger(__name__)

# Name of the PATH-like environment variable to use for defaults.
CONFIG_PATH = "DAF_BUTLER_CONFIG_PATH"

# Prefer the C-based safe loader, falling back to the pure Python one
# because not all installations have the C library.
yamlLoader = getattr(yaml, "CSafeLoader", yaml.SafeLoader)
class Loader(yamlLoader):
    """YAML Loader that supports file include directives.

    Uses the ``!include`` directive in a YAML file to point to another
    YAML file to be included. The path in the include directive is relative
    to the file containing that directive, unless the directive gives a URI
    with an explicit scheme, in which case that URI is used directly.

        storageClasses: !include storageClasses.yaml

    Examples
    --------
    >>> with open("document.yaml", "r") as f:
    ...     data = yaml.load(f, Loader=Loader)

    Notes
    -----
    See https://davidchall.github.io/yaml-includes.html
    """

    def __init__(self, stream):
        super().__init__(stream)
        # If this is a string and not a stream we may well lack a name.
        try:
            self._root = ButlerURI(stream.name)
        except AttributeError:
            # No choice but to assume a local filesystem
            self._root = ButlerURI("no-file.yaml")
        Loader.add_constructor("!include", Loader.include)

    def include(self, node):
        """Construct the value of an ``!include`` node.

        Parameters
        ----------
        node : `yaml.Node`
            Node carrying the ``!include`` tag. A scalar names one file,
            a sequence names a list of files and a mapping names one file
            per key.

        Returns
        -------
        included : `object`, `list` or `dict`
            The loaded content of the referenced file(s), using the same
            container shape as the node.

        Raises
        ------
        yaml.constructor.ConstructorError
            Raised if the node is not a scalar, sequence or mapping.
        """
        if isinstance(node, yaml.ScalarNode):
            return self.extractFile(self.construct_scalar(node))

        if isinstance(node, yaml.SequenceNode):
            return [self.extractFile(filename) for filename in self.construct_sequence(node)]

        if isinstance(node, yaml.MappingNode):
            return {k: self.extractFile(v) for k, v in self.construct_mapping(node).items()}

        # Carry the explanation in the exception itself rather than
        # printing it to stderr and raising an exception with no message.
        raise yaml.constructor.ConstructorError(
            None, None, "unrecognised node type in !include statement", node.start_mark
        )

    def extractFile(self, filename):
        """Load the YAML file referenced by an ``!include`` directive.

        Parameters
        ----------
        filename : `str`
            Path or URI given in the directive.

        Returns
        -------
        content : `object`
            Result of loading the referenced file with this loader class.

        Raises
        ------
        ModuleNotFoundError
            Raised if an ``s3`` URI is requested but boto3 is not available.
        FileNotFoundError
            Raised if the requested S3 key or bucket does not exist.
        ValueError
            Raised if the URI scheme is neither local nor ``s3``.
        """
        # It is possible for the !include to point to an explicit URI
        # instead of a relative URI, therefore we first see if it is
        # scheme-less or not. If it has a scheme we use it directly
        # if it is scheme-less we use it relative to the file root.
        requesteduri = ButlerURI(filename, forceAbsolute=False)

        if requesteduri.scheme:
            fileuri = requesteduri
        else:
            fileuri = copy.copy(self._root)
            fileuri.updateFile(filename)

        log.debug("Opening YAML file via !include: %s", fileuri)

        if not fileuri.scheme or fileuri.scheme == "file":
            with open(fileuri.ospath, "r") as f:
                return yaml.load(f, Loader)

        if fileuri.scheme == "s3":
            if boto3 is None:
                raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")
            s3 = getS3Client()
            try:
                response = s3.get_object(Bucket=fileuri.netloc, Key=fileuri.relativeToPathRoot)
            except (s3.exceptions.NoSuchKey, s3.exceptions.NoSuchBucket) as err:
                raise FileNotFoundError(f'No such file or directory: {fileuri}') from err

            # boto3 response is a `StreamingBody`, but not a valid Python
            # IOStream. Loader will raise an error that the stream has no name.
            # The name is used to resolve the "!include" filename location to
            # download. A hackish solution is to name it explicitly.
            body = response["Body"]
            body.name = fileuri.geturl()
            try:
                return yaml.load(body, Loader)
            finally:
                # Release the stream even if parsing fails.
                body.close()

        # Previously an unsupported scheme fell through and silently
        # included `None`.
        raise ValueError(f"Unrecognized URI scheme for !include: {fileuri}")
@dataclass
class Resource:
    """Identify a resource by the package providing it and its name."""

    package: str
    """Name of the package from which the resource is requested."""

    name: str
    """Full name of the resource within the package."""

    def dirname(self) -> ResourceDir:
        """Return the parent "directory" of this resource.

        Returns
        -------
        dir : `ResourceDir`
            The "directory" corresponding to this resource.
        """
        # Resource names always use POSIX-style separators, so posixpath
        # is used in preference to os.path.
        parent = posixpath.dirname(self.name)
        return ResourceDir(self.package, parent)

    def exists(self) -> bool:
        """Report whether the resource exists in its package.

        Returns
        -------
        exists : `bool`
            `True` if the resource exists.
        """
        return pkg_resources.resource_exists(self.package, self.name)
@dataclass
class ResourceDir:
    """Identify a "directory" of resources within a package."""

    package: str
    """Name of the package from which the resource is requested."""

    dir: str
    """Directory path to a resource in this package. Not a full path."""

    def toResource(self, file):
        """Build a concrete resource for a file in this directory.

        Parameters
        ----------
        file : `str`
            Name of a file within this resource directory.

        Returns
        -------
        resource : `Resource`
            A full definition of a resource.
        """
        # Resource names always use POSIX separators, so posixpath is used
        # in preference to os.path.
        fullName = posixpath.join(self.dir, file)
        return Resource(self.package, fullName)
213class Config(collections.abc.MutableMapping):
214 r"""Implements a datatype that is used by `Butler` for configuration
215 parameters.
217 It is essentially a `dict` with key/value pairs, including nested dicts
218 (as values). In fact, it can be initialized with a `dict`.
219 This is explained next:
221 Config extends the `dict` api so that hierarchical values may be accessed
222 with delimited notation or as a tuple. If a string is given the delimiter
223 is picked up from the first character in that string. For example,
224 ``foo.getValue(".a.b.c")``, ``foo["a"]["b"]["c"]``, ``foo["a", "b", "c"]``,
225 ``foo[".a.b.c"]``, and ``foo["/a/b/c"]`` all achieve the same outcome.
226 If the first character is alphanumeric, no delimiter will be used.
227 ``foo["a.b.c"]`` will be a single key ``a.b.c`` as will ``foo[":a.b.c"]``.
228 Unicode characters can be used as the delimiter for distinctiveness if
229 required.
231 If a key in the hierarchy starts with a non-alphanumeric character care
232 should be used to ensure that either the tuple interface is used or
233 a distinct delimiter is always given in string form.
235 Finally, the delimiter can be escaped if it is part of a key and also
236 has to be used as a delimiter. For example, ``foo[r".a.b\.c"]`` results in
237 a two element hierarchy of ``a`` and ``b.c``. For hard-coded strings it is
238 always better to use a different delimiter in these cases.
240 Note that adding a multi-level key implicitly creates any nesting levels
241 that do not exist, but removing multi-level keys does not automatically
242 remove empty nesting levels. As a result:
244 >>> c = Config()
245 >>> c[".a.b"] = 1
246 >>> del c[".a.b"]
247 >>> c
248 Config({'a': {}})
250 Storage formats supported:
252 - yaml: read and write is supported.
255 Parameters
256 ----------
257 other : `str` or `Config` or `dict`
258 Other source of configuration, can be:
260 - (`str`) Treated as a path to a config file on disk. Must end with
261 ".yaml".
262 - (`Config`) Copies the other Config's values into this one.
263 - (`dict`) Copies the values from the dict into this Config.
265 If `None` is provided an empty `Config` will be created.
266 """
268 _D: ClassVar[str] = "→"
269 """Default internal delimiter to use for components in the hierarchy when
270 constructing keys for external use (see `Config.names()`)."""
272 includeKey: ClassVar[str] = "includeConfigs"
273 """Key used to indicate that another config should be included at this
274 part of the hierarchy."""
276 resourcesPackage: str = "lsst.daf.butler"
277 """Package to search for default configuration data. The resources
278 themselves will be within a ``configs`` resource hierarchy."""
280 def __init__(self, other=None):
281 self._data = {}
282 self.configFile = None
284 if other is None:
285 return
287 if isinstance(other, Config):
288 self._data = copy.deepcopy(other._data)
289 self.configFile = other.configFile
290 elif isinstance(other, collections.abc.Mapping):
291 self.update(other)
292 elif isinstance(other, str):
293 # if other is a string, assume it is a file path.
294 self.__initFromFile(other)
295 self._processExplicitIncludes()
296 elif isinstance(other, Resource):
297 # Assume this is a package resources request
298 self.__initFromResource(other)
299 else:
300 # if the config specified by other could not be recognized raise
301 # a runtime error.
302 raise RuntimeError(f"A Config could not be loaded from other: {other}")
def ppprint(self):
    """Format the config content for reading in a debugger.

    use: pdb> print(myConfigObject.ppprint())

    Returns
    -------
    s : `str`
        A prettyprint formatted string representing the config
    """
    formatted = pprint.pformat(self._data, indent=2, width=1)
    return formatted
317 def __repr__(self):
318 return f"{type(self).__name__}({self._data!r})"
320 def __str__(self):
321 return self.ppprint()
323 def __len__(self):
324 return len(self._data)
326 def __iter__(self):
327 return iter(self._data)
329 def copy(self):
330 return type(self)(self)
@classmethod
def fromYaml(cls, string: str) -> Config:
    """Build a new instance of this class from YAML text.

    Parameters
    ----------
    string : `str`
        String containing content in YAML format

    Returns
    -------
    c : `Config`
        Newly-constructed Config.
    """
    newConfig = cls()
    return newConfig.__initFromYaml(string)
def __initFromFile(self, path: str) -> None:
    """Load a file from a path or an URI.

    Parameters
    ----------
    path : `str`
        Path or an URI to a persisted config file. Must end with
        ".yaml".

    Raises
    ------
    RuntimeError
        Raised if the path does not have a ".yaml" extension.
    """
    uri = ButlerURI(path)

    # Require the full ".yaml" extension, as package resources already do.
    # Matching a bare "yaml" suffix also accepted names such as "fooyaml".
    if not uri.path.endswith(".yaml"):
        raise RuntimeError(f"Unhandled config file type: {uri}")

    if uri.scheme == "s3":
        self.__initFromS3YamlFile(uri.geturl())
    else:
        self.__initFromYamlFile(uri.ospath)
    self.configFile = uri
def __initFromResource(self, resource: Resource) -> None:
    """Populate this config from a package resource.

    Parameters
    ----------
    resource : `Resource`
        The resource package and path.

    Raises
    ------
    RuntimeError
        Raised if the resource does not exist or is not a ".yaml"
        resource.
    """
    if not resource.exists():
        raise RuntimeError(f"Package resource {resource} does not exist")

    # Only YAML resources are understood.
    if not resource.name.endswith(".yaml"):
        raise RuntimeError(f"Unhandled config resource type: {resource}")

    self.__initFromYamlResource(resource)
    self.configFile = resource
def __initFromYamlResource(self, resource: Resource) -> None:
    """Populate this config from a YAML package resource.

    Parameters
    ----------
    resource : `Resource`
        The resource package and path.
    """
    package, name = resource.package, resource.name
    log.debug("Opening YAML config resource: %s.%s", package, name)
    with pkg_resources.resource_stream(package, name) as fh:
        self.__initFromYaml(fh)
def __initFromS3YamlFile(self, url):
    """Load a file at a given S3 Bucket uri and attempts to load it from
    yaml.

    Parameters
    ----------
    url : `str`
        S3 URL of a persisted config file.

    Raises
    ------
    ModuleNotFoundError
        Raised if boto3 is not available.
    FileNotFoundError
        Raised if the key or the bucket does not exist.
    """
    if boto3 is None:
        # The space after the full stop was missing from the message.
        raise ModuleNotFoundError("boto3 not found. "
                                  "Are you sure it is installed?")

    uri = ButlerURI(url)
    s3 = getS3Client()
    try:
        response = s3.get_object(Bucket=uri.netloc, Key=uri.relativeToPathRoot)
    except (s3.exceptions.NoSuchKey, s3.exceptions.NoSuchBucket) as err:
        raise FileNotFoundError(f"No such file or directory: {uri}") from err

    # boto3 response is a `StreamingBody`, but not a valid Python IOStream.
    # Loader will raise an error that the stream has no name. A hackish
    # solution is to name it explicitly.
    body = response["Body"]
    body.name = url
    try:
        self.__initFromYaml(body)
    finally:
        # Close the stream even if the YAML could not be parsed.
        body.close()
def __initFromYamlFile(self, path):
    """Open a local file and load its YAML content into this config.

    Parameters
    ----------
    path : `str`
        To a persisted config file in YAML format.
    """
    log.debug("Opening YAML config file: %s", path)
    with open(path, "r") as yamlFile:
        self.__initFromYaml(yamlFile)
def __initFromYaml(self, stream):
    """Replace the content of this config with YAML read from a stream.

    Parameters
    ----------
    stream: `IO` or `str`
        Stream to pass to the YAML loader. Accepts anything that
        `yaml.load` accepts. This can include a string as well as an
        IO stream.

    Returns
    -------
    self : `Config`
        This config, returned so that calls can be chained.

    Raises
    ------
    yaml.YAMLError
        If there is an error loading the file.
    """
    loaded = yaml.load(stream, Loader=Loader)
    # An empty document loads as None; store an empty dict instead.
    self._data = {} if loaded is None else loaded
    return self
def _processExplicitIncludes(self):
    """Scan through the configuration searching for the special
    includeConfigs directive and process the includes.

    Every key in the hierarchy whose final component equals
    ``includeKey`` is removed and replaced by the merged content of the
    config files it names. Values already present at that level of the
    hierarchy take precedence over the included values.

    Raises
    ------
    RuntimeError
        Raised if ``configFile`` has an unexpected type, if a search path
        uses a URI scheme other than a local file, or if an included file
        can not be found.
    """

    # Search paths for config files
    searchPaths = [os.path.curdir]
    if self.configFile is not None:
        if isinstance(self.configFile, str):
            configDir = os.path.abspath(os.path.dirname(self.configFile))
        elif isinstance(self.configFile, (ButlerURI, Resource)):
            configDir = self.configFile.dirname()
        else:
            raise RuntimeError(f"Unexpected type for config file: {self.configFile}")
        searchPaths.append(configDir)

    # Ensure we know what delimiter to use
    # The list of names is taken once, before any include modifies the
    # hierarchy.
    names = self.nameTuples()
    for path in names:
        if path[-1] == self.includeKey:

            log.debug("Processing file include directive at %s", self._D + self._D.join(path))
            basePath = path[:-1]

            # Extract the includes and then delete them from the config
            includes = self[path]
            del self[path]

            # Be consistent and convert to a list
            if not isinstance(includes, list):
                includes = [includes]

            # Read each file assuming it is a reference to a file
            # The file can be relative to config file or cwd
            # ConfigSubset search paths are not used
            # At some point these might be URIs which we will have to
            # assume resolve explicitly
            subConfigs = []
            for fileName in includes:
                # Expand any shell variables
                fileName = os.path.expandvars(fileName)
                found = None
                if os.path.isabs(fileName):
                    found = fileName
                else:
                    for dir in searchPaths:
                        # Convert a string directly to a ButlerURI
                        # to unify the response below
                        if isinstance(dir, str):
                            dir = ButlerURI(dir, forceDirectory=True)

                        if isinstance(dir, ResourceDir):
                            resource = dir.toResource(fileName)
                            if resource.exists():
                                found = resource
                                break
                        elif isinstance(dir, ButlerURI):
                            if not dir.scheme:
                                filePath = os.path.join(dir.path, fileName)
                                if os.path.exists(filePath):
                                    found = os.path.normpath(os.path.abspath(filePath))
                                    break
                            elif dir.scheme == "file":
                                # import private helper function
                                from .location import posix2os
                                # File URIs always use posix path separator
                                filePath = posix2os(posixpath.join(dir.path, fileName))
                                if os.path.exists(filePath):
                                    found = os.path.normpath(os.path.abspath(filePath))
                                    break
                            else:
                                # For remote resource either we assume
                                # the resource always exists even though
                                # it likely does not and we pass it
                                # directly to the Config constructor here.
                                # Else we uses s3utils.s3CheckFileExists
                                # Either way a network call is needed.
                                # For now no-one is using this
                                # functionality and there are no S3 tests
                                # for it so defer implementation.
                                raise RuntimeError("Can not currently follow includeConfigs to "
                                                   f"{dir}")
                        else:
                            log.warning("Do not understand search path entry '%s' of type %s",
                                        dir, type(dir).__name__)
                if not found:
                    raise RuntimeError(f"Unable to find referenced include file: {fileName}")

                # Read the referenced Config as a Config
                subConfigs.append(type(self)(found))

            # Now we need to merge these sub configs with the current
            # information that was present in this node in the config
            # tree with precedence given to the explicit values
            # NOTE(review): an empty list of includes would make this
            # pop() raise IndexError -- confirm whether that can occur.
            newConfig = subConfigs.pop(0)
            for sc in subConfigs:
                newConfig.update(sc)

            # Explicit values take precedence
            if not basePath:
                # This is an include at the root config
                newConfig.update(self)
                # Replace the current config
                self._data = newConfig._data
            else:
                newConfig.update(self[basePath])
                # And reattach to the base config
                self[basePath] = newConfig
562 @staticmethod
563 def _splitIntoKeys(key):
564 r"""Split the argument for get/set/in into a hierarchical list.
566 Parameters
567 ----------
568 key : `str` or iterable
569 Argument given to get/set/in. If an iterable is provided it will
570 be converted to a list. If the first character of the string
571 is not an alphanumeric character then it will be used as the
572 delimiter for the purposes of splitting the remainder of the
573 string. If the delimiter is also in one of the keys then it
574 can be escaped using ``\``. There is no default delimiter.
576 Returns
577 -------
578 keys : `list`
579 Hierarchical keys as a `list`.
580 """
581 if isinstance(key, str):
582 if not key[0].isalnum():
583 d = key[0]
584 key = key[1:]
585 else:
586 return [key, ]
587 escaped = f"\\{d}"
588 temp = None
589 if escaped in key:
590 # Complain at the attempt to escape the escape
591 doubled = fr"\{escaped}"
592 if doubled in key:
593 raise ValueError(f"Escaping an escaped delimiter ({doubled} in {key})"
594 " is not yet supported.")
595 # Replace with a character that won't be in the string
596 temp = "\r"
597 if temp in key or d == temp:
598 raise ValueError(f"Can not use character {temp!r} in hierarchical key or as"
599 " delimiter if escaping the delimiter")
600 key = key.replace(escaped, temp)
601 hierarchy = key.split(d)
602 if temp:
603 hierarchy = [h.replace(temp, d) for h in hierarchy]
604 return hierarchy
605 elif isinstance(key, collections.abc.Iterable):
606 return list(key)
607 else:
608 # Not sure what this is so try it anyway
609 return [key, ]
611 def _getKeyHierarchy(self, name):
612 """Retrieve the key hierarchy for accessing the Config
614 Parameters
615 ----------
616 name : `str` or `tuple`
617 Delimited string or `tuple` of hierarchical keys.
619 Returns
620 -------
621 hierarchy : `list` of `str`
622 Hierarchy to use as a `list`. If the name is available directly
623 as a key in the Config it will be used regardless of the presence
624 of any nominal delimiter.
625 """
626 if name in self._data:
627 keys = [name, ]
628 else:
629 keys = self._splitIntoKeys(name)
630 return keys
632 def _findInHierarchy(self, keys, create=False):
633 """Look for hierarchy of keys in Config
635 Parameters
636 ----------
637 keys : `list` or `tuple`
638 Keys to search in hierarchy.
639 create : `bool`, optional
640 If `True`, if a part of the hierarchy does not exist, insert an
641 empty `dict` into the hierarchy.
643 Returns
644 -------
645 hierarchy : `list`
646 List of the value corresponding to each key in the supplied
647 hierarchy. Only keys that exist in the hierarchy will have
648 a value.
649 complete : `bool`
650 `True` if the full hierarchy exists and the final element
651 in ``hierarchy`` is the value of relevant value.
652 """
653 d = self._data
655 def checkNextItem(k, d, create):
656 """See if k is in d and if it is return the new child"""
657 nextVal = None
658 isThere = False
659 if d is None:
660 # We have gone past the end of the hierarchy
661 pass
662 elif isinstance(d, collections.abc.Sequence):
663 # Check sequence first because for lists
664 # __contains__ checks whether value is found in list
665 # not whether the index exists in list. When we traverse
666 # the hierarchy we are interested in the index.
667 try:
668 nextVal = d[int(k)]
669 isThere = True
670 except IndexError:
671 pass
672 except ValueError:
673 isThere = k in d
674 elif k in d:
675 nextVal = d[k]
676 isThere = True
677 elif create:
678 d[k] = {}
679 nextVal = d[k]
680 isThere = True
681 return nextVal, isThere
683 hierarchy = []
684 complete = True
685 for k in keys:
686 d, isThere = checkNextItem(k, d, create)
687 if isThere:
688 hierarchy.append(d)
689 else:
690 complete = False
691 break
693 return hierarchy, complete
695 def __getitem__(self, name):
696 # Override the split for the simple case where there is an exact
697 # match. This allows `Config.items()` to work via a simple
698 # __iter__ implementation that returns top level keys of
699 # self._data.
700 keys = self._getKeyHierarchy(name)
702 hierarchy, complete = self._findInHierarchy(keys)
703 if not complete:
704 raise KeyError(f"{name} not found")
705 data = hierarchy[-1]
707 if isinstance(data, collections.abc.Mapping):
708 data = Config(data)
709 # Ensure that child configs inherit the parent internal delimiter
710 if self._D != Config._D:
711 data._D = self._D
712 return data
714 def __setitem__(self, name, value):
715 keys = self._getKeyHierarchy(name)
716 last = keys.pop()
717 if isinstance(value, Config):
718 value = copy.deepcopy(value._data)
720 hierarchy, complete = self._findInHierarchy(keys, create=True)
721 if hierarchy:
722 data = hierarchy[-1]
723 else:
724 data = self._data
726 try:
727 data[last] = value
728 except TypeError:
729 data[int(last)] = value
731 def __contains__(self, key):
732 keys = self._getKeyHierarchy(key)
733 hierarchy, complete = self._findInHierarchy(keys)
734 return complete
736 def __delitem__(self, key):
737 keys = self._getKeyHierarchy(key)
738 last = keys.pop()
739 hierarchy, complete = self._findInHierarchy(keys)
740 if complete:
741 if hierarchy:
742 data = hierarchy[-1]
743 else:
744 data = self._data
745 del data[last]
746 else:
747 raise KeyError(f"{key} not found in Config")
def update(self, other):
    """Like dict.update, but will add or modify keys in nested dicts,
    instead of overwriting the nested dict entirely.

    Parameters
    ----------
    other : `dict` or `Config`
        Source of configuration.

    Raises
    ------
    RuntimeError
        Raised if ``other`` is not a mapping, or if a nested mapping in
        ``other`` has to be merged into an existing value that is not a
        mapping.

    Examples
    --------
    For the given code::

        foo = {"a": {"b": 1}}
        foo.update({"a": {"c": 2}})

    - If foo is a dict, then after the update foo == {"a": {"c": 2}}
    - But if foo is a Config, then after the update
      foo == {"a": {"b": 1, "c": 2}}
    """
    def doUpdate(d, u):
        # Name the argument that is actually at fault; previously the
        # type of the target was reported even when the source was the
        # non-mapping.
        for candidate in (u, d):
            if not isinstance(candidate, collections.abc.Mapping):
                raise RuntimeError("Only call update with Mapping, not {}".format(type(candidate)))
        for k, v in u.items():
            if isinstance(v, collections.abc.Mapping):
                # Merge into the existing level, or into a new empty one.
                d[k] = doUpdate(d.get(k, {}), v)
            else:
                d[k] = v
        return d

    doUpdate(self._data, other)
778 def merge(self, other):
779 """Like Config.update, but will add keys & values from other that
780 DO NOT EXIST in self.
782 Keys and values that already exist in self will NOT be overwritten.
784 Parameters
785 ----------
786 other : `dict` or `Config`
787 Source of configuration:
788 """
789 otherCopy = copy.deepcopy(other)
790 otherCopy.update(self)
791 self._data = otherCopy._data
def nameTuples(self, topLevelOnly=False):
    """Return the hierarchy of every key as a tuple.

    The tuples returned from this method are guaranteed to be usable
    to access items in the configuration object.

    Parameters
    ----------
    topLevelOnly : `bool`, optional
        If False, the default, a full hierarchy of names is returned.
        If True, only the top level are returned.

    Returns
    -------
    names : `list` of `tuple` of `str`
        List of all names present in the `Config` where each element
        in the list is a `tuple` of strings representing the hierarchy.
    """
    if topLevelOnly:
        return [(k,) for k in self]

    collected = []
    nested = (collections.abc.Mapping, collections.abc.Sequence)

    def walk(node, prefix):
        # Sequences are addressed by index, mappings by key.
        if isinstance(node, collections.abc.Sequence):
            children = range(len(node))
        else:
            children = node.keys()
        for child in children:
            value = node[child]
            here = prefix + (child,)
            collected.append(here)
            # Strings are sequences too but are treated as leaves.
            if isinstance(value, nested) and not isinstance(value, str):
                walk(value, here)

    walk(self._data, ())
    return collected
def names(self, topLevelOnly=False, delimiter=None):
    """Return every key in the hierarchy as a delimited string.

    The values returned from this method are guaranteed to be usable
    to access items in the configuration object.

    Parameters
    ----------
    topLevelOnly : `bool`, optional
        If False, the default, a full hierarchy of names is returned.
        If True, only the top level are returned.
    delimiter : `str`, optional
        Delimiter to use when forming the keys. If the delimiter is
        present in any of the keys, it will be escaped in the returned
        names. If `None` given a delimiter will be automatically provided.
        The delimiter can not be alphanumeric.

    Returns
    -------
    names : `list` of `str`
        List of all names present in the `Config`.

    Notes
    -----
    This is different than the built-in method `dict.keys`, which will
    return only the first level keys.

    Raises
    ------
    ValueError:
        The supplied delimiter is alphanumeric.
    """
    if topLevelOnly:
        return list(self.keys())

    # Every hierarchical key as a tuple
    allTuples = self.nameTuples()

    if delimiter is not None and delimiter.isalnum():
        raise ValueError(f"Supplied delimiter ({delimiter!r}) must not be alphanumeric.")

    if delimiter is None:
        # Start from the default and look for one that needs no escaping,
        # since unescaped names are much easier to read.
        delimiter = self._D

        # One big string makes the clash test a simple substring check.
        combined = "".join("".join(str(s) for s in k) for k in allTuples)

        ntries = 0
        while delimiter in combined:
            log.debug(f"Delimiter '{delimiter}' could not be used. Trying another.")
            ntries += 1

            if ntries > 100:
                raise ValueError(f"Unable to determine a delimiter for Config {self}")

            # Step to the next character that is not alphanumeric.
            delimiter = chr(ord(delimiter)+1)
            while delimiter.isalnum():
                delimiter = chr(ord(delimiter)+1)

    log.debug(f"Using delimiter {delimiter!r}")

    def joinEscaped(parts):
        # Escape any delimiter that appears inside a key component.
        safe = (str(s).replace(delimiter, f"\\{delimiter}") for s in parts)
        return delimiter + delimiter.join(safe)

    return [joinEscaped(k) for k in allTuples]
902 def asArray(self, name):
903 """Get a value as an array.
905 May contain one or more elements.
907 Parameters
908 ----------
909 name : `str`
910 Key to use to retrieve value.
912 Returns
913 -------
914 array : `collections.abc.Sequence`
915 The value corresponding to name, but guaranteed to be returned
916 as a list with at least one element. If the value is a
917 `~collections.abc.Sequence` (and not a `str`) the value itself
918 will be returned, else the value will be the first element.
919 """
920 val = self.get(name)
921 if isinstance(val, str):
922 val = [val]
923 elif not isinstance(val, collections.abc.Sequence):
924 val = [val]
925 return val
927 def __eq__(self, other):
928 if isinstance(other, Config):
929 other = other._data
930 return self._data == other
932 def __ne__(self, other):
933 if isinstance(other, Config):
934 other = other._data
935 return self._data != other
937 #######
938 # i/o #
def dump(self, output):
    """Serialize the config content as YAML onto a stream.

    Parameters
    ----------
    output
        The YAML stream to use for output.
    """
    yaml.safe_dump(
        self._data,
        output,
        default_flow_style=False,
    )
def dumpToUri(self, uri, updateFile=True, defaultFileName="butler.yaml",
              overwrite=True):
    """Writes the config to location pointed to by given URI.

    Currently supports 's3' and 'file' URI schemes. A URI without a
    scheme is handled in the same way as a 'file' URI.

    Parameters
    ----------
    uri: `str` or `ButlerURI`
        URI of location where the Config will be written.
    updateFile : bool, optional
        If True and uri does not end on a filename with extension, will
        append `defaultFileName` to the target uri. True by default.
    defaultFileName : `str`, optional
        The file name that will be appended to target uri if updateFile is
        True and uri does not end on a file with an extension.
    overwrite : bool, optional
        If True the configuration will be written even if it already
        exists at that location.

    Raises
    ------
    ValueError
        Raised if the URI scheme is not recognized.
    """
    if isinstance(uri, str):
        uri = ButlerURI(uri)

    if not uri.scheme or uri.scheme == "file":
        # The default file name is only added when the path is an
        # existing local directory.
        if os.path.isdir(uri.path) and updateFile:
            uri = ButlerURI(os.path.join(uri.ospath, defaultFileName))
        self.dumpToFile(uri.ospath, overwrite=overwrite)
    elif uri.scheme == "s3":
        # A final path component without a "." is treated as a directory.
        if not uri.dirLike and "." not in uri.basename():
            uri = ButlerURI(uri.geturl(), forceDirectory=True)
        # NOTE(review): ``updateFile`` is not consulted in this branch and
        # the file name is always set to ``defaultFileName`` -- confirm
        # that this is intended when the URI already names a file.
        uri.updateFile(defaultFileName)
        self.dumpToS3File(uri, overwrite=overwrite)
    else:
        raise ValueError(f"Unrecognized URI scheme: {uri.scheme}")
def dumpToFile(self, path, *, overwrite=True):
    """Write the config to a local file as YAML.

    Parameters
    ----------
    path : `str`
        Path to the file to use for output.
    overwrite : `bool`, optional
        If True any existing file will be over written.

    Notes
    -----
    The name of the config file is stored in the Config object.

    Raises
    ------
    FileExistsError
        Raised if the file already exists but overwrite is False.
    """
    # Exclusive-creation mode makes open() fail if the file exists.
    mode = "w" if overwrite else "x"
    with open(path, mode) as outFile:
        self.dump(outFile)
    self.configFile = ButlerURI(path)
def dumpToS3File(self, uri, *, overwrite=True):
    """Write the config as YAML to an object in an S3 bucket.

    Parameters
    ----------
    uri : `ButlerURI`
        S3 URI where the configuration should be stored.
    overwrite : `bool`, optional
        If False, a check will be made to see if the key already
        exists.

    Raises
    ------
    ModuleNotFoundError
        Raised if boto3 is not available.
    ValueError
        Raised if the URI does not use the ``s3`` scheme.
    FileExistsError
        Raised if the configuration already exists at this location
        and overwrite is set to `False`.

    Notes
    -----
    The name of the config output location is stored in the Config object.
    """
    if boto3 is None:
        raise ModuleNotFoundError("Could not find boto3. "
                                  "Are you sure it is installed?")

    if uri.scheme != "s3":
        raise ValueError(f"Must provide S3 URI not {uri}")

    s3 = getS3Client()

    if not overwrite:
        from .s3utils import s3CheckFileExists
        alreadyThere = s3CheckFileExists(uri, client=s3)[0]
        if alreadyThere:
            raise FileExistsError(f"Config already exists at {uri}")

    # Serialize into memory first so the text can be sent in one call.
    with io.StringIO() as buffer:
        self.dump(buffer)
        content = buffer.getvalue()
        s3.put_object(Bucket=uri.netloc, Key=uri.relativeToPathRoot, Body=content)

    self.configFile = uri
@staticmethod
def updateParameters(configType, config, full, toUpdate=None, toCopy=None, overwrite=True):
    """Update selected parameters of one config component in bulk.

    Named parameters can be assigned new values and other parameters can
    be copied across from a reference config.

    The supplied config is assumed to be compatible with ``configType``.
    Updated values are attached to the supplied config under the
    component key declared by ``configType``.  ``config`` and ``full``
    are assumed to come from the same part of the configuration
    hierarchy.

    Parameters
    ----------
    configType : `ConfigSubset`
        Config type used to extract the relevant items from ``config``.
    config : `Config`
        The `Config` to update.  Only the subset understood by the
        supplied `ConfigSubset` is modified.  Defaults are not merged in
        and the content is not validated, since mandatory keys are
        allowed to be missing until populated later by merging.
    full : `Config`
        A complete config, with all defaults expanded, that can be
        converted to a ``configType``.  Values are read from here when
        ``toCopy`` is given; it is not assigned to by this method.

        Repository-specific options that should not be obtained
        from defaults when Butler instances are constructed
        should be copied from ``full`` to ``config``.
    toUpdate : `dict`, optional
        Mapping of keys to update to the new values to store.  Keys and
        values can be anything supported by `Config` assignment.
    toCopy : `tuple`, optional
        Keys whose values should be copied from ``full`` into
        ``config``.
    overwrite : `bool`, optional
        If `False`, a key that already exists in ``config`` is left
        untouched.  The default is to always overwrite.

    Raises
    ------
    ValueError
        Neither ``toUpdate`` nor ``toCopy`` were defined.
    """
    if toUpdate is None and toCopy is None:
        raise ValueError("One of toUpdate or toCopy parameters must be set.")

    component = configType.component

    # ``full`` is the complete reference, so it decides whether this is a
    # child of a parent configuration.  In that case make sure the target
    # has a stub entry for the component so that the ConfigSubset
    # constructor extracts the right part.
    if component in full and component not in config:
        config[component] = {}

    # The portion of the target config that will be modified.
    target = configType(config, mergeDefaults=False, validate=False)

    if toUpdate:
        for name, newValue in toUpdate.items():
            if name not in target or overwrite:
                target[name] = newValue
            else:
                log.debug("Not overriding key '%s' with value '%s' in config %s",
                          name, newValue, target.__class__.__name__)

    if toCopy:
        reference = configType(full, mergeDefaults=False)
        for name in toCopy:
            if name not in target or overwrite:
                target[name] = reference[name]
            else:
                log.debug("Not overriding key '%s' from defaults in config %s",
                          name, target.__class__.__name__)

    # Put the modified subset back: under its component key for a child
    # config, otherwise merged directly into the supplied config.
    if component in config:
        config[component] = target
    else:
        config.update(target)
def toDict(self):
    """Return the content of this `Config` as a standalone `dict`.

    Returns
    -------
    d : `dict`
        A deep copy of the internal data in which every top-level value
        that is a `Config` has been replaced by the result of calling
        its own ``toDict``.

    Notes
    -----
    Useful when handing a Config to code that expects native Python
    types.
    """
    # Work on a deep copy so that the stored data is never modified.
    snapshot = copy.deepcopy(self._data)
    for key in list(snapshot):
        entry = snapshot[key]
        if isinstance(entry, Config):
            snapshot[key] = entry.toDict()
    return snapshot
class ConfigSubset(Config):
    """A `Config` restricted to one component of a larger configuration.

    Each subclass names its own ``component``.  When the supplied
    configuration contains that component only the content stored under
    it is retained; for example a global config holding a ``dimensions``
    entry yields just that entry.  When the component can not be found
    the entire supplied configuration is used.

    Defaults are located through the environment or through explicitly
    supplied search paths, using the default configuration file name
    declared by the subclass.  This allows an instance to be created
    without passing any arguments.

    Subclasses can also declare keys that are mandatory; their presence
    is checked when validation is requested.

    Parameters
    ----------
    other : `Config` or `str` or `dict`
        Configuration information in any form understood by `Config`.
    validate : `bool`, optional
        If `True` the required keys are checked once the configuration
        has been assembled.
    mergeDefaults : `bool`, optional
        If `True` defaults are read and combined with the supplied
        config, with the supplied values taking precedence.
    searchPaths : `list` or `tuple`, optional
        Explicit additional paths to search for defaults, given in
        priority order.  They take precedence over the paths returned by
        `ConfigSubset.defaultSearchPaths()`.  Entries can be `str`
        referring to the local file system, or `ResourceDir`.
    """

    component: ClassVar[Optional[str]] = None
    """Key of the component to extract from a supplied config.  Can be
    `None`.  When set, the key does not have to be present in the supplied
    config.  Can be a full dot-separated path to a component.
    """

    requiredKeys: ClassVar[Sequence[str]] = ()
    """Keys that must be present in the configuration.
    """

    defaultConfigFile: ClassVar[Optional[str]] = None
    """Name of the file holding the defaults for this config class.
    """

    def __init__(self, other=None, validate=True, mergeDefaults=True, searchPaths=None):

        # Start out empty: defaults are loaded into this object first and
        # the externally supplied values are applied on top afterwards.
        super().__init__()

        # Parse the supplied content as a plain Config, not as a subset.
        supplied = Config(other)

        # Narrow the supplied content down to this component.  Files pulled
        # in with !include may themselves be keyed by the component name,
        # so component.component is also recognised.
        if self.component is not None:
            nested = (self.component, self.component)
            # The doubled form has to be tested before the single one.
            if nested in supplied:
                supplied = supplied[nested]
            elif self.component in supplied:
                supplied._data = supplied._data[self.component]

        # Record of the default files read while building this config.
        self.filesRead = []

        # Only set if the implementation class declares child configs.
        containerKey = None

        # Merging with defaults is optional.
        if mergeDefaults:

            # Explicitly supplied paths come first, followed by the
            # default paths derived from the environment.
            searchOrder = list(searchPaths) if searchPaths else []
            searchOrder.extend(self.defaultSearchPaths())

            # Defaults can come from two places:
            # - the "defaultConfigFile" declared by this subclass
            # - the class named by the "cls" element of the config.
            # "cls" is looked up after the first merge in case it changes.
            if self.defaultConfigFile is not None:
                self._updateWithConfigsFromPath(searchOrder, self.defaultConfigFile)

            # A class named in the supplied config has priority over one
            # obtained from the defaults.
            clsName = None
            if "cls" in supplied:
                clsName = supplied["cls"]
            elif "cls" in self:
                clsName = self["cls"]

            if clsName is not None:
                try:
                    implClass = doImport(clsName)
                except ImportError as e:
                    raise RuntimeError(f"Failed to import cls '{clsName}' for config {type(self)}") from e

                clsDefaults = implClass.defaultConfigFile
                if clsDefaults is not None:
                    self._updateWithConfigsFromPath(searchOrder, clsDefaults)

                # Not every implementation class defines a container key.
                containerKey = getattr(implClass, "containerKey", None)

        # Apply the supplied values last so they always win over defaults.
        self.update(supplied)

        # Child configurations of this same config class, stored under the
        # container key, need their own defaults expanded as well.
        if mergeDefaults and containerKey is not None and containerKey in self:
            subsetClass = type(self)
            for position, child in enumerate(self[containerKey]):
                self[containerKey, position] = subsetClass(other=child, validate=validate,
                                                           mergeDefaults=mergeDefaults,
                                                           searchPaths=searchPaths)

        if validate:
            self.validate()

    @classmethod
    def defaultSearchPaths(cls):
        """Determine the search paths to use for global defaults.

        Global defaults, at lowest priority, come from the ``configs``
        resource directory of the package named by ``resourcesPackage``.
        Additional defaults can be defined with the environment variable
        ``$DAF_BUTLER_CONFIG_PATH``, a PATH-like variable in which entries
        at the front of the list have priority over later ones.

        Returns
        -------
        paths : `list`
            Paths to search, highest priority first.  The entries taken
            from the environment are `str`; the final entry is the
            `ResourceDir` for the package defaults.

        Notes
        -----
        The environment variable is split on the standard path separator
        (`os.pathsep`).  This currently makes it incompatible with usage
        of URIs.
        """
        paths = []

        # Entries from the environment keep their order and come first.
        fromEnvironment = os.environ.get(CONFIG_PATH)
        if fromEnvironment is not None:
            paths.extend(fromEnvironment.split(os.pathsep))

        # The package defaults are a resource and always come last.
        paths.append(ResourceDir(cls.resourcesPackage, "configs"))

        return paths

    def _updateWithConfigsFromPath(self, searchPaths, configFile):
        """Merge in every copy of a config file found on the search path.

        Values that are read override the values currently stored in this
        object.  Every matching file is read, arranged so that entries
        earlier in the path end up with higher priority.

        Parameters
        ----------
        searchPaths : `list`
            Locations to search for ``configFile``, in priority order:
            values from the first entry win over those from later
            entries.  Entries can be `str` referring to the local file
            system or `ResourceDir`.
        configFile : `str`
            File to locate.  An absolute path to an existing file is read
            directly without using the search path.  A URI with a scheme
            is also read directly, without using the search path, and is
            assumed to exist.
        """
        uri = ButlerURI(configFile)
        if uri.scheme:
            # Explicit resource: assumed to exist, so read it directly.
            self._updateWithOtherConfigFile(configFile)
            self.filesRead.append(configFile)
            return

        if os.path.isabs(configFile) and os.path.exists(configFile):
            self.filesRead.append(configFile)
            self._updateWithOtherConfigFile(configFile)
            return

        # Walk from lowest to highest priority so that the highest
        # priority entries are applied last.
        for location in reversed(searchPaths):
            if isinstance(location, str):
                candidate = os.path.join(location, configFile)
                found = os.path.exists(candidate)
            elif isinstance(location, ResourceDir):
                candidate = location.toResource(configFile)
                found = candidate.exists()
            else:
                # Entries of any other type are ignored.
                continue
            if found:
                self.filesRead.append(candidate)
                self._updateWithOtherConfigFile(candidate)

    def _updateWithOtherConfigFile(self, file):
        """Read another config and merge its values into this one.

        The supplied entity is read as a config of this same class,
        without merging defaults and without validation, and its values
        then override the current values.

        Parameters
        ----------
        file : `Config`, `str`, or `dict`
            Entity that can be converted to a `ConfigSubset`.
        """
        # Constructing with this class makes the component subsetting
        # apply to the content being read.
        self.update(type(self)(file, validate=False, mergeDefaults=False))

    def validate(self):
        """Check that all mandatory keys are present in this configuration.

        Nothing is checked if ``requiredKeys`` is empty.

        Raises
        ------
        KeyError
            Raised if one or more of the required keys are missing.
        """
        absent = [key for key in self.requiredKeys if key not in self._data]
        if not absent:
            return
        raise KeyError(f"Mandatory keys ({absent}) missing from supplied configuration for {type(self)}")