Coverage for python/lsst/daf/butler/core/config.py : 10%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

"""Configuration control."""

__all__ = ("Config", "ConfigSubset")

import collections
import copy
import logging
import pprint
import os
import yaml
import sys
from yaml.representer import Representer
import io
from typing import Sequence, Optional, ClassVar

try:
    import boto3
except ImportError:
    boto3 = None

import lsst.utils
from lsst.utils import doImport
from .location import ButlerURI
from .s3utils import getS3Client

yaml.add_representer(collections.defaultdict, Representer.represent_dict)


# Config module logger
log = logging.getLogger(__name__)

# PATH-like environment variable to use for defaults.
CONFIG_PATH = "DAF_BUTLER_CONFIG_PATH"
class Loader(yaml.CSafeLoader):
    """YAML Loader that supports file include directives.

    Uses the ``!include`` directive in a YAML file to point to another
    YAML file to be included. The path in the include directive is relative
    to the file containing that directive.

        storageClasses: !include storageClasses.yaml

    Examples
    --------
    >>> with open("document.yaml", "r") as f:
    ...     data = yaml.load(f, Loader=Loader)

    Notes
    -----
    See https://davidchall.github.io/yaml-includes.html
    """

    def __init__(self, stream):
        super().__init__(stream)
        # If this is a string and not a stream we may well lack a name
        try:
            self._root = ButlerURI(stream.name)
        except AttributeError:
            # No choice but to assume a local filesystem
            self._root = ButlerURI("no-file.yaml")
        Loader.add_constructor("!include", Loader.include)

    def include(self, node):
        if isinstance(node, yaml.ScalarNode):
            return self.extractFile(self.construct_scalar(node))

        elif isinstance(node, yaml.SequenceNode):
            result = []
            for filename in self.construct_sequence(node):
                result.append(self.extractFile(filename))
            return result

        elif isinstance(node, yaml.MappingNode):
            result = {}
            for k, v in self.construct_mapping(node).items():
                result[k] = self.extractFile(v)
            return result

        else:
            print("Error: unrecognised node type in !include statement", file=sys.stderr)
            raise yaml.constructor.ConstructorError

    def extractFile(self, filename):
        # It is possible for the !include to point to an explicit URI
        # instead of a relative URI, therefore we first see if it is
        # scheme-less or not. If it has a scheme we use it directly;
        # if it is scheme-less we use it relative to the file root.
        requesteduri = ButlerURI(filename, forceAbsolute=False)

        if requesteduri.scheme:
            fileuri = requesteduri
        else:
            fileuri = copy.copy(self._root)
            fileuri.updateFile(filename)

        log.debug("Opening YAML file via !include: %s", fileuri)

        if not fileuri.scheme or fileuri.scheme == "file":
            with open(fileuri.ospath, "r") as f:
                return yaml.load(f, Loader)
        elif fileuri.scheme == "s3":
            if boto3 is None:
                raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")
            s3 = getS3Client()
            try:
                response = s3.get_object(Bucket=fileuri.netloc, Key=fileuri.relativeToPathRoot)
            except (s3.exceptions.NoSuchKey, s3.exceptions.NoSuchBucket) as err:
                raise FileNotFoundError(f"No such file or directory: {fileuri}") from err

            # boto3 response is a `StreamingBody`, but not a valid Python
            # IOStream. Loader will raise an error that the stream has no name.
            # The name is used to resolve the "!include" filename location to
            # download. A hackish solution is to name it explicitly.
            response["Body"].name = fileuri.geturl()
            return yaml.load(response["Body"], Loader)
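As a quick illustration of the ``!include`` handling above, the following sketch loads a document whose ``storageClasses`` key is spliced in from a sibling file. It assumes a writable temporary directory; the file names and keys are made up for the example.

    import tempfile

    with tempfile.TemporaryDirectory() as tmpdir:
        with open(os.path.join(tmpdir, "storageClasses.yaml"), "w") as fh:
            fh.write("Exposure:\n  pytype: int\n")
        with open(os.path.join(tmpdir, "document.yaml"), "w") as fh:
            fh.write("storageClasses: !include storageClasses.yaml\n")

        with open(os.path.join(tmpdir, "document.yaml"), "r") as fh:
            data = yaml.load(fh, Loader=Loader)
        # The included file is spliced in at the directive's location:
        # {'storageClasses': {'Exposure': {'pytype': 'int'}}}
        print(data)
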
class Config(collections.abc.MutableMapping):
    r"""Implements a datatype that is used by `Butler` for configuration
    parameters.

    It is essentially a `dict` with key/value pairs, including nested dicts
    (as values). In fact, it can be initialized with a `dict`.

    Config extends the `dict` API so that hierarchical values may be accessed
    with delimited notation or as a tuple. If a string is given the delimiter
    is picked up from the first character in that string. For example,
    ``foo.getValue(".a.b.c")``, ``foo["a"]["b"]["c"]``, ``foo["a", "b", "c"]``,
    ``foo[".a.b.c"]``, and ``foo["/a/b/c"]`` all achieve the same outcome.
    If the first character is alphanumeric, no delimiter will be used.
    ``foo["a.b.c"]`` will be a single key ``a.b.c`` as will ``foo[":a.b.c"]``.
    Unicode characters can be used as the delimiter for distinctiveness if
    required.

    If a key in the hierarchy starts with a non-alphanumeric character care
    should be used to ensure that either the tuple interface is used or
    a distinct delimiter is always given in string form.

    Finally, the delimiter can be escaped if it is part of a key and also
    has to be used as a delimiter. For example, ``foo[r".a.b\.c"]`` results in
    a two element hierarchy of ``a`` and ``b.c``. For hard-coded strings it is
    always better to use a different delimiter in these cases.

    Note that adding a multi-level key implicitly creates any nesting levels
    that do not exist, but removing multi-level keys does not automatically
    remove empty nesting levels. As a result:

    >>> c = Config()
    >>> c[".a.b"] = 1
    >>> del c[".a.b"]
    >>> c
    Config({'a': {}})

    Storage formats supported:

    - yaml: read and write is supported.

    Parameters
    ----------
    other : `str` or `Config` or `dict`
        Other source of configuration, can be:

        - (`str`) Treated as a path to a config file on disk. Must end with
          ".yaml".
        - (`Config`) Copies the other Config's values into this one.
        - (`dict`) Copies the values from the dict into this Config.

        If `None` is provided an empty `Config` will be created.
    """
    _D: ClassVar[str] = "→"
    """Default internal delimiter to use for components in the hierarchy when
    constructing keys for external use (see `Config.names()`)."""

    includeKey: ClassVar[str] = "includeConfigs"
    """Key used to indicate that another config should be included at this
    part of the hierarchy."""

    def __init__(self, other=None):
        self._data = {}
        self.configFile = None

        if other is None:
            return

        if isinstance(other, Config):
            self._data = copy.deepcopy(other._data)
            self.configFile = other.configFile
        elif isinstance(other, collections.abc.Mapping):
            self.update(other)
        elif isinstance(other, str):
            # if other is a string, assume it is a file path.
            self.__initFromFile(other)
            self._processExplicitIncludes()
        else:
            # if the config specified by other could not be recognized raise
            # a runtime error.
            raise RuntimeError("A Config could not be loaded from other: %s" % other)

    def ppprint(self):
        """Helper function for debugging that prints a config in a readable
        way in the debugger.

        Use: pdb> print(myConfigObject.ppprint())

        Returns
        -------
        s : `str`
            A prettyprint formatted string representing the config.
        """
        return pprint.pformat(self._data, indent=2, width=1)

    def __repr__(self):
        return f"{type(self).__name__}({self._data!r})"

    def __str__(self):
        return self.ppprint()

    def __len__(self):
        return len(self._data)

    def __iter__(self):
        return iter(self._data)

    def copy(self):
        return type(self)(self)

    @classmethod
    def fromYaml(cls, string: str) -> Config:
        """Create a new Config instance from a YAML string.

        Parameters
        ----------
        string : `str`
            String containing content in YAML format.

        Returns
        -------
        c : `Config`
            Newly-constructed Config.
        """
        return cls().__initFromYaml(string)
    def __initFromFile(self, path):
        """Load a file from a path or a URI.

        Parameters
        ----------
        path : `str`
            Path or URI to a persisted config file.
        """
        uri = ButlerURI(path)
        if uri.path.endswith("yaml"):
            if uri.scheme == "s3":
                self.__initFromS3YamlFile(uri.geturl())
            else:
                self.__initFromYamlFile(uri.ospath)
        else:
            raise RuntimeError("Unhandled config file type: %s" % uri)
        self.configFile = str(path)

    def __initFromS3YamlFile(self, url):
        """Load a YAML file from a given S3 bucket URI.

        Parameters
        ----------
        url : `str`
            URL of a persisted config file.
        """
        if boto3 is None:
            raise ModuleNotFoundError("boto3 not found. "
                                      "Are you sure it is installed?")

        uri = ButlerURI(url)
        s3 = getS3Client()
        try:
            response = s3.get_object(Bucket=uri.netloc, Key=uri.relativeToPathRoot)
        except (s3.exceptions.NoSuchKey, s3.exceptions.NoSuchBucket) as err:
            raise FileNotFoundError(f"No such file or directory: {uri}") from err

        # boto3 response is a `StreamingBody`, but not a valid Python IOStream.
        # Loader will raise an error that the stream has no name. A hackish
        # solution is to name it explicitly.
        response["Body"].name = url
        self.__initFromYaml(response["Body"])
        response["Body"].close()

    def __initFromYamlFile(self, path):
        """Open a file at a given path and attempt to load it as YAML.

        Parameters
        ----------
        path : `str`
            Path to a persisted config file in YAML format.
        """
        log.debug("Opening YAML config file: %s", path)
        with open(path, "r") as f:
            self.__initFromYaml(f)

    def __initFromYaml(self, stream):
        """Load a YAML config from any readable stream that contains one.

        Parameters
        ----------
        stream : `IO` or `str`
            Stream to pass to the YAML loader. Accepts anything that
            `yaml.load` accepts. This can include a string as well as an
            IO stream.

        Raises
        ------
        yaml.YAMLError
            If there is an error loading the file.
        """
        content = yaml.load(stream, Loader=Loader)
        if content is None:
            content = {}
        self._data = content
        return self
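For reference, constructing a ``Config`` directly from a YAML string goes through the same loader; a minimal sketch with invented keys:

    c = Config.fromYaml("a:\n  b: 1\n")
    print(c["a", "b"])            # -> 1
    # Equivalently, from an existing dict:
    c2 = Config({"a": {"b": 1}})
    print(c == c2)                # -> True
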
    def _processExplicitIncludes(self):
        """Scan through the configuration searching for the special
        includeConfigs directive and process the includes."""

        # Search paths for config files
        searchPaths = [os.path.curdir]
        if self.configFile is not None:
            searchPaths.append(os.path.abspath(os.path.dirname(self.configFile)))

        # Ensure we know what delimiter to use
        names = self.nameTuples()
        for path in names:
            if path[-1] == self.includeKey:

                log.debug("Processing file include directive at %s", self._D + self._D.join(path))
                basePath = path[:-1]

                # Extract the includes and then delete them from the config
                includes = self[path]
                del self[path]

                # Be consistent and convert to a list
                if not isinstance(includes, list):
                    includes = [includes]

                # Read each file assuming it is a reference to a file
                # The file can be relative to config file or cwd
                # ConfigSubset search paths are not used
                # At some point these might be URIs which we will have to
                # assume resolve explicitly
                subConfigs = []
                for fileName in includes:
                    # Expand any shell variables
                    fileName = os.path.expandvars(fileName)
                    found = None
                    if os.path.isabs(fileName):
                        found = fileName
                    else:
                        for dir in searchPaths:
                            filePath = os.path.join(dir, fileName)
                            if os.path.exists(filePath):
                                found = os.path.normpath(os.path.abspath(filePath))
                                break
                    if not found:
                        raise RuntimeError(f"Unable to find referenced include file: {fileName}")

                    # Read the referenced Config as a Config
                    subConfigs.append(type(self)(found))

                # Now we need to merge these sub configs with the current
                # information that was present in this node in the config
                # tree with precedence given to the explicit values
                newConfig = subConfigs.pop(0)
                for sc in subConfigs:
                    newConfig.update(sc)

                # Explicit values take precedence
                if not basePath:
                    # This is an include at the root config
                    newConfig.update(self)
                    # Replace the current config
                    self._data = newConfig._data
                else:
                    newConfig.update(self[basePath])
                    # And reattach to the base config
                    self[basePath] = newConfig
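To make the ``includeConfigs`` directive concrete, here is a small sketch of the merge behaviour. The file names, keys, and values are invented, and a temporary directory is used so the referenced file can be found on the search path.

    import tempfile

    with tempfile.TemporaryDirectory() as tmpdir:
        with open(os.path.join(tmpdir, "datastore.yaml"), "w") as fh:
            fh.write("name: posix\nchecksum: true\n")
        with open(os.path.join(tmpdir, "main.yaml"), "w") as fh:
            fh.write("datastore:\n"
                     "  includeConfigs: datastore.yaml\n"
                     "  checksum: false\n")

        c = Config(os.path.join(tmpdir, "main.yaml"))
        # The included file is merged into the "datastore" node, with the
        # explicit values in main.yaml taking precedence.
        print(c["datastore", "name"])       # -> posix
        print(c["datastore", "checksum"])   # -> False
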
    @staticmethod
    def _splitIntoKeys(key):
        r"""Split the argument for get/set/in into a hierarchical list.

        Parameters
        ----------
        key : `str` or iterable
            Argument given to get/set/in. If an iterable is provided it will
            be converted to a list. If the first character of the string
            is not an alphanumeric character then it will be used as the
            delimiter for the purposes of splitting the remainder of the
            string. If the delimiter is also in one of the keys then it
            can be escaped using ``\``. There is no default delimiter.

        Returns
        -------
        keys : `list`
            Hierarchical keys as a `list`.
        """
        if isinstance(key, str):
            if not key[0].isalnum():
                d = key[0]
                key = key[1:]
            else:
                return [key, ]
            escaped = f"\\{d}"
            temp = None
            if escaped in key:
                # Complain at the attempt to escape the escape
                doubled = fr"\{escaped}"
                if doubled in key:
                    raise ValueError(f"Escaping an escaped delimiter ({doubled} in {key})"
                                     " is not yet supported.")
                # Replace with a character that won't be in the string
                temp = "\r"
                if temp in key or d == temp:
                    raise ValueError(f"Can not use character {temp!r} in hierarchical key or as"
                                     " delimiter if escaping the delimiter")
                key = key.replace(escaped, temp)
            hierarchy = key.split(d)
            if temp:
                hierarchy = [h.replace(temp, d) for h in hierarchy]
            return hierarchy
        elif isinstance(key, collections.abc.Iterable):
            return list(key)
        else:
            # Not sure what this is so try it anyway
            return [key, ]
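A short sketch of how the splitting behaves, including the escape handling described above (``_splitIntoKeys`` is private, so the direct calls here are purely illustrative):

    print(Config._splitIntoKeys(".a.b.c"))     # ['a', 'b', 'c']
    print(Config._splitIntoKeys("/a/b/c"))     # ['a', 'b', 'c']
    print(Config._splitIntoKeys("a.b.c"))      # ['a.b.c']  (no leading delimiter)
    print(Config._splitIntoKeys(r".a.b\.c"))   # ['a', 'b.c']  (escaped delimiter)
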
    def _getKeyHierarchy(self, name):
        """Retrieve the key hierarchy for accessing the Config.

        Parameters
        ----------
        name : `str` or `tuple`
            Delimited string or `tuple` of hierarchical keys.

        Returns
        -------
        hierarchy : `list` of `str`
            Hierarchy to use as a `list`. If the name is available directly
            as a key in the Config it will be used regardless of the presence
            of any nominal delimiter.
        """
        if name in self._data:
            keys = [name, ]
        else:
            keys = self._splitIntoKeys(name)
        return keys

    def _findInHierarchy(self, keys, create=False):
        """Look for hierarchy of keys in Config.

        Parameters
        ----------
        keys : `list` or `tuple`
            Keys to search in hierarchy.
        create : `bool`, optional
            If `True`, if a part of the hierarchy does not exist, insert an
            empty `dict` into the hierarchy.

        Returns
        -------
        hierarchy : `list`
            List of the value corresponding to each key in the supplied
            hierarchy. Only keys that exist in the hierarchy will have
            a value.
        complete : `bool`
            `True` if the full hierarchy exists and the final element
            in ``hierarchy`` is the relevant value.
        """
        d = self._data

        def checkNextItem(k, d, create):
            """See if k is in d and if it is return the new child."""
            nextVal = None
            isThere = False
            if d is None:
                # We have gone past the end of the hierarchy
                pass
            elif isinstance(d, collections.abc.Sequence):
                # Check sequence first because for lists
                # __contains__ checks whether value is found in list
                # not whether the index exists in list. When we traverse
                # the hierarchy we are interested in the index.
                try:
                    nextVal = d[int(k)]
                    isThere = True
                except IndexError:
                    pass
                except ValueError:
                    isThere = k in d
            elif k in d:
                nextVal = d[k]
                isThere = True
            elif create:
                d[k] = {}
                nextVal = d[k]
                isThere = True
            return nextVal, isThere

        hierarchy = []
        complete = True
        for k in keys:
            d, isThere = checkNextItem(k, d, create)
            if isThere:
                hierarchy.append(d)
            else:
                complete = False
                break

        return hierarchy, complete
    def __getitem__(self, name):
        # Override the split for the simple case where there is an exact
        # match. This allows `Config.items()` to work via a simple
        # __iter__ implementation that returns top level keys of
        # self._data.
        keys = self._getKeyHierarchy(name)

        hierarchy, complete = self._findInHierarchy(keys)
        if not complete:
            raise KeyError(f"{name} not found")
        data = hierarchy[-1]

        if isinstance(data, collections.abc.Mapping):
            data = Config(data)
            # Ensure that child configs inherit the parent internal delimiter
            if self._D != Config._D:
                data._D = self._D
        return data

    def __setitem__(self, name, value):
        keys = self._getKeyHierarchy(name)
        last = keys.pop()
        if isinstance(value, Config):
            value = copy.deepcopy(value._data)

        hierarchy, complete = self._findInHierarchy(keys, create=True)
        if hierarchy:
            data = hierarchy[-1]
        else:
            data = self._data

        try:
            data[last] = value
        except TypeError:
            data[int(last)] = value

    def __contains__(self, key):
        keys = self._getKeyHierarchy(key)
        hierarchy, complete = self._findInHierarchy(keys)
        return complete

    def __delitem__(self, key):
        keys = self._getKeyHierarchy(key)
        last = keys.pop()
        hierarchy, complete = self._findInHierarchy(keys)
        if complete:
            if hierarchy:
                data = hierarchy[-1]
            else:
                data = self._data
            del data[last]
        else:
            raise KeyError(f"{key} not found in Config")
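Putting the access methods above together, a brief sketch of hierarchical get, set, and delete; the keys and values are invented for the example:

    c = Config({"datastore": {"root": "/data/repo", "formatters": {"int": "Fmt"}}})
    print(c[".datastore.root"])                  # "/data/repo" (leading "." sets the delimiter)
    print(c["datastore", "formatters", "int"])   # "Fmt" (tuple access)
    c["/datastore/checksum"] = True              # "/" works equally well as a delimiter
    print("datastore.root" in c)                 # False: no leading delimiter, so this is the
                                                 # single key "datastore.root"
    del c[".datastore.formatters"]
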
    def update(self, other):
        """Like `dict.update`, but will add or modify keys in nested dicts,
        instead of overwriting the nested dict entirely.

        For example, for the given code:

            foo = {"a": {"b": 1}}
            foo.update({"a": {"c": 2}})

        - If foo is a dict, then after the update foo == {"a": {"c": 2}}
        - But if foo is a Config, then after the update
          foo == {"a": {"b": 1, "c": 2}}

        Parameters
        ----------
        other : `dict` or `Config`
            Source of configuration.
        """
        def doUpdate(d, u):
            if not isinstance(u, collections.abc.Mapping) or \
                    not isinstance(d, collections.abc.Mapping):
                raise RuntimeError("Only call update with Mapping, not {}".format(type(d)))
            for k, v in u.items():
                if isinstance(v, collections.abc.Mapping):
                    d[k] = doUpdate(d.get(k, {}), v)
                else:
                    d[k] = v
            return d
        doUpdate(self._data, other)

    def merge(self, other):
        """Like `Config.update`, but will add keys & values from other that
        DO NOT EXIST in self.

        Keys and values that already exist in self will NOT be overwritten.

        Parameters
        ----------
        other : `dict` or `Config`
            Source of configuration.
        """
        otherCopy = copy.deepcopy(other)
        otherCopy.update(self)
        self._data = otherCopy._data
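A short sketch contrasting ``update`` and ``merge`` for the same inputs (keys and values invented):

    base = Config({"a": {"b": 1}})

    c1 = base.copy()
    c1.update({"a": {"b": 99, "c": 2}})
    # update recurses into nested dicts: {"a": {"b": 99, "c": 2}}

    c2 = base.copy()
    c2.merge(Config({"a": {"b": 99, "c": 2}}))
    # merge only fills in missing keys:  {"a": {"b": 1, "c": 2}}
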
    def nameTuples(self, topLevelOnly=False):
        """Get tuples representing the name hierarchies of all keys.

        The tuples returned from this method are guaranteed to be usable
        to access items in the configuration object.

        Parameters
        ----------
        topLevelOnly : `bool`, optional
            If False, the default, a full hierarchy of names is returned.
            If True, only the top-level names are returned.

        Returns
        -------
        names : `list` of `tuple` of `str`
            List of all names present in the `Config` where each element
            in the list is a `tuple` of strings representing the hierarchy.
        """
        if topLevelOnly:
            return list((k,) for k in self)

        def getKeysAsTuples(d, keys, base):
            if isinstance(d, collections.abc.Sequence):
                theseKeys = range(len(d))
            else:
                theseKeys = d.keys()
            for key in theseKeys:
                val = d[key]
                levelKey = base + (key,) if base is not None else (key,)
                keys.append(levelKey)
                if isinstance(val, (collections.abc.Mapping, collections.abc.Sequence)) \
                        and not isinstance(val, str):
                    getKeysAsTuples(val, keys, levelKey)
        keys = []
        getKeysAsTuples(self._data, keys, None)
        return keys

    def names(self, topLevelOnly=False, delimiter=None):
        """Get a delimited name of all the keys in the hierarchy.

        The values returned from this method are guaranteed to be usable
        to access items in the configuration object.

        Parameters
        ----------
        topLevelOnly : `bool`, optional
            If False, the default, a full hierarchy of names is returned.
            If True, only the top-level names are returned.
        delimiter : `str`, optional
            Delimiter to use when forming the keys. If the delimiter is
            present in any of the keys, it will be escaped in the returned
            names. If `None` is given a delimiter will be automatically
            provided. The delimiter can not be alphanumeric.

        Returns
        -------
        names : `list` of `str`
            List of all names present in the `Config`.

        Notes
        -----
        This is different than the built-in method `dict.keys`, which will
        return only the first level keys.

        Raises
        ------
        ValueError
            The supplied delimiter is alphanumeric.
        """
        if topLevelOnly:
            return list(self.keys())

        # Get all the tuples of hierarchical keys
        nameTuples = self.nameTuples()

        if delimiter is not None and delimiter.isalnum():
            raise ValueError(f"Supplied delimiter ({delimiter!r}) must not be alphanumeric.")

        if delimiter is None:
            # Start with something, and ensure it does not need to be
            # escaped (it is much easier to understand if not escaped)
            delimiter = self._D

            # Form big string for easy check of delimiter clash
            combined = "".join("".join(str(s) for s in k) for k in nameTuples)

            # Try a delimiter and keep trying until we get something that
            # works.
            ntries = 0
            while delimiter in combined:
                log.debug("Delimiter '%s' could not be used. Trying another.", delimiter)
                ntries += 1

                if ntries > 100:
                    raise ValueError(f"Unable to determine a delimiter for Config {self}")

                # try another one
                while True:
                    delimiter = chr(ord(delimiter)+1)
                    if not delimiter.isalnum():
                        break

        log.debug("Using delimiter %r", delimiter)

        # Form the keys, escaping the delimiter if necessary
        strings = [delimiter + delimiter.join(str(s).replace(delimiter, f"\\{delimiter}") for s in k)
                   for k in nameTuples]
        return strings
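For illustration, the two name-reporting methods applied to a small invented config, passing "." explicitly as the delimiter to ``names``:

    c = Config({"a": {"b": 1}, "c": [2, 3]})
    print(c.nameTuples())
    # [('a',), ('a', 'b'), ('c',), ('c', 0), ('c', 1)]
    print(c.names(delimiter="."))
    # ['.a', '.a.b', '.c', '.c.0', '.c.1']
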
    def asArray(self, name):
        """Get a value as an array.

        May contain one or more elements.

        Parameters
        ----------
        name : `str`
            Key to use to retrieve value.

        Returns
        -------
        array : `collections.abc.Sequence`
            The value corresponding to name, but guaranteed to be returned
            as a list with at least one element. If the value is a
            `~collections.abc.Sequence` (and not a `str`) the value itself
            will be returned, else the value will be the first element.
        """
        val = self.get(name)
        if isinstance(val, str):
            val = [val]
        elif not isinstance(val, collections.abc.Sequence):
            val = [val]
        return val
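A tiny sketch of the wrapping behaviour (values invented):

    c = Config({"one": "a", "many": ["a", "b"], "num": 5})
    print(c.asArray("one"))    # ['a']  (strings are wrapped)
    print(c.asArray("many"))   # ['a', 'b']
    print(c.asArray("num"))    # [5]
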
    def __eq__(self, other):
        if isinstance(other, Config):
            other = other._data
        return self._data == other

    def __ne__(self, other):
        if isinstance(other, Config):
            other = other._data
        return self._data != other

    #######
    # i/o #
    def dump(self, output):
        """Write the config to a YAML stream.

        Parameters
        ----------
        output
            The YAML stream to use for output.
        """
        yaml.safe_dump(self._data, output, default_flow_style=False)

    def dumpToUri(self, uri, updateFile=True, defaultFileName="butler.yaml",
                  overwrite=True):
        """Write the config to the location pointed to by the given URI.

        Currently supports 's3' and 'file' URI schemes.

        Parameters
        ----------
        uri : `str` or `ButlerURI`
            URI of location where the Config will be written.
        updateFile : `bool`, optional
            If True and uri does not end in a filename with an extension,
            `defaultFileName` will be appended to the target uri. True by
            default.
        defaultFileName : `str`, optional
            The file name that will be appended to the target uri if
            updateFile is True and uri does not end in a file with an
            extension.
        overwrite : `bool`, optional
            If True the configuration will be written even if it already
            exists at that location.
        """
        if isinstance(uri, str):
            uri = ButlerURI(uri)

        if not uri.scheme or uri.scheme == "file":
            if os.path.isdir(uri.path) and updateFile:
                uri = ButlerURI(os.path.join(uri.ospath, defaultFileName))
            self.dumpToFile(uri.ospath, overwrite=overwrite)
        elif uri.scheme == "s3":
            if not uri.dirLike and "." not in uri.basename():
                uri = ButlerURI(uri.geturl(), forceDirectory=True)
            uri.updateFile(defaultFileName)
            self.dumpToS3File(uri, overwrite=overwrite)
        else:
            raise ValueError(f"Unrecognized URI scheme: {uri.scheme}")
    def dumpToFile(self, path, *, overwrite=True):
        """Write the config to a file.

        Parameters
        ----------
        path : `str`
            Path to the file to use for output.
        overwrite : `bool`, optional
            If True any existing file will be overwritten.

        Notes
        -----
        The name of the config file is stored in the Config object.

        Raises
        ------
        FileExistsError
            Raised if the file already exists but overwrite is False.
        """
        if overwrite:
            mode = "w"
        else:
            mode = "x"
        with open(path, mode) as f:
            self.dump(f)
        self.configFile = path
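A small round-trip sketch using the local-file path (the directory is temporary and the contents invented; writing to S3 would instead go through ``dumpToUri`` with an ``s3://`` URI):

    import tempfile

    c = Config({"datastore": {"checksum": True}})
    with tempfile.TemporaryDirectory() as tmpdir:
        path = os.path.join(tmpdir, "butler.yaml")
        c.dumpToFile(path)
        roundtrip = Config(path)
        assert roundtrip == c
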
    def dumpToS3File(self, uri, *, overwrite=True):
        """Write the config to a file in an S3 bucket.

        Parameters
        ----------
        uri : `ButlerURI`
            S3 URI where the configuration should be stored.
        overwrite : `bool`, optional
            If False, a check will be made to see if the key already
            exists.

        Raises
        ------
        FileExistsError
            Raised if the configuration already exists at this location
            and overwrite is set to `False`.
        """
        if boto3 is None:
            raise ModuleNotFoundError("Could not find boto3. "
                                      "Are you sure it is installed?")

        if uri.scheme != "s3":
            raise ValueError(f"Must provide S3 URI not {uri}")

        s3 = getS3Client()

        if not overwrite:
            from .s3utils import s3CheckFileExists
            if s3CheckFileExists(uri, client=s3)[0]:
                raise FileExistsError(f"Config already exists at {uri}")

        bucket = uri.netloc
        key = uri.relativeToPathRoot

        with io.StringIO() as stream:
            self.dump(stream)
            stream.seek(0)
            s3.put_object(Bucket=bucket, Key=key, Body=stream.read())
    @staticmethod
    def updateParameters(configType, config, full, toUpdate=None, toCopy=None, overwrite=True):
        """Generic helper function for updating specific config parameters.

        Allows for named parameters to be set to new values in bulk, and
        for other values to be set by copying from a reference config.

        Assumes that the supplied config is compatible with ``configType``
        and will attach the updated values to the supplied config by
        looking for the related component key. It is assumed that
        ``config`` and ``full`` are from the same part of the
        configuration hierarchy.

        Parameters
        ----------
        configType : `ConfigSubset`
            Config type to use to extract relevant items from ``config``.
        config : `Config`
            A `Config` to update. Only the subset understood by
            the supplied `ConfigSubset` will be modified. Default values
            will not be inserted and the content will not be validated
            since mandatory keys are allowed to be missing until
            populated later by merging.
        full : `Config`
            A complete config with all defaults expanded that can be
            converted to a ``configType``. Read-only and will not be
            modified by this method. Values are read from here if
            ``toCopy`` is defined.

            Repository-specific options that should not be obtained
            from defaults when Butler instances are constructed
            should be copied from ``full`` to ``config``.
        toUpdate : `dict`, optional
            A `dict` defining the keys to update and the new value to use.
            The keys and values can be any supported by `Config`
            assignment.
        toCopy : `tuple`, optional
            `tuple` of keys whose values should be copied from ``full``
            into ``config``.
        overwrite : `bool`, optional
            If `False`, do not modify a value in ``config`` if the key
            already exists. Default is always to overwrite.

        Raises
        ------
        ValueError
            Neither ``toUpdate`` nor ``toCopy`` were defined.
        """
        if toUpdate is None and toCopy is None:
            raise ValueError("One of toUpdate or toCopy parameters must be set.")

        # If this is a parent configuration then we need to ensure that
        # the supplied config has the relevant component key in it.
        # If this is a parent configuration we add in the stub entry
        # so that the ConfigSubset constructor will do the right thing.
        # We check full for this since that is guaranteed to be complete.
        if configType.component in full and configType.component not in config:
            config[configType.component] = {}

        # Extract the part of the config we wish to update
        localConfig = configType(config, mergeDefaults=False, validate=False)

        if toUpdate:
            for key, value in toUpdate.items():
                if key in localConfig and not overwrite:
                    log.debug("Not overriding key '%s' with value '%s' in config %s",
                              key, value, localConfig.__class__.__name__)
                else:
                    localConfig[key] = value

        if toCopy:
            localFullConfig = configType(full, mergeDefaults=False)
            for key in toCopy:
                if key in localConfig and not overwrite:
                    log.debug("Not overriding key '%s' from defaults in config %s",
                              key, localConfig.__class__.__name__)
                else:
                    localConfig[key] = localFullConfig[key]

        # Reattach to parent if this is a child config
        if configType.component in config:
            config[configType.component] = localConfig
        else:
            config.update(localConfig)
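A minimal sketch of ``updateParameters`` in action. The subset class, component name, keys, and values are all invented for the example (``ConfigSubset`` itself is defined later in this module):

    class DemoConfig(ConfigSubset):
        """Hypothetical subset selecting a "demo" component."""
        component = "demo"

    config = Config({"demo": {"a": 1}})
    full = Config({"demo": {"a": 1, "b": 2, "c": 3}})
    Config.updateParameters(DemoConfig, config, full,
                            toUpdate={"a": 42}, toCopy=("b",))
    # "a" was overwritten in bulk and "b" copied from the reference config:
    # config is now {"demo": {"a": 42, "b": 2}}
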
class ConfigSubset(Config):
    """Config representing a subset of a more general configuration.

    Subclasses define their own component and when given a configuration
    that includes that component, the resulting configuration only includes
    the subset. For example, your config might contain a ``dimensions``
    section as part of a global config; only that subset will be stored.
    If ``dimensions`` can not be found it is assumed that the entire
    contents of the configuration should be used.

    Default values are read from the environment or supplied search paths
    using the default configuration file name specified in the subclass.
    This allows a configuration class to be instantiated without any
    additional arguments.

    Additional validation can be specified to check for keys that are
    mandatory in the configuration.

    Parameters
    ----------
    other : `Config` or `str` or `dict`
        Argument specifying the configuration information as understood
        by `Config`.
    validate : `bool`, optional
        If `True` required keys will be checked to ensure configuration
        consistency.
    mergeDefaults : `bool`, optional
        If `True` defaults will be read and the supplied config will
        be combined with the defaults, with the supplied values taking
        precedence.
    searchPaths : `list` or `tuple`, optional
        Explicit additional paths to search for defaults. They should
        be supplied in priority order. These paths have higher priority
        than those read from the environment in
        `ConfigSubset.defaultSearchPaths()`.
    """

    component: ClassVar[Optional[str]] = None
    """Component to use from supplied config. Can be None. If specified the
    key is not required. Can be a full dot-separated path to a component.
    """

    requiredKeys: ClassVar[Sequence[str]] = ()
    """Keys that are required to be specified in the configuration.
    """

    defaultConfigFile: ClassVar[Optional[str]] = None
    """Name of the file containing defaults for this config class.
    """
    def __init__(self, other=None, validate=True, mergeDefaults=True, searchPaths=None):

        # Create a blank object to receive the defaults
        # Once we have the defaults we then update with the external values
        super().__init__()

        # Create a standard Config rather than subset
        externalConfig = Config(other)

        # Select the part we need from it
        # To simplify the use of !include we also check for the existence of
        # component.component (since the included files can themselves
        # include the component name)
        if self.component is not None:
            doubled = (self.component, self.component)
            # Must check for double depth first
            if doubled in externalConfig:
                externalConfig = externalConfig[doubled]
            elif self.component in externalConfig:
                externalConfig._data = externalConfig._data[self.component]

        # Default files read to create this configuration
        self.filesRead = []

        # Assume we are not looking up child configurations
        containerKey = None

        # Sometimes we do not want to merge with defaults.
        if mergeDefaults:

            # Supplied search paths have highest priority
            fullSearchPath = []
            if searchPaths:
                fullSearchPath.extend(searchPaths)

            # Read default paths from environment
            fullSearchPath.extend(self.defaultSearchPaths())

            # There are two places to find defaults for this particular config
            # - The "defaultConfigFile" defined in the subclass
            # - The class specified in the "cls" element in the config.
            #   Read cls after merging in case it changes.
            if self.defaultConfigFile is not None:
                self._updateWithConfigsFromPath(fullSearchPath, self.defaultConfigFile)

            # Can have a class specification in the external config (priority)
            # or from the defaults.
            pytype = None
            if "cls" in externalConfig:
                pytype = externalConfig["cls"]
            elif "cls" in self:
                pytype = self["cls"]

            if pytype is not None:
                try:
                    cls = doImport(pytype)
                except ImportError as e:
                    raise RuntimeError(f"Failed to import cls '{pytype}' for config {type(self)}") from e
                defaultsFile = cls.defaultConfigFile
                if defaultsFile is not None:
                    self._updateWithConfigsFromPath(fullSearchPath, defaultsFile)

                # Get the container key in case we need it
                try:
                    containerKey = cls.containerKey
                except AttributeError:
                    pass

        # Now update this object with the external values so that the external
        # values always override the defaults
        self.update(externalConfig)

        # If this configuration has child configurations of the same
        # config class, we need to expand those defaults as well.
        if mergeDefaults and containerKey is not None and containerKey in self:
            for idx, subConfig in enumerate(self[containerKey]):
                self[containerKey, idx] = type(self)(other=subConfig, validate=validate,
                                                     mergeDefaults=mergeDefaults,
                                                     searchPaths=searchPaths)

        if validate:
            self.validate()
    @classmethod
    def defaultSearchPaths(cls):
        """Read the environment to determine search paths to use for global
        defaults.

        Global defaults, at lowest priority, are found in the ``config``
        directory of the butler source tree. Additional defaults can be
        defined using the environment variable ``$DAF_BUTLER_CONFIG_PATH``
        which is a PATH-like variable where paths at the front of the list
        have priority over those later.

        Returns
        -------
        paths : `list`
            Returns a list of paths to search. The returned order is in
            priority with the highest priority paths first. The butler config
            directory will always be at the end of the list.
        """
        # We can pick up defaults from multiple search paths
        # We fill defaults by using the butler config path and then
        # the config path environment variable in reverse order.
        defaultsPaths = []

        if CONFIG_PATH in os.environ:
            externalPaths = os.environ[CONFIG_PATH].split(os.pathsep)
            defaultsPaths.extend(externalPaths)

        # Find the butler configs
        defaultsPaths.append(os.path.join(lsst.utils.getPackageDir("daf_butler"), "config"))

        return defaultsPaths

    def _updateWithConfigsFromPath(self, searchPaths, configFile):
        """Search the supplied paths, merging the configuration values.

        The values read will override values currently stored in the object.
        Every file found in the path will be read, such that the earlier
        path entries have higher priority.

        Parameters
        ----------
        searchPaths : `list`
            Paths to search for the supplied configFile. This path
            is the priority order, such that files read from the
            first path entry will be selected over those read from
            a later path.
        configFile : `str`
            File to locate in path. If absolute path it will be read
            directly and the search path will not be used.
        """
        if os.path.isabs(configFile):
            if os.path.exists(configFile):
                self.filesRead.append(configFile)
                self._updateWithOtherConfigFile(configFile)
        else:
            # Reverse order so that high priority entries
            # update the object last.
            for pathDir in reversed(searchPaths):
                file = os.path.join(pathDir, configFile)
                if os.path.exists(file):
                    self.filesRead.append(file)
                    self._updateWithOtherConfigFile(file)

    def _updateWithOtherConfigFile(self, file):
        """Read in some defaults and update.

        Update the configuration by reading the supplied file as a config
        of this class, and merging such that these values override the
        current values. Contents of the external config are not validated.

        Parameters
        ----------
        file : `Config`, `str`, or `dict`
            Entity that can be converted to a `ConfigSubset`.
        """
        # Use this class to read the defaults so that subsetting can happen
        # correctly.
        externalConfig = type(self)(file, validate=False, mergeDefaults=False)
        self.update(externalConfig)

    def validate(self):
        """Check that mandatory keys are present in this configuration.

        Ignored if ``requiredKeys`` is empty."""
        # Validation
        missing = [k for k in self.requiredKeys if k not in self._data]
        if missing:
            raise KeyError(f"Mandatory keys ({missing}) missing from supplied configuration for {type(self)}")
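To make the subset behaviour concrete, a minimal sketch with a hypothetical subclass. The component name, required key, and values are invented, and ``mergeDefaults=False`` is passed because this made-up class has no defaults file to read:

    class DatastoreLikeConfig(ConfigSubset):
        """Hypothetical subset selecting a "datastore" component."""
        component = "datastore"
        requiredKeys = ("root",)

    full = Config({"datastore": {"root": "/data/repo"}, "registry": {"db": "sqlite"}})

    # Only the "datastore" part of the wider config is retained.
    sub = DatastoreLikeConfig(full, mergeDefaults=False)
    print(dict(sub.items()))    # {'root': '/data/repo'}

    # A missing required key raises at construction time when validate=True.
    try:
        DatastoreLikeConfig({"datastore": {}}, mergeDefaults=False)
    except KeyError as exc:
        print(exc)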