Coverage for python/lsst/daf/butler/core/config.py : 10%

1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22 """Configuration control."""
24 __all__ = ("Config", "ConfigSubset")
26 import collections
27import copy
28import logging
29import pprint
30import os
31import yaml
32import sys
33from yaml.representer import Representer
34import io
35from typing import Sequence, Optional, ClassVar
37 try:
38 import boto3
39except ImportError:
40 boto3 = None
42 import lsst.utils
43from lsst.utils import doImport
44from .location import ButlerURI
45from .s3utils import getS3Client
47 yaml.add_representer(collections.defaultdict, Representer.represent_dict)
50 # Config module logger
51log = logging.getLogger(__name__)
53 # PATH-like environment variable to use for defaults.
54 CONFIG_PATH = "DAF_BUTLER_CONFIG_PATH"
57 class Loader(yaml.CSafeLoader):
58 """YAML Loader that supports file include directives.
60 Uses the ``!include`` directive in a YAML file to point to another
61 YAML file to be included. The path in the include directive is relative
62 to the file containing that directive. For example:
64 storageClasses: !include storageClasses.yaml
66 Examples
67 --------
68 >>> with open("document.yaml", "r") as f:
69 ...     data = yaml.load(f, Loader=Loader)
71 Notes
72 -----
73 See https://davidchall.github.io/yaml-includes.html
74 """
76 def __init__(self, stream):
77 super().__init__(stream)
78 self._root = ButlerURI(stream.name)
79 Loader.add_constructor("!include", Loader.include)
81 def include(self, node):
82 if isinstance(node, yaml.ScalarNode):
83 return self.extractFile(self.construct_scalar(node))
85 elif isinstance(node, yaml.SequenceNode):
86 result = []
87 for filename in self.construct_sequence(node):
88 result.append(self.extractFile(filename))
89 return result
91 elif isinstance(node, yaml.MappingNode):
92 result = {}
93 for k, v in self.construct_mapping(node).items():
94 result[k] = self.extractFile(v)
95 return result
97 else:
98 print("Error: unrecognised node type in !include statement", file=sys.stderr)
99 raise yaml.constructor.ConstructorError
101 def extractFile(self, filename):
102 fileuri = copy.copy(self._root)
103 fileuri.updateFile(filename)
104 log.debug("Opening YAML file via !include: %s", fileuri)
106 if not fileuri.scheme or fileuri.scheme == "file":
107 with open(fileuri.ospath, "r") as f:
108 return yaml.load(f, Loader)
109 elif fileuri.scheme == "s3":
110 if boto3 is None:
111 raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")
112 s3 = getS3Client()
113 try:
114 response = s3.get_object(Bucket=fileuri.netloc, Key=fileuri.relativeToPathRoot)
115 except (s3.exceptions.NoSuchKey, s3.exceptions.NoSuchBucket) as err:
116 raise FileNotFoundError(f'No such file or directory: {fileuri}') from err
118 # boto3 response is a `StreamingBody`, but not a valid Python
119 # IOStream. Loader will raise an error that the stream has no name.
120 # The name is used to resolve the "!include" filename location to
121 # download. A hackish solution is to name it explicitly.
122 response["Body"].name = fileuri.geturl()
123 return yaml.load(response["Body"], Loader)
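# A minimal sketch of the ``!include`` forms handled by Loader.include above
# (scalar, sequence, and mapping nodes); the file names here are hypothetical:
#
#   single:   !include other.yaml
#   several:  !include [first.yaml, second.yaml]
#   named:    !include {a: first.yaml, b: second.yaml}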
126 class Config(collections.abc.MutableMapping):
127 r"""Implements a datatype that is used by `Butler` for configuration
128 parameters.
130 It is essentially a `dict` with key/value pairs, including nested dicts
131 (as values). In fact, it can be initialized with a `dict`.
132 This is explained next:
134 Config extends the `dict` api so that hierarchical values may be accessed
135 with delimited notation or as a tuple. If a string is given the delimiter
136 is picked up from the first character in that string. For example,
137 ``foo.getValue(".a.b.c")``, ``foo["a"]["b"]["c"]``, ``foo["a", "b", "c"]``,
138 ``foo[".a.b.c"]``, and ``foo["/a/b/c"]`` all achieve the same outcome.
139 If the first character is alphanumeric, no delimiter will be used.
140 ``foo["a.b.c"]`` will be a single key ``a.b.c`` as will ``foo[":a.b.c"]``.
141 Unicode characters can be used as the delimiter for distinctiveness if
142 required.
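As a doctest-style sketch of the equivalent access forms described above:

>>> foo = Config({"a": {"b": {"c": 1}}})
>>> foo[".a.b.c"]
1
>>> foo["a", "b", "c"]
1
>>> foo["/a/b/c"]
1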
144 If a key in the hierarchy starts with a non-alphanumeric character care
145 should be used to ensure that either the tuple interface is used or
146 a distinct delimiter is always given in string form.
148 Finally, the delimiter can be escaped if it is part of a key and also
149 has to be used as a delimiter. For example, ``foo[r".a.b\.c"]`` results in
150 a two element hierarchy of ``a`` and ``b.c``. For hard-coded strings it is
151 always better to use a different delimiter in these cases.
153 Note that adding a multi-level key implicitly creates any nesting levels
154 that do not exist, but removing multi-level keys does not automatically
155 remove empty nesting levels. As a result:
157 >>> c = Config()
158 >>> c[".a.b"] = 1
159 >>> del c[".a.b"]
160 >>> c
161 Config({'a': {}})
163 Storage formats supported:
165 - yaml: read and write is supported.
168 Parameters
169 ----------
170 other : `str` or `Config` or `dict`
171 Other source of configuration, can be:
173 - (`str`) Treated as a path to a config file on disk. Must end with
174 ".yaml".
175 - (`Config`) Copies the other Config's values into this one.
176 - (`dict`) Copies the values from the dict into this Config.
178 If `None` is provided an empty `Config` will be created.
179 """
181 _D: ClassVar[str] = "→"
182 """Default internal delimiter to use for components in the hierarchy when
183 constructing keys for external use (see `Config.names()`)."""
185 includeKey: ClassVar[str] = "includeConfigs"
186 """Key used to indicate that another config should be included at this
187 part of the hierarchy."""
189 def __init__(self, other=None):
190 self._data = {}
191 self.configFile = None
193 if other is None:
194 return
196 if isinstance(other, Config):
197 self._data = copy.deepcopy(other._data)
198 self.configFile = other.configFile
199 elif isinstance(other, collections.abc.Mapping):
200 self.update(other)
201 elif isinstance(other, str):
202 # if other is a string, assume it is a file path.
203 self.__initFromFile(other)
204 self._processExplicitIncludes()
205 else:
206 # if the config specified by other could not be recognized raise
207 # a runtime error.
208 raise RuntimeError("A Config could not be loaded from other: %s" % other)
210 def ppprint(self):
211 """Helper function for debugging: prints the config in a readable
212 way in the debugger.
214 Use: pdb> print(myConfigObject.ppprint())
216 Returns
217 -------
218 s : `str`
219 A pretty-printed string representing the config.
220 """
221 return pprint.pformat(self._data, indent=2, width=1)
223 def __repr__(self):
224 return f"{type(self).__name__}({self._data!r})"
226 def __str__(self):
227 return self.ppprint()
229 def __len__(self):
230 return len(self._data)
232 def __iter__(self):
233 return iter(self._data)
235 def copy(self):
236 return type(self)(self)
238 def __initFromFile(self, path):
239 """Load a file from a path or a URI.
241 Parameters
242 ----------
243 path : `str`
244 Path or URI to a persisted config file.
245 """
246 uri = ButlerURI(path)
247 if uri.path.endswith("yaml"):
248 if uri.scheme == "s3":
249 self.__initFromS3YamlFile(uri.geturl())
250 else:
251 self.__initFromYamlFile(uri.ospath)
252 else:
253 raise RuntimeError("Unhandled config file type: %s" % uri)
254 self.configFile = str(path)
256 def __initFromS3YamlFile(self, url):
257 """Load a config file from a given S3 bucket URI and parse it as
258 YAML.
260 Parameters
261 ----------
262 url : `str`
263 URL of a persisted config file.
264 """
265 if boto3 is None:
266 raise ModuleNotFoundError("boto3 not found. "
267 "Are you sure it is installed?")
269 uri = ButlerURI(url)
270 s3 = getS3Client()
271 try:
272 response = s3.get_object(Bucket=uri.netloc, Key=uri.relativeToPathRoot)
273 except (s3.exceptions.NoSuchKey, s3.exceptions.NoSuchBucket) as err:
274 raise FileNotFoundError(f"No such file or directory: {uri}") from err
276 # boto3 response is a `StreamingBody`, but not a valid Python IOStream.
277 # Loader will raise an error that the stream has no name. A hackish
278 # solution is to name it explicitly.
279 response["Body"].name = url
280 self.__initFromYaml(response["Body"])
281 response["Body"].close()
283 def __initFromYamlFile(self, path):
284 """Open a file at a given path and attempt to load it as YAML.
286 Parameters
287 ----------
288 path : `str`
289 Path to a persisted config file in YAML format.
290 """
291 log.debug("Opening YAML config file: %s", path)
292 with open(path, "r") as f:
293 self.__initFromYaml(f)
295 def __initFromYaml(self, stream):
296 """Load a YAML config from any readable stream that contains one.
298 Parameters
299 ----------
300 stream
301 Readable stream containing a config persisted in YAML format.
303 Raises
304 ------
305 yaml.YAMLError
306 If there is an error loading the file.
307 """
308 content = yaml.load(stream, Loader=Loader)
309 if content is None:
310 content = {}
311 self._data = content
312 return self
314 def _processExplicitIncludes(self):
315 """Scan through the configuration searching for the special
316 includeConfigs directive and process the includes."""
318 # Search paths for config files
319 searchPaths = [os.path.curdir]
320 if self.configFile is not None:
321 searchPaths.append(os.path.abspath(os.path.dirname(self.configFile)))
323 # Ensure we know what delimiter to use
324 names = self.nameTuples()
325 for path in names:
326 if path[-1] == self.includeKey:
328 log.debug("Processing file include directive at %s", self._D + self._D.join(path))
329 basePath = path[:-1]
331 # Extract the includes and then delete them from the config
332 includes = self[path]
333 del self[path]
335 # Be consistent and convert to a list
336 if not isinstance(includes, list):
337 includes = [includes]
339 # Read each file assuming it is a reference to a file
340 # The file can be relative to config file or cwd
341 # ConfigSubset search paths are not used
342 # At some point these might be URIs which we will have to
343 # assume resolve explicitly
344 subConfigs = []
345 for fileName in includes:
346 # Expand any shell variables
347 fileName = os.path.expandvars(fileName)
348 found = None
349 if os.path.isabs(fileName):
350 found = fileName
351 else:
352 for dir in searchPaths:
353 filePath = os.path.join(dir, fileName)
354 if os.path.exists(filePath):
355 found = os.path.normpath(os.path.abspath(filePath))
356 break
357 if not found:
358 raise RuntimeError(f"Unable to find referenced include file: {fileName}")
360 # Read the referenced Config as a Config
361 subConfigs.append(type(self)(found))
363 # Now we need to merge these sub configs with the current
364 # information that was present in this node in the config
365 # tree with precedence given to the explicit values
366 newConfig = subConfigs.pop(0)
367 for sc in subConfigs:
368 newConfig.update(sc)
370 # Explicit values take precedence
371 if not basePath:
372 # This is an include at the root config
373 newConfig.update(self)
374 # Replace the current config
375 self._data = newConfig._data
376 else:
377 newConfig.update(self[basePath])
378 # And reattach to the base config
379 self[basePath] = newConfig
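# A minimal sketch of the directive handled by _processExplicitIncludes above
# (file and class names are hypothetical): a fragment such as
#
#   datastore:
#     includeConfigs: datastoreDefaults.yaml
#     cls: mypackage.MyDatastore
#
# ends up with the contents of datastoreDefaults.yaml merged in under
# "datastore", with the explicit "cls" value taking precedence over the
# values read from the included file.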
381 @staticmethod
382 def _splitIntoKeys(key):
383 r"""Split the argument for get/set/in into a hierarchical list.
385 Parameters
386 ----------
387 key : `str` or iterable
388 Argument given to get/set/in. If an iterable is provided it will
389 be converted to a list. If the first character of the string
390 is not an alphanumeric character then it will be used as the
391 delimiter for the purposes of splitting the remainder of the
392 string. If the delimiter is also in one of the keys then it
393 can be escaped using ``\``. There is no default delimiter.
395 Returns
396 -------
397 keys : `list`
398 Hierarchical keys as a `list`.
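For example, escaping the delimiter as described above:

>>> Config._splitIntoKeys(r".a.b\.c")
['a', 'b.c']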
399 """
400 if isinstance(key, str):
401 if not key[0].isalnum():
402 d = key[0]
403 key = key[1:]
404 else:
405 return [key, ]
406 escaped = f"\\{d}"
407 temp = None
408 if escaped in key:
409 # Complain at the attempt to escape the escape
410 doubled = fr"\{escaped}"
411 if doubled in key:
412 raise ValueError(f"Escaping an escaped delimiter ({doubled} in {key})"
413 " is not yet supported.")
414 # Replace with a character that won't be in the string
415 temp = "\r"
416 if temp in key or d == temp:
417 raise ValueError(f"Can not use character {temp!r} in hierarchical key or as"
418 " delimiter if escaping the delimiter")
419 key = key.replace(escaped, temp)
420 hierarchy = key.split(d)
421 if temp:
422 hierarchy = [h.replace(temp, d) for h in hierarchy]
423 return hierarchy
424 elif isinstance(key, collections.abc.Iterable):
425 return list(key)
426 else:
427 # Not sure what this is so try it anyway
428 return [key, ]
430 def _getKeyHierarchy(self, name):
431 """Retrieve the key hierarchy for accessing the Config
433 Parameters
434 ----------
435 name : `str` or `tuple`
436 Delimited string or `tuple` of hierarchical keys.
438 Returns
439 -------
440 hierarchy : `list` of `str`
441 Hierarchy to use as a `list`. If the name is available directly
442 as a key in the Config it will be used regardless of the presence
443 of any nominal delimiter.
444 """
445 if name in self._data:
446 keys = [name, ]
447 else:
448 keys = self._splitIntoKeys(name)
449 return keys
451 def _findInHierarchy(self, keys, create=False):
452 """Look for hierarchy of keys in Config
454 Parameters
455 ----------
456 keys : `list` or `tuple`
457 Keys to search in hierarchy.
458 create : `bool`, optional
459 If `True`, if a part of the hierarchy does not exist, insert an
460 empty `dict` into the hierarchy.
462 Returns
463 -------
464 hierarchy : `list`
465 List of the value corresponding to each key in the supplied
466 hierarchy. Only keys that exist in the hierarchy will have
467 a value.
468 complete : `bool`
469 `True` if the full hierarchy exists and the final element
470 in ``hierarchy`` is the relevant value.
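A doctest-style sketch:

>>> c = Config({"a": {"b": 1}})
>>> c._findInHierarchy(["a", "b"])
([{'b': 1}, 1], True)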
471 """
472 d = self._data
474 def checkNextItem(k, d, create):
475 """See if k is in d and if it is return the new child"""
476 nextVal = None
477 isThere = False
478 if d is None:
479 # We have gone past the end of the hierarchy
480 pass
481 elif isinstance(d, collections.abc.Sequence):
482 # Check sequence first because for lists
483 # __contains__ checks whether value is found in list
484 # not whether the index exists in list. When we traverse
485 # the hierarchy we are interested in the index.
486 try:
487 nextVal = d[int(k)]
488 isThere = True
489 except IndexError:
490 pass
491 except ValueError:
492 isThere = k in d
493 elif k in d:
494 nextVal = d[k]
495 isThere = True
496 elif create:
497 d[k] = {}
498 nextVal = d[k]
499 isThere = True
500 return nextVal, isThere
502 hierarchy = []
503 complete = True
504 for k in keys:
505 d, isThere = checkNextItem(k, d, create)
506 if isThere:
507 hierarchy.append(d)
508 else:
509 complete = False
510 break
512 return hierarchy, complete
514 def __getitem__(self, name):
515 # Override the split for the simple case where there is an exact
516 # match. This allows `Config.items()` to work via a simple
517 # __iter__ implementation that returns top level keys of
518 # self._data.
519 keys = self._getKeyHierarchy(name)
521 hierarchy, complete = self._findInHierarchy(keys)
522 if not complete:
523 raise KeyError(f"{name} not found")
524 data = hierarchy[-1]
526 if isinstance(data, collections.abc.Mapping):
527 data = Config(data)
528 # Ensure that child configs inherit the parent internal delimiter
529 if self._D != Config._D:
530 data._D = self._D
531 return data
533 def __setitem__(self, name, value):
534 keys = self._getKeyHierarchy(name)
535 last = keys.pop()
536 if isinstance(value, Config):
537 value = copy.deepcopy(value._data)
539 hierarchy, complete = self._findInHierarchy(keys, create=True)
540 if hierarchy:
541 data = hierarchy[-1]
542 else:
543 data = self._data
545 try:
546 data[last] = value
547 except TypeError:
548 data[int(last)] = value
550 def __contains__(self, key):
551 keys = self._getKeyHierarchy(key)
552 hierarchy, complete = self._findInHierarchy(keys)
553 return complete
555 def __delitem__(self, key):
556 keys = self._getKeyHierarchy(key)
557 last = keys.pop()
558 hierarchy, complete = self._findInHierarchy(keys)
559 if complete:
560 if hierarchy:
561 data = hierarchy[-1]
562 else:
563 data = self._data
564 del data[last]
565 else:
566 raise KeyError(f"{key} not found in Config")
568 def update(self, other):
569 """Like dict.update, but will add or modify keys in nested dicts,
570 instead of overwriting the nested dict entirely.
572 For example, for the given code:
573 foo = {"a": {"b": 1}}
574 foo.update({"a": {"c": 2}})
576 Parameters
577 ----------
578 other : `dict` or `Config`
579 Source of configuration. For the example given above:
581 - If foo is a dict, then after the update foo == {"a": {"c": 2}}
582 - But if foo is a Config, then after the update
583 foo == {"a": {"b": 1, "c": 2}}
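As a doctest-style sketch of the `Config` case:

>>> foo = Config({"a": {"b": 1}})
>>> foo.update({"a": {"c": 2}})
>>> foo["a"]
Config({'b': 1, 'c': 2})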
584 """
585 def doUpdate(d, u):
586 if not isinstance(u, collections.abc.Mapping) or \
587 not isinstance(d, collections.abc.Mapping):
588 raise RuntimeError("Only call update with Mapping, not {}".format(type(d)))
589 for k, v in u.items():
590 if isinstance(v, collections.abc.Mapping):
591 d[k] = doUpdate(d.get(k, {}), v)
592 else:
593 d[k] = v
594 return d
595 doUpdate(self._data, other)
597 def merge(self, other):
598 """Like Config.update, but will add keys & values from other that
599 DO NOT EXIST in self.
601 Keys and values that already exist in self will NOT be overwritten.
603 Parameters
604 ----------
605 other : `dict` or `Config`
606 Source of configuration:
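A doctest-style sketch, showing that existing values win:

>>> c = Config({"a": {"b": 1}})
>>> c.merge(Config({"a": {"b": 99, "c": 2}}))
>>> c["a"]
Config({'b': 1, 'c': 2})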
607 """
608 otherCopy = copy.deepcopy(other)
609 otherCopy.update(self)
610 self._data = otherCopy._data
612 def nameTuples(self, topLevelOnly=False):
613 """Get tuples representing the name hierarchies of all keys.
615 The tuples returned from this method are guaranteed to be usable
616 to access items in the configuration object.
618 Parameters
619 ----------
620 topLevelOnly : `bool`, optional
621 If False, the default, a full hierarchy of names is returned.
622 If True, only the top-level keys are returned.
624 Returns
625 -------
626 names : `list` of `tuple` of `str`
627 List of all names present in the `Config` where each element
628 in the list is a `tuple` of strings representing the hierarchy.
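A doctest-style sketch:

>>> Config({"a": {"b": 1}}).nameTuples()
[('a',), ('a', 'b')]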
629 """
630 if topLevelOnly:
631 return list((k,) for k in self)
633 def getKeysAsTuples(d, keys, base):
634 if isinstance(d, collections.abc.Sequence):
635 theseKeys = range(len(d))
636 else:
637 theseKeys = d.keys()
638 for key in theseKeys:
639 val = d[key]
640 levelKey = base + (key,) if base is not None else (key,)
641 keys.append(levelKey)
642 if isinstance(val, (collections.abc.Mapping, collections.abc.Sequence)) \
643 and not isinstance(val, str):
644 getKeysAsTuples(val, keys, levelKey)
645 keys = []
646 getKeysAsTuples(self._data, keys, None)
647 return keys
649 def names(self, topLevelOnly=False, delimiter=None):
650 """Get a delimited name of all the keys in the hierarchy.
652 The values returned from this method are guaranteed to be usable
653 to access items in the configuration object.
655 Parameters
656 ----------
657 topLevelOnly : `bool`, optional
658 If False, the default, a full hierarchy of names is returned.
659 If True, only the top-level keys are returned.
660 delimiter : `str`, optional
661 Delimiter to use when forming the keys. If the delimiter is
662 present in any of the keys, it will be escaped in the returned
663 names. If `None`, a delimiter will be chosen automatically.
664 The delimiter can not be alphanumeric.
666 Returns
667 -------
668 names : `list` of `str`
669 List of all names present in the `Config`.
671 Notes
672 -----
673 This is different from the built-in method `dict.keys`, which will
674 return only the first level keys.
676 Raises
677 ------
678 ValueError
679 Raised if the supplied delimiter is alphanumeric.
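A doctest-style sketch with an explicit delimiter:

>>> Config({"a": {"b": 1}}).names(delimiter=".")
['.a', '.a.b']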
680 """
681 if topLevelOnly:
682 return list(self.keys())
684 # Get all the tuples of hierarchical keys
685 nameTuples = self.nameTuples()
687 if delimiter is not None and delimiter.isalnum():
688 raise ValueError(f"Supplied delimiter ({delimiter!r}) must not be alphanumeric.")
690 if delimiter is None:
691 # Start with something, and ensure it does not need to be
692 # escaped (it is much easier to understand if not escaped)
693 delimiter = self._D
695 # Form big string for easy check of delimiter clash
696 combined = "".join("".join(str(s) for s in k) for k in nameTuples)
698 # Try a delimiter and keep trying until we get something that
699 # works.
700 ntries = 0
701 while delimiter in combined:
702 log.debug(f"Delimiter '{delimiter}' could not be used. Trying another.")
703 ntries += 1
705 if ntries > 100:
706 raise ValueError(f"Unable to determine a delimiter for Config {self}")
708 # try another one
709 while True:
710 delimiter = chr(ord(delimiter)+1)
711 if not delimiter.isalnum():
712 break
714 log.debug(f"Using delimiter {delimiter!r}")
716 # Form the keys, escaping the delimiter if necessary
717 strings = [delimiter + delimiter.join(str(s).replace(delimiter, f"\\{delimiter}") for s in k)
718 for k in nameTuples]
719 return strings
721 def asArray(self, name):
722 """Get a value as an array.
724 May contain one or more elements.
726 Parameters
727 ----------
728 name : `str`
729 Key to use to retrieve value.
731 Returns
732 -------
733 array : `collections.abc.Sequence`
734 The value corresponding to name, but guaranteed to be returned
735 as a sequence with at least one element. If the value is a
736 `~collections.abc.Sequence` (and not a `str`) the value itself
737 will be returned, else the value will be the first element.
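A doctest-style sketch:

>>> c = Config({"a": [1, 2], "b": 3})
>>> c.asArray("a")
[1, 2]
>>> c.asArray("b")
[3]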
738 """
739 val = self.get(name)
740 if isinstance(val, str):
741 val = [val]
742 elif not isinstance(val, collections.abc.Sequence):
743 val = [val]
744 return val
746 def __eq__(self, other):
747 if isinstance(other, Config):
748 other = other._data
749 return self._data == other
751 def __ne__(self, other):
752 if isinstance(other, Config):
753 other = other._data
754 return self._data != other
756 #######
757 # i/o #
759 def dump(self, output):
760 """Write the config to a YAML stream.
762 Parameters
763 ----------
764 output
765 The YAML stream to use for output.
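A doctest-style sketch using an in-memory stream:

>>> stream = io.StringIO()
>>> Config({"a": 1}).dump(stream)
>>> print(stream.getvalue(), end="")
a: 1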
766 """
767 yaml.safe_dump(self._data, output, default_flow_style=False)
769 def dumpToUri(self, uri, updateFile=True, defaultFileName="butler.yaml",
770 overwrite=True):
771 """Write the config to the location pointed to by the given URI.
773 Currently supports 's3' and 'file' URI schemes.
775 Parameters
776 ----------
777 uri : `str` or `ButlerURI`
778 URI of location where the Config will be written.
779 updateFile : `bool`, optional
780 If True and uri does not end in a filename with an extension, will
781 append `defaultFileName` to the target uri. True by default.
782 defaultFileName : `str`, optional
783 The file name that will be appended to the target uri if updateFile is
784 True and uri does not end in a file with an extension.
785 overwrite : `bool`, optional
786 If True the configuration will be written even if it already
787 exists at that location.
788 """
789 if isinstance(uri, str):
790 uri = ButlerURI(uri)
792 if not uri.scheme or uri.scheme == "file":
793 if os.path.isdir(uri.path) and updateFile:
794 uri = ButlerURI(os.path.join(uri.ospath, defaultFileName))
795 self.dumpToFile(uri.ospath, overwrite=overwrite)
796 elif uri.scheme == "s3":
797 if not uri.dirLike and "." not in uri.basename():
798 uri = ButlerURI(uri.geturl(), forceDirectory=True)
799 uri.updateFile(defaultFileName)
800 self.dumpToS3File(uri, overwrite=overwrite)
801 else:
802 raise ValueError(f"Unrecognized URI scheme: {uri.scheme}")
804 def dumpToFile(self, path, *, overwrite=True):
805 """Write the config to a file.
807 Parameters
808 ----------
809 path : `str`
810 Path to the file to use for output.
811 overwrite : `bool`, optional
812 If True, any existing file will be overwritten.
814 Notes
815 -----
816 The name of the config file is stored in the Config object.
818 Raises
819 ------
820 FileExistsError
821 Raised if the file already exists but overwrite is False.
822 """
823 if overwrite:
824 mode = "w"
825 else:
826 mode = "x"
827 with open(path, mode) as f:
828 self.dump(f)
829 self.configFile = path
831 def dumpToS3File(self, uri, *, overwrite=True):
832 """Write the config to a file in an S3 bucket.
834 Parameters
835 ----------
836 uri : `ButlerURI`
837 S3 URI where the configuration should be stored.
838 overwrite : `bool`, optional
839 If False, a check will be made to see if the key already
840 exists.
842 Raises
843 ------
844 FileExistsError
845 Raised if the configuration already exists at this location
846 and overwrite is set to `False`.
847 """
848 if boto3 is None:
849 raise ModuleNotFoundError("Could not find boto3. "
850 "Are you sure it is installed?")
852 if uri.scheme != "s3":
853 raise ValueError(f"Must provide S3 URI not {uri}")
855 s3 = getS3Client()
857 if not overwrite:
858 from .s3utils import s3CheckFileExists
859 if s3CheckFileExists(uri, client=s3)[0]:
860 raise FileExistsError(f"Config already exists at {uri}")
862 bucket = uri.netloc
863 key = uri.relativeToPathRoot
865 with io.StringIO() as stream:
866 self.dump(stream)
867 stream.seek(0)
868 s3.put_object(Bucket=bucket, Key=key, Body=stream.read())
870 @staticmethod
871 def updateParameters(configType, config, full, toUpdate=None, toCopy=None, overwrite=True):
872 """Generic helper function for updating specific config parameters.
874 Allows for named parameters to be set to new values in bulk, and
875 for other values to be set by copying from a reference config.
877 Assumes that the supplied config is compatible with ``configType``
878 and will attach the updated values to the supplied config by
879 looking for the related component key. It is assumed that
880 ``config`` and ``full`` are from the same part of the
881 configuration hierarchy.
883 Parameters
884 ----------
885 configType : `ConfigSubset`
886 Config type to use to extract relevant items from ``config``.
887 config : `Config`
888 A `Config` to update. Only the subset understood by
889 the supplied `ConfigSubset` will be modified. Default values
890 will not be inserted and the content will not be validated
891 since mandatory keys are allowed to be missing until
892 populated later by merging.
893 full : `Config`
894 A complete config with all defaults expanded that can be
895 converted to a ``configType``. Read-only and will not be
896 modified by this method. Values are read from here if
897 ``toCopy`` is defined.
899 Repository-specific options that should not be obtained
900 from defaults when Butler instances are constructed
901 should be copied from ``full`` to ``config``.
902 toUpdate : `dict`, optional
903 A `dict` defining the keys to update and the new value to use.
904 The keys and values can be any supported by `Config`
905 assignment.
906 toCopy : `tuple`, optional
907 `tuple` of keys whose values should be copied from ``full``
908 into ``config``.
909 overwrite : `bool`, optional
910 If `False`, do not modify a value in ``config`` if the key
911 already exists. Default is always to overwrite.
913 Raises
914 ------
915 ValueError
916 Raised if neither ``toUpdate`` nor ``toCopy`` were defined.
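A minimal sketch of typical use, where ``MySubsetConfig`` stands in for a
hypothetical `ConfigSubset` subclass and the keys shown are illustrative only:

Config.updateParameters(MySubsetConfig, config, full,
toUpdate={"root": "/new/root"},
toCopy=("cls",))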
917 """
918 if toUpdate is None and toCopy is None:
919 raise ValueError("One of toUpdate or toCopy parameters must be set.")
921 # If this is a parent configuration then we need to ensure that
922 # the supplied config has the relevant component key in it.
923 # If this is a parent configuration we add in the stub entry
924 # so that the ConfigSubset constructor will do the right thing.
925 # We check full for this since that is guaranteed to be complete.
926 if configType.component in full and configType.component not in config:
927 config[configType.component] = {}
929 # Extract the part of the config we wish to update
930 localConfig = configType(config, mergeDefaults=False, validate=False)
932 if toUpdate:
933 for key, value in toUpdate.items():
934 if key in localConfig and not overwrite:
935 log.debug("Not overriding key '%s' with value '%s' in config %s",
936 key, value, localConfig.__class__.__name__)
937 else:
938 localConfig[key] = value
940 if toCopy:
941 localFullConfig = configType(full, mergeDefaults=False)
942 for key in toCopy:
943 if key in localConfig and not overwrite:
944 log.debug("Not overriding key '%s' from defaults in config %s",
945 key, localConfig.__class__.__name__)
946 else:
947 localConfig[key] = localFullConfig[key]
949 # Reattach to parent if this is a child config
950 if configType.component in config:
951 config[configType.component] = localConfig
952 else:
953 config.update(localConfig)
956 class ConfigSubset(Config):
957 """Config representing a subset of a more general configuration.
959 Subclasses define their own component and when given a configuration
960 that includes that component, the resulting configuration only includes
961 the subset. For example, your config might contain ``dimensions`` if it's
962 the subset. For example, a global config might contain a ``dimensions``
963 section, and only that subset will be stored. If ``dimensions``
964 configuration should be used.
966 Default values are read from the environment or supplied search paths
967 using the default configuration file name specified in the subclass.
968 This allows a configuration class to be instantiated without any
969 additional arguments.
971 Additional validation can be specified to check for keys that are mandatory
972 in the configuration.
974 Parameters
975 ----------
976 other : `Config` or `str` or `dict`
977 Argument specifying the configuration information as understood
978 by `Config`
979 validate : `bool`, optional
980 If `True` required keys will be checked to ensure configuration
981 consistency.
982 mergeDefaults : `bool`, optional
983 If `True` defaults will be read and the supplied config will
984 be combined with the defaults, with the supplied values taking
985 precedence.
986 searchPaths : `list` or `tuple`, optional
987 Explicit additional paths to search for defaults. They should
988 be supplied in priority order. These paths have higher priority
989 than those read from the environment in
990 `ConfigSubset.defaultSearchPaths()`.
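A minimal sketch of a hypothetical subclass (names are illustrative only):

>>> class MySubsetConfig(ConfigSubset):
...     component = "mySubset"
...     requiredKeys = ("cls",)
...     defaultConfigFile = "mySubset.yaml"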
991 """
993 component: ClassVar[Optional[str]] = None
994 """Component to use from the supplied config. Can be None. If specified,
995 the key does not have to be present in the config. Can be a full dot-separated path to a component.
996 """
998 requiredKeys: ClassVar[Sequence[str]] = ()
999 """Keys that are required to be specified in the configuration.
1000 """
1002 defaultConfigFile: ClassVar[Optional[str]] = None
1003 """Name of the file containing defaults for this config class.
1004 """
1006 def __init__(self, other=None, validate=True, mergeDefaults=True, searchPaths=None):
1008 # Create a blank object to receive the defaults
1009 # Once we have the defaults we then update with the external values
1010 super().__init__()
1012 # Create a standard Config rather than subset
1013 externalConfig = Config(other)
1015 # Select the part we need from it
1016 # To simplify the use of !include we also check for the existence of
1017 # component.component (since the included files can themselves
1018 # include the component name)
1019 if self.component is not None:
1020 doubled = (self.component, self.component)
1021 # Must check for double depth first
1022 if doubled in externalConfig:
1023 externalConfig = externalConfig[doubled]
1024 elif self.component in externalConfig:
1025 externalConfig._data = externalConfig._data[self.component]
1027 # Default files read to create this configuration
1028 self.filesRead = []
1030 # Assume we are not looking up child configurations
1031 containerKey = None
1033 # Sometimes we do not want to merge with defaults.
1034 if mergeDefaults:
1036 # Supplied search paths have highest priority
1037 fullSearchPath = []
1038 if searchPaths:
1039 fullSearchPath.extend(searchPaths)
1041 # Read default paths from the environment
1042 fullSearchPath.extend(self.defaultSearchPaths())
1044 # There are two places to find defaults for this particular config
1045 # - The "defaultConfigFile" defined in the subclass
1046 # - The class specified in the "cls" element in the config.
1047 # Read cls after merging in case it changes.
1048 if self.defaultConfigFile is not None:
1049 self._updateWithConfigsFromPath(fullSearchPath, self.defaultConfigFile)
1051 # Can have a class specification in the external config (priority)
1052 # or from the defaults.
1053 pytype = None
1054 if "cls" in externalConfig:
1055 pytype = externalConfig["cls"]
1056 elif "cls" in self:
1057 pytype = self["cls"]
1059 if pytype is not None:
1060 try:
1061 cls = doImport(pytype)
1062 except ImportError as e:
1063 raise RuntimeError(f"Failed to import cls '{pytype}' for config {type(self)}") from e
1064 defaultsFile = cls.defaultConfigFile
1065 if defaultsFile is not None:
1066 self._updateWithConfigsFromPath(fullSearchPath, defaultsFile)
1068 # Get the container key in case we need it
1069 try:
1070 containerKey = cls.containerKey
1071 except AttributeError:
1072 pass
1074 # Now update this object with the external values so that the external
1075 # values always override the defaults
1076 self.update(externalConfig)
1078 # If this configuration has child configurations of the same
1079 # config class, we need to expand those defaults as well.
1081 if mergeDefaults and containerKey is not None and containerKey in self:
1082 for idx, subConfig in enumerate(self[containerKey]):
1083 self[containerKey, idx] = type(self)(other=subConfig, validate=validate,
1084 mergeDefaults=mergeDefaults,
1085 searchPaths=searchPaths)
1087 if validate:
1088 self.validate()
1090 @classmethod
1091 def defaultSearchPaths(cls):
1092 """Read the environment to determine search paths to use for global
1093 defaults.
1095 Global defaults, at lowest priority, are found in the ``config``
1096 directory of the butler source tree. Additional defaults can be
1097 defined using the environment variable ``$DAF_BUTLER_CONFIG_PATH``
1098 which is a PATH-like variable where paths at the front of the list
1099 have priority over those later.
1101 Returns
1102 -------
1103 paths : `list`
1104 Returns a list of paths to search. The returned order is in
1105 priority with the highest priority paths first. The butler config
1106 directory will always be at the end of the list.
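For example, with hypothetical paths and a POSIX path separator, setting
``DAF_BUTLER_CONFIG_PATH=/my/overrides:/site/defaults`` would make this
method return those two directories, in that order, followed by the butler
``config`` directory.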
1107 """
1108 # We can pick up defaults from multiple search paths
1109 # We fill defaults by using the butler config path and then
1110 # the config path environment variable in reverse order.
1111 defaultsPaths = []
1113 if CONFIG_PATH in os.environ:
1114 externalPaths = os.environ[CONFIG_PATH].split(os.pathsep)
1115 defaultsPaths.extend(externalPaths)
1117 # Find the butler configs
1118 defaultsPaths.append(os.path.join(lsst.utils.getPackageDir("daf_butler"), "config"))
1120 return defaultsPaths
1122 def _updateWithConfigsFromPath(self, searchPaths, configFile):
1123 """Search the supplied paths, merging the configuration values.
1125 The values read will override values currently stored in the object.
1126 Every file found in the path will be read, such that the earlier
1127 path entries have higher priority.
1129 Parameters
1130 ----------
1131 searchPaths : `list`
1132 Paths to search for the supplied configFile. This path
1133 is the priority order, such that files read from the
1134 first path entry will be selected over those read from
1135 a later path.
1136 configFile : `str`
1137 File to locate in path. If absolute path it will be read
1138 directly and the search path will not be used.
1139 """
1140 if os.path.isabs(configFile):
1141 if os.path.exists(configFile):
1142 self.filesRead.append(configFile)
1143 self._updateWithOtherConfigFile(configFile)
1144 else:
1145 # Reverse order so that high priority entries
1146 # update the object last.
1147 for pathDir in reversed(searchPaths):
1148 file = os.path.join(pathDir, configFile)
1149 if os.path.exists(file):
1150 self.filesRead.append(file)
1151 self._updateWithOtherConfigFile(file)
1153 def _updateWithOtherConfigFile(self, file):
1154 """Read in some defaults and update.
1156 Update the configuration by reading the supplied file as a config
1157 of this class, and merging such that these values override the
1158 current values. Contents of the external config are not validated.
1160 Parameters
1161 ----------
1162 file : `Config`, `str`, or `dict`
1163 Entity that can be converted to a `ConfigSubset`.
1164 """
1165 # Use this class to read the defaults so that subsetting can happen
1166 # correctly.
1167 externalConfig = type(self)(file, validate=False, mergeDefaults=False)
1168 self.update(externalConfig)
1170 def validate(self):
1171 """Check that mandatory keys are present in this configuration.
1173 Ignored if ``requiredKeys`` is empty."""
1174 # Validation
1175 missing = [k for k in self.requiredKeys if k not in self._data]
1176 if missing:
1177 raise KeyError(f"Mandatory keys ({missing}) missing from supplied configuration for {type(self)}")