Coverage for python/lsst/daf/butler/core/config.py : 10%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Configuration control."""
24__all__ = ("Config", "ConfigSubset")
26import collections
27import copy
28import logging
29import pprint
30import os
31import yaml
32import sys
33from yaml.representer import Representer
34import io
35from typing import Sequence, Optional, ClassVar
37try:
38 import boto3
39except ImportError:
40 boto3 = None
42import lsst.utils
43from lsst.utils import doImport
44from .location import ButlerURI
# Register a representer so that ``collections.defaultdict`` instances are
# written out through the standard dict representer when dumping YAML.
yaml.add_representer(collections.defaultdict, Representer.represent_dict)

# Logger used by the classes and functions defined in this module.
log = logging.getLogger(__name__)

# Name of the PATH-like environment variable to use for defaults.
CONFIG_PATH = "DAF_BUTLER_CONFIG_PATH"
class Loader(yaml.CSafeLoader):
    """YAML Loader that supports file include directives.

    Uses the ``!include`` directive in a YAML file to point to another
    YAML file to be included.  The path in the include directive is relative
    to the file containing that directive, for example::

        storageClasses: !include storageClasses.yaml

    Parameters
    ----------
    stream
        Readable stream containing the YAML document.  It must have a
        ``name`` attribute, because that name is used as the root against
        which ``!include`` file names are resolved.

    Examples
    --------
    >>> with open("document.yaml", "r") as f:
    ...     data = yaml.load(f, Loader=Loader)

    Notes
    -----
    See https://davidchall.github.io/yaml-includes.html
    """

    def __init__(self, stream):
        super().__init__(stream)
        # Location of the document being parsed; includes are resolved
        # relative to it.
        self._root = ButlerURI(stream.name)
        # Registration is repeated for every instance; it simply rebinds the
        # same tag to the same constructor each time.
        Loader.add_constructor("!include", Loader.include)

    def include(self, node):
        """Construct the value of an ``!include`` node.

        Parameters
        ----------
        node : `yaml.Node`
            Node carrying the ``!include`` tag.  A scalar names one file, a
            sequence names several files, and a mapping associates keys with
            file names.

        Returns
        -------
        content
            Parsed content of the file for a scalar node, a `list` of parsed
            contents for a sequence node, or a `dict` mapping each key to the
            parsed content of its file for a mapping node.

        Raises
        ------
        yaml.constructor.ConstructorError
            Raised if the node is not a scalar, sequence or mapping node.
        """
        if isinstance(node, yaml.ScalarNode):
            return self.extractFile(self.construct_scalar(node))

        if isinstance(node, yaml.SequenceNode):
            return [self.extractFile(filename) for filename in self.construct_sequence(node)]

        if isinstance(node, yaml.MappingNode):
            return {k: self.extractFile(v) for k, v in self.construct_mapping(node).items()}

        # Report the problem through the exception (with the position in the
        # document when available) rather than printing to stderr.
        raise yaml.constructor.ConstructorError(
            None, None,
            "unrecognised node type in !include statement",
            getattr(node, "start_mark", None))

    def extractFile(self, filename):
        """Load the YAML file referenced by an ``!include`` directive.

        Parameters
        ----------
        filename : `str`
            File name from the directive.  It replaces the file component of
            the URI of the document currently being parsed.

        Returns
        -------
        content
            Parsed content of the referenced file.

        Raises
        ------
        ModuleNotFoundError
            Raised if the file is in S3 but ``boto3`` is not available.
        FileNotFoundError
            Raised if the S3 key or bucket does not exist.
        ValueError
            Raised if the URI scheme is neither local file nor ``s3``.
        """
        # Work on a copy so the root URI of this loader is not modified.
        fileuri = copy.copy(self._root)
        fileuri.updateFile(filename)
        log.debug("Opening YAML file via !include: %s", fileuri)

        if not fileuri.scheme or fileuri.scheme == "file":
            with open(fileuri.ospath, "r") as f:
                return yaml.load(f, Loader)

        if fileuri.scheme == "s3":
            if boto3 is None:
                raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")
            s3 = boto3.client("s3")
            try:
                response = s3.get_object(Bucket=fileuri.netloc, Key=fileuri.relativeToPathRoot)
            except (s3.exceptions.NoSuchKey, s3.exceptions.NoSuchBucket) as err:
                raise FileNotFoundError(f'No such file or directory: {fileuri}') from err

            # boto3 response is a `StreamingBody`, but not a valid Python
            # IOStream. Loader will raise an error that the stream has no
            # name. The name is used to resolve the "!include" filename
            # location to download. A hackish solution is to name it
            # explicitly.
            body = response["Body"]
            body.name = fileuri.geturl()
            try:
                return yaml.load(body, Loader)
            finally:
                # Always release the underlying connection.
                body.close()

        # Previously an unsupported scheme fell through and silently
        # produced `None` for the included content.
        raise ValueError(f"Unrecognized URI scheme in !include: {fileuri.scheme}")
class Config(collections.abc.MutableMapping):
    r"""Implements a datatype that is used by `Butler` for configuration
    parameters.

    It is essentially a `dict` with key/value pairs, including nested dicts
    (as values). In fact, it can be initialized with a `dict`.

    Config extends the `dict` api so that hierarchical values may be accessed
    with delimited notation or as a tuple.  If a string is given the delimiter
    is picked up from the first character in that string. For example,
    ``foo["a"]["b"]["c"]``, ``foo["a", "b", "c"]``, ``foo[".a.b.c"]``, and
    ``foo["/a/b/c"]`` all achieve the same outcome.
    If the first character is alphanumeric, no delimiter will be used.
    ``foo["a.b.c"]`` will be a single key ``a.b.c`` as will ``foo[":a.b.c"]``.
    Unicode characters can be used as the delimiter for distinctiveness if
    required.

    If a key in the hierarchy starts with a non-alphanumeric character care
    should be used to ensure that either the tuple interface is used or
    a distinct delimiter is always given in string form.

    Finally, the delimiter can be escaped if it is part of a key and also
    has to be used as a delimiter. For example, ``foo[r".a.b\.c"]`` results in
    a two element hierarchy of ``a`` and ``b.c``.  For hard-coded strings it is
    always better to use a different delimiter in these cases.

    String (and bytes) values are always treated as leaves of the hierarchy;
    they are never indexed into when a hierarchical key is resolved.

    Note that adding a multi-level key implicitly creates any nesting levels
    that do not exist, but removing multi-level keys does not automatically
    remove empty nesting levels.  As a result:

        >>> c = Config()
        >>> c[".a.b"] = 1
        >>> del c[".a.b"]
        >>> c
        Config({'a': {}})

    Storage formats supported:

    - yaml: read and write is supported.

    Parameters
    ----------
    other : `str` or `Config` or `dict`
        Other source of configuration, can be:

        - (`str`) Treated as a path or URI to a config file. The path must
          end with "yaml".
        - (`Config`) Copies the other Config's values into this one.
        - (`dict`) Copies the values from the dict into this Config.

        If `None` is provided an empty `Config` will be created.

    Raises
    ------
    RuntimeError
        Raised if ``other`` is of an unsupported type or refers to a file
        of an unsupported type.
    """

    _D: ClassVar[str] = "→"
    """Default internal delimiter to use for components in the hierarchy when
    constructing keys for external use (see `Config.names()`)."""

    includeKey: ClassVar[str] = "includeConfigs"
    """Key used to indicate that another config should be included at this
    part of the hierarchy."""

    def __init__(self, other=None):
        self._data = {}
        self.configFile = None

        if other is None:
            return

        if isinstance(other, Config):
            self._data = copy.deepcopy(other._data)
            self.configFile = other.configFile
        elif isinstance(other, collections.abc.Mapping):
            self.update(other)
        elif isinstance(other, str):
            # A string is always assumed to be a file path or URI.
            self.__initFromFile(other)
            self._processExplicitIncludes()
        else:
            # An f-string is used instead of "%" formatting so that a tuple
            # argument is reported rather than breaking the formatting.
            raise RuntimeError(f"A Config could not be loaded from other:{other}")

    def ppprint(self):
        """Return a pretty-printed representation of the configuration.

        Intended as a debugging aid, e.g. from the debugger:
        ``pdb> print(myConfigObject.ppprint())``

        Returns
        -------
        s : `str`
            A prettyprint formatted string representing the config.
        """
        return pprint.pformat(self._data, indent=2, width=1)

    def __repr__(self):
        return f"{type(self).__name__}({self._data!r})"

    def __str__(self):
        return self.ppprint()

    def __len__(self):
        return len(self._data)

    def __iter__(self):
        return iter(self._data)

    def copy(self):
        """Return a new instance of the same class built from this one."""
        return type(self)(self)

    def __initFromFile(self, path):
        """Load a file from a path or an URI.

        Parameters
        ----------
        path : `str`
            Path or an URI to a persisted config file.

        Raises
        ------
        RuntimeError
            Raised if the path of the URI does not end with "yaml".
        """
        uri = ButlerURI(path)
        if not uri.path.endswith("yaml"):
            raise RuntimeError(f"Unhandled config file type:{uri}")

        if uri.scheme == "s3":
            self.__initFromS3YamlFile(uri.geturl())
        else:
            self.__initFromYamlFile(uri.ospath)
        self.configFile = str(path)

    def __initFromS3YamlFile(self, url):
        """Load a YAML config from an object stored in an S3 bucket.

        Parameters
        ----------
        url : `str`
            S3 URI of the persisted config file.

        Raises
        ------
        ModuleNotFoundError
            Raised if ``boto3`` is not available.
        FileNotFoundError
            Raised if the key or bucket does not exist.
        """
        if boto3 is None:
            raise ModuleNotFoundError("boto3 not found. "
                                      "Are you sure it is installed?")

        uri = ButlerURI(url)
        s3 = boto3.client("s3")
        try:
            response = s3.get_object(Bucket=uri.netloc, Key=uri.relativeToPathRoot)
        except (s3.exceptions.NoSuchKey, s3.exceptions.NoSuchBucket) as err:
            raise FileNotFoundError(f"No such file or directory: {uri}") from err

        # boto3 response is a `StreamingBody`, but not a valid Python IOStream.
        # Loader will raise an error that the stream has no name. A hackish
        # solution is to name it explicitly.
        body = response["Body"]
        body.name = url
        try:
            self.__initFromYaml(body)
        finally:
            # Release the connection even if the YAML can not be parsed.
            body.close()

    def __initFromYamlFile(self, path):
        """Open a file at a given path and attempt to load it as YAML.

        Parameters
        ----------
        path : `str`
            Path to a persisted config file in YAML format.
        """
        log.debug("Opening YAML config file: %s", path)
        with open(path, "r") as f:
            self.__initFromYaml(f)

    def __initFromYaml(self, stream):
        """Load a YAML config from any readable stream that contains one.

        Parameters
        ----------
        stream
            Readable stream with the config in YAML format. The stream is
            parsed with `Loader`, which requires it to have a ``name``.

        Returns
        -------
        self : `Config`
            This object, with its content replaced by the parsed document
            (or an empty `dict` if the document is empty).

        Raises
        ------
        yaml.YAMLError
            If there is an error loading the file.
        """
        content = yaml.load(stream, Loader=Loader)
        self._data = {} if content is None else content
        return self

    def _processExplicitIncludes(self):
        """Scan through the configuration searching for the special
        ``includeConfigs`` directive and process the includes.

        Each directive is removed from the configuration and replaced by the
        merged content of the files it names.  Values already present at
        that level of the hierarchy take precedence over included values.

        Raises
        ------
        RuntimeError
            Raised if a referenced file can not be found.
        """
        # Search paths for relative file names: the current directory and
        # then the directory of the file this config was read from.
        searchPaths = [os.path.curdir]
        if self.configFile is not None:
            searchPaths.append(os.path.abspath(os.path.dirname(self.configFile)))

        # The names are a snapshot taken before any include is processed.
        for path in self.nameTuples():
            if path[-1] != self.includeKey:
                continue

            log.debug("Processing file include directive at %s", self._D + self._D.join(path))
            basePath = path[:-1]

            # Extract the includes and then delete them from the config
            includes = self[path]
            del self[path]

            # Be consistent and convert to a list
            if not isinstance(includes, list):
                includes = [includes]

            # Read each file assuming it is a reference to a file.
            # The file can be relative to config file or cwd.
            # ConfigSubset search paths are not used.
            subConfigs = []
            for fileName in includes:
                # Expand any shell variables
                fileName = os.path.expandvars(fileName)
                found = None
                if os.path.isabs(fileName):
                    found = fileName
                else:
                    for searchDir in searchPaths:
                        filePath = os.path.join(searchDir, fileName)
                        if os.path.exists(filePath):
                            found = os.path.normpath(os.path.abspath(filePath))
                            break
                if not found:
                    raise RuntimeError(f"Unable to find referenced include file: {fileName}")

                # Read the referenced Config as a Config
                subConfigs.append(type(self)(found))

            # An empty include list contributes nothing; the directive has
            # already been removed above.
            if not subConfigs:
                continue

            # Merge the sub configs in order, later files overriding
            # earlier ones.
            newConfig = subConfigs[0]
            for sc in subConfigs[1:]:
                newConfig.update(sc)

            # Explicit values take precedence over included ones
            if not basePath:
                # This is an include at the root config, so the merged
                # result replaces the current content.
                newConfig.update(self)
                self._data = newConfig._data
            else:
                newConfig.update(self[basePath])
                # And reattach to the base config
                self[basePath] = newConfig

    @staticmethod
    def _splitIntoKeys(key):
        r"""Split the argument for get/set/in into a hierarchical list.

        Parameters
        ----------
        key : `str` or iterable
            Argument given to get/set/in. If an iterable is provided it will
            be converted to a list.  If the first character of the string
            is not an alphanumeric character then it will be used as the
            delimiter for the purposes of splitting the remainder of the
            string. If the delimiter is also in one of the keys then it
            can be escaped using ``\``. There is no default delimiter.
            An empty string is returned as a single (empty) key.

        Returns
        -------
        keys : `list`
            Hierarchical keys as a `list`.

        Raises
        ------
        ValueError
            Raised if an escaped delimiter is itself escaped, or if a
            carriage return is used in a key or as the delimiter while the
            delimiter is being escaped.
        """
        if isinstance(key, str):
            # An empty string has no first character to act as a delimiter.
            if not key or key[0].isalnum():
                return [key]

            d = key[0]
            key = key[1:]

            escaped = f"\\{d}"
            temp = None
            if escaped in key:
                # Complain at the attempt to escape the escape
                doubled = fr"\{escaped}"
                if doubled in key:
                    raise ValueError(f"Escaping an escaped delimiter ({doubled} in {key})"
                                     " is not yet supported.")
                # Replace with a character that won't be in the string
                temp = "\r"
                if temp in key or d == temp:
                    raise ValueError(f"Can not use character {temp!r} in hierarchical key or as"
                                     " delimiter if escaping the delimiter")
                key = key.replace(escaped, temp)

            hierarchy = key.split(d)
            if temp:
                # Put the (unescaped) delimiter back into the keys
                hierarchy = [h.replace(temp, d) for h in hierarchy]
            return hierarchy

        if isinstance(key, collections.abc.Iterable):
            return list(key)

        # Not sure what this is so try it anyway
        return [key]

    def _getKeyHierarchy(self, name):
        """Retrieve the key hierarchy for accessing the Config.

        Parameters
        ----------
        name : `str` or `tuple`
            Delimited string or `tuple` of hierarchical keys.

        Returns
        -------
        hierarchy : `list` of `str`
            Hierarchy to use as a `list`.  If the name is available directly
            as a key in the Config it will be used regardless of the presence
            of any nominal delimiter.
        """
        try:
            if name in self._data:
                return [name]
        except TypeError:
            # Unhashable names (e.g. a list of keys) can never be a direct
            # top-level key, so fall through to splitting.
            pass
        return self._splitIntoKeys(name)

    def _findInHierarchy(self, keys, create=False):
        """Look for hierarchy of keys in Config.

        Parameters
        ----------
        keys : `list` or `tuple`
            Keys to search in hierarchy.
        create : `bool`, optional
            If `True`, if a part of the hierarchy does not exist, insert an
            empty `dict` into the hierarchy.

        Returns
        -------
        hierarchy : `list`
            List of the value corresponding to each key in the supplied
            hierarchy.  Only keys that exist in the hierarchy will have
            a value.
        complete : `bool`
            `True` if the full hierarchy exists and the final element
            in ``hierarchy`` is the relevant value.
        """
        def checkNextItem(k, d, create):
            """See if k is in d and if it is return the new child."""
            nextVal = None
            isThere = False
            if d is None or isinstance(d, (str, bytes)):
                # We have gone past the end of the hierarchy. Strings are
                # sequences but must be treated as leaf values, never
                # indexed or substring-searched.
                pass
            elif isinstance(d, collections.abc.Sequence):
                # Check sequence first because for lists
                # __contains__ checks whether value is found in list
                # not whether the index exists in list. When we traverse
                # the hierarchy we are interested in the index.
                try:
                    nextVal = d[int(k)]
                    isThere = True
                except IndexError:
                    pass
                except ValueError:
                    isThere = k in d
            elif not isinstance(d, collections.abc.Mapping):
                # Any other scalar is a leaf and has no children.
                pass
            elif k in d:
                nextVal = d[k]
                isThere = True
            elif create:
                d[k] = {}
                nextVal = d[k]
                isThere = True
            return nextVal, isThere

        node = self._data
        hierarchy = []
        complete = True
        for k in keys:
            node, isThere = checkNextItem(k, node, create)
            if not isThere:
                complete = False
                break
            hierarchy.append(node)

        return hierarchy, complete

    def __getitem__(self, name):
        # Override the split for the simple case where there is an exact
        # match.  This allows `Config.items()` to work via a simple
        # __iter__ implementation that returns top level keys of
        # self._data.
        keys = self._getKeyHierarchy(name)

        hierarchy, complete = self._findInHierarchy(keys)
        # An empty hierarchy (e.g. an empty tuple) names nothing.
        if not complete or not hierarchy:
            raise KeyError(f"{name} not found")
        data = hierarchy[-1]

        if isinstance(data, collections.abc.Mapping):
            data = Config(data)
            # Ensure that child configs inherit the parent internal delimiter
            if self._D != Config._D:
                data._D = self._D
        return data

    def __setitem__(self, name, value):
        keys = self._getKeyHierarchy(name)
        last = keys.pop()
        if isinstance(value, Config):
            value = copy.deepcopy(value._data)

        hierarchy, complete = self._findInHierarchy(keys, create=True)
        if not complete:
            # Part of the path could not be resolved or created (e.g. a
            # list index out of range). Assigning anyway would modify the
            # wrong level of the hierarchy.
            raise KeyError(f"{name} not found")
        data = hierarchy[-1] if hierarchy else self._data

        try:
            data[last] = value
        except TypeError:
            # Lists require an integer index
            data[int(last)] = value

    def __contains__(self, key):
        keys = self._getKeyHierarchy(key)
        hierarchy, complete = self._findInHierarchy(keys)
        # An empty hierarchy is trivially "complete" but names nothing.
        return bool(keys) and complete

    def __delitem__(self, key):
        keys = self._getKeyHierarchy(key)
        last = keys.pop()
        hierarchy, complete = self._findInHierarchy(keys)
        if not complete:
            raise KeyError(f"{key} not found in Config")
        data = hierarchy[-1] if hierarchy else self._data

        try:
            del data[last]
        except TypeError:
            # Lists require an integer index (mirrors __setitem__)
            del data[int(last)]

    def update(self, other):
        """Update this config from another mapping, merging nested mappings.

        Like `dict.update`, but will add or modify keys in nested dicts,
        instead of overwriting the nested dict entirely.  For example, for
        the given code::

            foo = {"a": {"b": 1}}
            foo.update({"a": {"c": 2}})

        - If ``foo`` is a `dict`, then after the update
          ``foo == {"a": {"c": 2}}``
        - But if ``foo`` is a `Config`, then after the update
          ``foo == {"a": {"b": 1, "c": 2}}``

        Parameters
        ----------
        other : `dict` or `Config`
            Source of configuration.

        Raises
        ------
        RuntimeError
            Raised if ``other`` is not a mapping, or if a nested mapping
            would have to be merged into an existing non-mapping value.
        """
        def doUpdate(d, u):
            # Report the type of whichever argument is actually at fault.
            for candidate in (u, d):
                if not isinstance(candidate, collections.abc.Mapping):
                    raise RuntimeError(f"Only call update with Mapping, not {type(candidate)}")
            for k, v in u.items():
                if isinstance(v, collections.abc.Mapping):
                    d[k] = doUpdate(d.get(k, {}), v)
                else:
                    d[k] = v
            return d

        doUpdate(self._data, other)

    def merge(self, other):
        """Like Config.update, but will add keys & values from other that
        DO NOT EXIST in self.

        Keys and values that already exist in self will NOT be overwritten.

        Parameters
        ----------
        other : `dict` or `Config`
            Source of configuration.
        """
        merged = copy.deepcopy(other)
        if not isinstance(merged, Config):
            # Plain mappings have neither the nested update semantics nor
            # the ``_data`` attribute relied upon below.
            merged = Config(merged)
        merged.update(self)
        self._data = merged._data

    def nameTuples(self, topLevelOnly=False):
        """Get tuples representing the name hierarchies of all keys.

        The tuples returned from this method are guaranteed to be usable
        to access items in the configuration object.

        Parameters
        ----------
        topLevelOnly : `bool`, optional
            If False, the default, a full hierarchy of names is returned.
            If True, only the top level are returned.

        Returns
        -------
        names : `list` of `tuple`
            List of all names present in the `Config` where each element
            in the list is a `tuple` representing the hierarchy. Elements
            of nested sequences are identified by their integer index.
        """
        if topLevelOnly:
            return [(k,) for k in self]

        names = []

        def collect(node, base):
            if isinstance(node, collections.abc.Sequence):
                theseKeys = range(len(node))
            else:
                theseKeys = node.keys()
            for key in theseKeys:
                val = node[key]
                levelKey = base + (key,)
                names.append(levelKey)
                # Strings and bytes are leaves even though they are
                # sequences.
                if isinstance(val, (collections.abc.Mapping, collections.abc.Sequence)) \
                        and not isinstance(val, (str, bytes)):
                    collect(val, levelKey)

        collect(self._data, ())
        return names

    def names(self, topLevelOnly=False, delimiter=None):
        """Get a delimited name of all the keys in the hierarchy.

        The values returned from this method are guaranteed to be usable
        to access items in the configuration object.

        Parameters
        ----------
        topLevelOnly : `bool`, optional
            If False, the default, a full hierarchy of names is returned.
            If True, only the top level are returned.
        delimiter : `str`, optional
            Delimiter to use when forming the keys.  If the delimiter is
            present in any of the keys, it will be escaped in the returned
            names.  If `None` given a delimiter will be automatically provided.
            The delimiter can not be alphanumeric.

        Returns
        -------
        names : `list` of `str`
            List of all names present in the `Config`.

        Notes
        -----
        This is different than the built-in method `dict.keys`, which will
        return only the first level keys.

        Raises
        ------
        ValueError
            Raised if the supplied delimiter is alphanumeric, or if no
            usable delimiter could be determined automatically.
        """
        if topLevelOnly:
            return list(self.keys())

        # Get all the tuples of hierarchical keys
        nameTuples = self.nameTuples()

        if delimiter is not None and delimiter.isalnum():
            raise ValueError(f"Supplied delimiter ({delimiter!r}) must not be alphanumeric.")

        if delimiter is None:
            # Start with something, and ensure it does not need to be
            # escaped (it is much easier to understand if not escaped)
            delimiter = self._D

            # Form big string for easy check of delimiter clash
            combined = "".join("".join(str(s) for s in k) for k in nameTuples)

            # Try a delimiter and keep trying until we get something that
            # works.
            ntries = 0
            while delimiter in combined:
                log.debug("Delimiter '%s' could not be used. Trying another.", delimiter)
                ntries += 1

                if ntries > 100:
                    raise ValueError(f"Unable to determine a delimiter for Config {self}")

                # Step to the next non-alphanumeric character
                while True:
                    delimiter = chr(ord(delimiter)+1)
                    if not delimiter.isalnum():
                        break

        log.debug("Using delimiter %r", delimiter)

        # Form the keys, escaping the delimiter if necessary
        escapedDelimiter = f"\\{delimiter}"
        return [delimiter + delimiter.join(str(s).replace(delimiter, escapedDelimiter) for s in k)
                for k in nameTuples]

    def asArray(self, name):
        """Get a value as an array.

        May contain one or more elements.

        Parameters
        ----------
        name : `str`
            Key to use to retrieve value.

        Returns
        -------
        array : `collections.abc.Sequence`
            The value corresponding to name.  If the value is a
            `~collections.abc.Sequence` (and not a `str`) the value itself
            will be returned, else the value will be the single element of
            a new `list`.  A missing key therefore yields ``[None]``.
        """
        val = self.get(name)
        if isinstance(val, str) or not isinstance(val, collections.abc.Sequence):
            val = [val]
        return val

    def __eq__(self, other):
        if isinstance(other, Config):
            other = other._data
        return self._data == other

    def __ne__(self, other):
        if isinstance(other, Config):
            other = other._data
        return self._data != other

    #######
    # i/o #

    def dump(self, output):
        """Write the config to a YAML stream.

        Parameters
        ----------
        output
            The YAML stream to use for output.
        """
        yaml.safe_dump(self._data, output, default_flow_style=False)

    def dumpToUri(self, uri, updateFile=True, defaultFileName="butler.yaml",
                  overwrite=True):
        """Write the config to location pointed to by given URI.

        Currently supports 's3' and 'file' URI schemes.

        Parameters
        ----------
        uri : `str` or `ButlerURI`
            URI of location where the Config will be written.
        updateFile : `bool`, optional
            If True and uri does not end on a filename with extension, will
            append ``defaultFileName`` to the target uri. True by default.
        defaultFileName : `str`, optional
            The file name that will be appended to target uri if updateFile is
            True and uri does not end on a file with an extension.
        overwrite : `bool`, optional
            If True the configuration will be written even if it already
            exists at that location.

        Raises
        ------
        ValueError
            Raised if the URI scheme is not supported.
        """
        if isinstance(uri, str):
            uri = ButlerURI(uri)

        if not uri.scheme or uri.scheme == "file":
            if os.path.isdir(uri.path) and updateFile:
                uri = ButlerURI(os.path.join(uri.ospath, defaultFileName))
            self.dumpToFile(uri.ospath, overwrite=overwrite)
        elif uri.scheme == "s3":
            if not uri.dirLike and "." not in uri.basename():
                uri = ButlerURI(uri.geturl(), forceDirectory=True)
            # NOTE(review): the file name is updated regardless of
            # ``updateFile`` and of whether the URI already names a file
            # with an extension -- confirm this is intended.
            uri.updateFile(defaultFileName)
            self.dumpToS3File(uri, overwrite=overwrite)
        else:
            raise ValueError(f"Unrecognized URI scheme: {uri.scheme}")

    def dumpToFile(self, path, *, overwrite=True):
        """Write the config to a file.

        Parameters
        ----------
        path : `str`
            Path to the file to use for output.
        overwrite : `bool`, optional
            If True any existing file will be over written.

        Notes
        -----
        The name of the config file is stored in the Config object.

        Raises
        ------
        FileExistsError
            Raised if the file already exists but overwrite is False.
        """
        # Mode "x" makes open() fail if the file already exists.
        mode = "w" if overwrite else "x"
        with open(path, mode) as f:
            self.dump(f)
        self.configFile = path

    def dumpToS3File(self, uri, *, overwrite=True):
        """Write the config to a file in S3 Bucket.

        Parameters
        ----------
        uri : `ButlerURI`
            S3 URI where the configuration should be stored.
        overwrite : `bool`, optional
            If False, a check will be made to see if the key already
            exists.

        Raises
        ------
        ModuleNotFoundError
            Raised if ``boto3`` is not available.
        ValueError
            Raised if the URI does not use the ``s3`` scheme.
        FileExistsError
            Raised if the configuration already exists at this location
            and overwrite is set to `False`.
        """
        if boto3 is None:
            raise ModuleNotFoundError("Could not find boto3. "
                                      "Are you sure it is installed?")

        if uri.scheme != "s3":
            raise ValueError(f"Must provide S3 URI not {uri}")

        s3 = boto3.client("s3")

        if not overwrite:
            from .s3utils import s3CheckFileExists
            if s3CheckFileExists(uri, client=s3)[0]:
                raise FileExistsError(f"Config already exists at {uri}")

        bucket = uri.netloc
        key = uri.relativeToPathRoot

        # Serialize to memory and upload the resulting text in one request.
        with io.StringIO() as stream:
            self.dump(stream)
            s3.put_object(Bucket=bucket, Key=key, Body=stream.getvalue())

    @staticmethod
    def updateParameters(configType, config, full, toUpdate=None, toCopy=None, overwrite=True):
        """Generic helper function for updating specific config parameters.

        Allows for named parameters to be set to new values in bulk, and
        for other values to be set by copying from a reference config.

        Assumes that the supplied config is compatible with ``configType``
        and will attach the updated values to the supplied config by
        looking for the related component key.  It is assumed that
        ``config`` and ``full`` are from the same part of the
        configuration hierarchy.

        Parameters
        ----------
        configType : `ConfigSubset`
            Config type to use to extract relevant items from ``config``.
        config : `Config`
            A `Config` to update. Only the subset understood by
            the supplied `ConfigSubset` will be modified. Default values
            will not be inserted and the content will not be validated
            since mandatory keys are allowed to be missing until
            populated later by merging.
        full : `Config`
            A complete config with all defaults expanded that can be
            converted to a ``configType``. Read-only and will not be
            modified by this method. Values are read from here if
            ``toCopy`` is defined.

            Repository-specific options that should not be obtained
            from defaults when Butler instances are constructed
            should be copied from ``full`` to ``config``.
        toUpdate : `dict`, optional
            A `dict` defining the keys to update and the new value to use.
            The keys and values can be any supported by `Config`
            assignment.
        toCopy : `tuple`, optional
            `tuple` of keys whose values should be copied from ``full``
            into ``config``.
        overwrite : `bool`, optional
            If `False`, do not modify a value in ``config`` if the key
            already exists.  Default is always to overwrite.

        Raises
        ------
        ValueError
            Neither ``toUpdate`` nor ``toCopy`` were defined.
        """
        if toUpdate is None and toCopy is None:
            raise ValueError("One of toUpdate or toCopy parameters must be set.")

        # If this is a parent configuration then we need to ensure that
        # the supplied config has the relevant component key in it.
        # If this is a parent configuration we add in the stub entry
        # so that the ConfigSubset constructor will do the right thing.
        # We check full for this since that is guaranteed to be complete.
        component = configType.component
        if component in full and component not in config:
            config[component] = {}

        # Extract the part of the config we wish to update
        localConfig = configType(config, mergeDefaults=False, validate=False)
        configName = localConfig.__class__.__name__

        if toUpdate:
            for key, value in toUpdate.items():
                if key in localConfig and not overwrite:
                    log.debug("Not overriding key '%s' with value '%s' in config %s",
                              key, value, configName)
                else:
                    localConfig[key] = value

        if toCopy:
            localFullConfig = configType(full, mergeDefaults=False)
            for key in toCopy:
                if key in localConfig and not overwrite:
                    log.debug("Not overriding key '%s' from defaults in config %s",
                              key, configName)
                else:
                    localConfig[key] = localFullConfig[key]

        # Reattach to parent if this is a child config
        if component in config:
            config[component] = localConfig
        else:
            config.update(localConfig)
955class ConfigSubset(Config):
956 """Config representing a subset of a more general configuration.
958 Subclasses define their own component and when given a configuration
959 that includes that component, the resulting configuration only includes
960 the subset. For example, your config might contain ``dimensions`` if it's
961 part of a global config and that subset will be stored. If ``dimensions``
962 can not be found it is assumed that the entire contents of the
963 configuration should be used.
965 Default values are read from the environment or supplied search paths
966 using the default configuration file name specified in the subclass.
967 This allows a configuration class to be instantiated without any
968 additional arguments.
970 Additional validation can be specified to check for keys that are mandatory
971 in the configuration.
973 Parameters
974 ----------
975 other : `Config` or `str` or `dict`
976 Argument specifying the configuration information as understood
977 by `Config`
978 validate : `bool`, optional
979 If `True` required keys will be checked to ensure configuration
980 consistency.
981 mergeDefaults : `bool`, optional
982 If `True` defaults will be read and the supplied config will
983 be combined with the defaults, with the supplied valiues taking
984 precedence.
985 searchPaths : `list` or `tuple`, optional
986 Explicit additional paths to search for defaults. They should
987 be supplied in priority order. These paths have higher priority
988 than those read from the environment in
989 `ConfigSubset.defaultSearchPaths()`.
990 """
992 component: ClassVar[Optional[str]] = None
993 """Component to use from supplied config. Can be None. If specified the
994 key is not required. Can be a full dot-separated path to a component.
995 """
997 requiredKeys: ClassVar[Sequence[str]] = ()
998 """Keys that are required to be specified in the configuration.
999 """
1001 defaultConfigFile: ClassVar[Optional[str]] = None
1002 """Name of the file containing defaults for this config class.
1003 """
def __init__(self, other=None, validate=True, mergeDefaults=True, searchPaths=None):
    """Build the subset from ``other``, optionally layered over defaults.

    Parameters
    ----------
    other : `Config` or `str` or `dict`, optional
        Configuration information in any form accepted by `Config`.
    validate : `bool`, optional
        If `True`, call ``validate()`` once the configuration has been
        assembled.
    mergeDefaults : `bool`, optional
        If `True`, default configuration files are read first and the
        supplied values are applied on top of them.
    searchPaths : `list` or `tuple`, optional
        Extra directories to search for defaults, highest priority first.
        They take precedence over the paths returned by
        ``defaultSearchPaths()``.

    Raises
    ------
    RuntimeError
        Raised if the class named by the ``cls`` key cannot be imported.
    KeyError
        Raised by ``validate()`` if ``validate`` is `True` and a required
        key is missing.
    """
    # Create a blank object to receive the defaults
    # Once we have the defaults we then update with the external values
    super().__init__()

    # Create a standard Config rather than subset
    externalConfig = Config(other)

    # Select the part we need from it
    # To simplify the use of !include we also check for the existence of
    # component.component (since the included files can themselves
    # include the component name)
    if self.component is not None:
        doubled = (self.component, self.component)
        # Must check for double depth first
        if doubled in externalConfig:
            externalConfig = externalConfig[doubled]
        elif self.component in externalConfig:
            externalConfig._data = externalConfig._data[self.component]

    # Default files read to create this configuration
    self.filesRead = []

    # Assume we are not looking up child configurations
    containerKey = None

    # Sometimes we do not want to merge with defaults.
    if mergeDefaults:

        # Supplied search paths have highest priority
        fullSearchPath = []
        if searchPaths:
            fullSearchPath.extend(searchPaths)

        # Read default paths from environment
        fullSearchPath.extend(self.defaultSearchPaths())

        # There are two places to find defaults for this particular config
        # - The "defaultConfigFile" defined in the subclass
        # - The class specified in the "cls" element in the config.
        #   Read cls after merging in case it changes.
        if self.defaultConfigFile is not None:
            self._updateWithConfigsFromPath(fullSearchPath, self.defaultConfigFile)

        # Can have a class specification in the external config (priority)
        # or from the defaults.
        pytype = None
        if "cls" in externalConfig:
            pytype = externalConfig["cls"]
        elif "cls" in self:
            pytype = self["cls"]

        if pytype is not None:
            try:
                cls = doImport(pytype)
            except ImportError as e:
                raise RuntimeError(f"Failed to import cls '{pytype}' for config {type(self)}") from e

            # The class referenced from the config is not required to
            # declare a defaults file, so tolerate a missing attribute
            # in the same way as for the container key below.
            defaultsFile = getattr(cls, "defaultConfigFile", None)
            if defaultsFile is not None:
                self._updateWithConfigsFromPath(fullSearchPath, defaultsFile)

            # Get the container key in case we need it; it stays None
            # when the class does not define one.
            containerKey = getattr(cls, "containerKey", None)

    # Now update this object with the external values so that the external
    # values always override the defaults
    self.update(externalConfig)

    # If this configuration has child configurations of the same
    # config class, we need to expand those defaults as well.
    if mergeDefaults and containerKey is not None and containerKey in self:
        for idx, subConfig in enumerate(self[containerKey]):
            self[containerKey, idx] = type(self)(other=subConfig, validate=validate,
                                                 mergeDefaults=mergeDefaults,
                                                 searchPaths=searchPaths)

    if validate:
        self.validate()
@classmethod
def defaultSearchPaths(cls):
    """Return the directories searched for global default configurations.

    The ``config`` directory of the ``daf_butler`` package is always
    included and has the lowest priority. If the PATH-like environment
    variable ``DAF_BUTLER_CONFIG_PATH`` is set, its entries are placed
    ahead of the package directory, keeping the order in which they
    appear in the variable.

    Returns
    -------
    paths : `list`
        Search paths ordered from highest to lowest priority. The butler
        package ``config`` directory is always the final entry.
    """
    envValue = os.environ.get(CONFIG_PATH)

    # Entries from the environment, when present, outrank the package
    # defaults and so come first.
    searchOrder = [] if envValue is None else envValue.split(os.pathsep)

    # The butler's own config directory is the fallback of last resort.
    packageRoot = lsst.utils.getPackageDir("daf_butler")
    searchOrder.append(os.path.join(packageRoot, "config"))

    return searchOrder
def _updateWithConfigsFromPath(self, searchPaths, configFile):
    """Merge every copy of a configuration file found on a search path.

    Each file that exists is recorded in ``filesRead`` and merged into
    this object, overriding values already present. Files are merged
    starting from the lowest priority path so that values from earlier
    path entries win.

    Parameters
    ----------
    searchPaths : `list`
        Directories to look in, highest priority first. Not consulted
        when ``configFile`` is an absolute path.
    configFile : `str`
        Name of the file to look for. An absolute path is read directly
        (if it exists) without using the search path.
    """
    if os.path.isabs(configFile):
        candidates = (configFile,)
    else:
        # Walk the path back to front so that the highest priority
        # entries are merged last and therefore take precedence.
        candidates = (os.path.join(directory, configFile)
                      for directory in reversed(searchPaths))

    for candidate in candidates:
        if not os.path.exists(candidate):
            continue
        self.filesRead.append(candidate)
        self._updateWithOtherConfigFile(candidate)
def _updateWithOtherConfigFile(self, file):
    """Merge defaults read from another source into this configuration.

    The source is loaded as an instance of this object's own class, so
    that component subsetting is applied in the same way, and its values
    then override those currently held. The loaded configuration is
    neither validated nor merged with defaults of its own.

    Parameters
    ----------
    file : `Config`, `str`, or `dict`
        Entity that can be converted to a `ConfigSubset`.
    """
    subsetClass = type(self)
    defaults = subsetClass(file, validate=False, mergeDefaults=False)
    self.update(defaults)
def validate(self):
    """Verify that every mandatory key is present in this configuration.

    Nothing is checked when ``requiredKeys`` is empty.

    Raises
    ------
    KeyError
        Raised if one or more of the keys listed in ``requiredKeys`` is
        absent from the configuration.
    """
    absent = []
    for key in self.requiredKeys:
        if key in self._data:
            continue
        absent.append(key)

    if not absent:
        return

    raise KeyError(f"Mandatory keys ({absent}) missing from supplied configuration for {type(self)}")