Coverage for python/lsst/daf/butler/core/config.py : 10%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Configuration control."""
24__all__ = ("Config", "ConfigSubset")
26import collections
27import copy
28import logging
29import pprint
30import os
31import yaml
32import sys
33from yaml.representer import Representer
34import io
35from typing import Sequence, Optional, ClassVar
37try:
38 import boto3
39except ImportError:
40 boto3 = None
42import lsst.utils
43from lsst.utils import doImport
44from .location import ButlerURI
# Dump ``collections.defaultdict`` instances using the plain-dict YAML
# representation.
yaml.add_representer(collections.defaultdict, Representer.represent_dict)

# Config module logger
log = logging.getLogger(__name__)

# PATH-like environment variable to use for defaults.
CONFIG_PATH = "DAF_BUTLER_CONFIG_PATH"
class Loader(yaml.CSafeLoader):
    """YAML Loader that supports file include directives.

    Uses ``!include`` directive in a YAML file to point to another
    YAML file to be included. The path in the include directive is relative
    to the file containing that directive.

        storageClasses: !include storageClasses.yaml

    Examples
    --------
    >>> with open("document.yaml", "r") as f:
    ...     data = yaml.load(f, Loader=Loader)

    Notes
    -----
    See https://davidchall.github.io/yaml-includes.html
    """

    def __init__(self, stream):
        super().__init__(stream)
        # The stream must expose a ``name`` attribute: it is the root
        # against which ``!include`` file names are resolved.
        self._root = ButlerURI(stream.name)
        # Registered on every instantiation, always with the same tag and
        # handler.
        Loader.add_constructor("!include", Loader.include)

    def include(self, node):
        """Construct the value of an ``!include`` node.

        Parameters
        ----------
        node : `yaml.Node`
            Scalar (one file name), sequence (list of file names) or
            mapping (key to file name) node carrying the ``!include`` tag.

        Returns
        -------
        result : `object`, `list` or `dict`
            The loaded content of the referenced file, or a list/dict of
            loaded contents mirroring the shape of the node.

        Raises
        ------
        yaml.constructor.ConstructorError
            Raised if the node is not a scalar, sequence or mapping.
        """
        if isinstance(node, yaml.ScalarNode):
            return self.extractFile(self.construct_scalar(node))

        if isinstance(node, yaml.SequenceNode):
            return [self.extractFile(filename) for filename in self.construct_sequence(node)]

        if isinstance(node, yaml.MappingNode):
            return {k: self.extractFile(v) for k, v in self.construct_mapping(node).items()}

        # Carry the explanation (and position) in the exception itself rather
        # than printing to stderr and raising an empty error.
        raise yaml.constructor.ConstructorError(
            None, None, "unrecognised node type in !include statement", node.start_mark
        )

    def extractFile(self, filename):
        """Load the YAML file referenced by an ``!include`` directive.

        Parameters
        ----------
        filename : `str`
            File name to load; it replaces the file part of the URI of the
            stream this loader was created with.

        Returns
        -------
        content : `object`
            Parsed content of the referenced file.

        Raises
        ------
        ModuleNotFoundError
            Raised if an S3 URI is used but boto3 is not available.
        FileNotFoundError
            Raised if the S3 bucket or key does not exist.
        ValueError
            Raised if the URI scheme is neither local file nor ``s3``.
        """
        fileuri = copy.copy(self._root)
        fileuri.updateFile(filename)
        log.debug("Opening YAML file via !include: %s", fileuri)

        if not fileuri.scheme or fileuri.scheme == "file":
            with open(fileuri.ospath, "r") as f:
                return yaml.load(f, Loader)

        if fileuri.scheme == "s3":
            if boto3 is None:
                raise ModuleNotFoundError("Could not find boto3. Are you sure it is installed?")
            s3 = boto3.client("s3")
            try:
                response = s3.get_object(Bucket=fileuri.netloc, Key=fileuri.relativeToPathRoot)
            except (s3.exceptions.NoSuchKey, s3.exceptions.NoSuchBucket) as err:
                raise FileNotFoundError(f'No such file or directory: {fileuri}') from err

            # boto3 response is a `StreamingBody`, but not a valid Python
            # IOStream. Loader will raise an error that the stream has no name.
            # The name is used to resolve the "!include" filename location to
            # download. A hackish solution is to name it explicitly.
            body = response["Body"]
            body.name = fileuri.geturl()
            try:
                return yaml.load(body, Loader)
            finally:
                # Release the underlying connection even if parsing fails.
                body.close()

        # Previously an unsupported scheme silently produced `None` as the
        # included content.
        raise ValueError(f"Unrecognized URI scheme: {fileuri.scheme}")
class Config(collections.abc.MutableMapping):
    r"""Implements a datatype that is used by `Butler` for configuration
    parameters.

    It is essentially a `dict` with key/value pairs, including nested dicts
    (as values). In fact, it can be initialized with a `dict`.
    This is explained next:

    Config extends the `dict` api so that hierarchical values may be accessed
    with delimited notation or as a tuple.  If a string is given the delimiter
    is picked up from the first character in that string. For example,
    ``foo.getValue(".a.b.c")``, ``foo["a"]["b"]["c"]``, ``foo["a", "b", "c"]``,
    ``foo[".a.b.c"]``, and ``foo["/a/b/c"]`` all achieve the same outcome.
    If the first character is alphanumeric, no delimiter will be used.
    ``foo["a.b.c"]`` will be a single key ``a.b.c`` as will ``foo[":a.b.c"]``.
    Unicode characters can be used as the delimiter for distinctiveness if
    required.

    If a key in the hierarchy starts with a non-alphanumeric character care
    should be used to ensure that either the tuple interface is used or
    a distinct delimiter is always given in string form.

    Finally, the delimiter can be escaped if it is part of a key and also
    has to be used as a delimiter. For example, ``foo[r".a.b\.c"]`` results in
    a two element hierarchy of ``a`` and ``b.c``.  For hard-coded strings it is
    always better to use a different delimiter in these cases.

    Note that adding a multi-level key implicitly creates any nesting levels
    that do not exist, but removing multi-level keys does not automatically
    remove empty nesting levels.  As a result:

        >>> c = Config()
        >>> c[".a.b"] = 1
        >>> del c[".a.b"]
        >>> c
        Config({'a': {}})

    Storage formats supported:

    - yaml: read and write is supported.


    Parameters
    ----------
    other : `str` or `Config` or `dict`
        Other source of configuration, can be:

        - (`str`) Treated as a path to a config file on disk. Must end with
          ".yaml".
        - (`Config`) Copies the other Config's values into this one.
        - (`dict`) Copies the values from the dict into this Config.

        If `None` is provided an empty `Config` will be created.

    Raises
    ------
    RuntimeError
        Raised if ``other`` is of an unsupported type or refers to a file
        of an unsupported type.
    """

    _D: ClassVar[str] = "→"
    """Default internal delimiter to use for components in the hierarchy when
    constructing keys for external use (see `Config.names()`)."""

    includeKey: ClassVar[str] = "includeConfigs"
    """Key used to indicate that another config should be included at this
    part of the hierarchy."""

    def __init__(self, other=None):
        self._data = {}
        self.configFile = None

        if other is None:
            return

        if isinstance(other, Config):
            self._data = copy.deepcopy(other._data)
            self.configFile = other.configFile
        elif isinstance(other, collections.abc.Mapping):
            self.update(other)
        elif isinstance(other, str):
            # if other is a string, assume it is a file path.
            self.__initFromFile(other)
            self._processExplicitIncludes()
        else:
            # if the config specified by other could not be recognized raise
            # a runtime error. An f-string is used because ``"%s" % other``
            # itself fails with TypeError when ``other`` is a tuple.
            raise RuntimeError(f"A Config could not be loaded from other:{other!s}")

    def ppprint(self):
        """helper function for debugging, prints a config out in a readable
        way in the debugger.

        use: pdb> print(myConfigObject.ppprint())

        Returns
        -------
        s : `str`
            A prettyprint formatted string representing the config
        """
        return pprint.pformat(self._data, indent=2, width=1)

    def __repr__(self):
        return f"{type(self).__name__}({self._data!r})"

    def __str__(self):
        return self.ppprint()

    def __len__(self):
        return len(self._data)

    def __iter__(self):
        return iter(self._data)

    def copy(self):
        """Return a copy of this config, of the same type."""
        return type(self)(self)

    def __initFromFile(self, path):
        """Load a file from a path or an URI.

        Parameters
        ----------
        path : `str`
            Path or an URI to a persisted config file.

        Raises
        ------
        RuntimeError
            Raised if the path does not end in "yaml".
        """
        uri = ButlerURI(path)
        if uri.path.endswith("yaml"):
            if uri.scheme == "s3":
                self.__initFromS3YamlFile(uri.geturl())
            else:
                self.__initFromYamlFile(uri.ospath)
        else:
            raise RuntimeError(f"Unhandled config file type:{uri!s}")
        self.configFile = str(path)

    def __initFromS3YamlFile(self, url):
        """Load a file at a given S3 Bucket uri and attempts to load it from
        yaml.

        Parameters
        ----------
        url : `str`
            S3 URL of a persisted config file.

        Raises
        ------
        ModuleNotFoundError
            Raised if boto3 is not available.
        FileNotFoundError
            Raised if the bucket or key does not exist.
        """
        if boto3 is None:
            raise ModuleNotFoundError("boto3 not found. "
                                      "Are you sure it is installed?")

        uri = ButlerURI(url)
        s3 = boto3.client("s3")
        try:
            response = s3.get_object(Bucket=uri.netloc, Key=uri.relativeToPathRoot)
        except (s3.exceptions.NoSuchKey, s3.exceptions.NoSuchBucket) as err:
            raise FileNotFoundError(f"No such file or directory: {uri}") from err

        # boto3 response is a `StreamingBody`, but not a valid Python IOStream.
        # Loader will raise an error that the stream has no name. A hackish
        # solution is to name it explicitly.
        body = response["Body"]
        body.name = url
        try:
            self.__initFromYaml(body)
        finally:
            # Always release the stream, even when the YAML is invalid.
            body.close()

    def __initFromYamlFile(self, path):
        """Opens a file at a given path and attempts to load it in from yaml.

        Parameters
        ----------
        path : `str`
            To a persisted config file in YAML format.
        """
        log.debug("Opening YAML config file: %s", path)
        with open(path, "r") as f:
            self.__initFromYaml(f)

    def __initFromYaml(self, stream):
        """Loads a YAML config from any readable stream that contains one.

        Parameters
        ----------
        stream
            To a persisted config file in YAML format.

        Returns
        -------
        self : `Config`
            This object, with its content replaced by the loaded data.

        Raises
        ------
        yaml.YAMLError
            If there is an error loading the file.
        """
        content = yaml.load(stream, Loader=Loader)
        if content is None:
            # An empty document yields None; treat it as an empty config.
            content = {}
        self._data = content
        return self

    def _processExplicitIncludes(self):
        """Scan through the configuration searching for the special
        includeConfigs directive and process the includes.

        Raises
        ------
        RuntimeError
            Raised if a referenced include file can not be found.
        """
        # Search paths for config files
        searchPaths = [os.path.curdir]
        if self.configFile is not None:
            searchPaths.append(os.path.abspath(os.path.dirname(self.configFile)))

        # The key list is a snapshot taken before any include is processed.
        names = self.nameTuples()
        for path in names:
            if path[-1] != self.includeKey:
                continue

            log.debug("Processing file include directive at %s", self._D + self._D.join(path))
            basePath = path[:-1]

            # Extract the includes and then delete them from the config
            includes = self[path]
            del self[path]

            # Be consistent and convert to a list
            if not isinstance(includes, list):
                includes = [includes]

            # Read each file assuming it is a reference to a file
            # The file can be relative to config file or cwd
            # ConfigSubset search paths are not used
            # At some point these might be URIs which we will have to
            # assume resolve explicitly
            subConfigs = []
            for fileName in includes:
                # Expand any shell variables
                fileName = os.path.expandvars(fileName)
                found = None
                if os.path.isabs(fileName):
                    found = fileName
                else:
                    for searchDir in searchPaths:
                        filePath = os.path.join(searchDir, fileName)
                        if os.path.exists(filePath):
                            found = os.path.normpath(os.path.abspath(filePath))
                            break
                if not found:
                    raise RuntimeError(f"Unable to find referenced include file: {fileName}")

                # Read the referenced Config as a Config
                subConfigs.append(type(self)(found))

            if not subConfigs:
                # An empty include list has nothing to merge.
                continue

            # Now we need to merge these sub configs with the current
            # information that was present in this node in the config
            # tree with precedence given to the explicit values
            newConfig = subConfigs.pop(0)
            for sc in subConfigs:
                newConfig.update(sc)

            # Explicit values take precedence
            if not basePath:
                # This is an include at the root config
                newConfig.update(self)
                # Replace the current config
                self._data = newConfig._data
            else:
                newConfig.update(self[basePath])
                # And reattach to the base config
                self[basePath] = newConfig

    @staticmethod
    def _splitIntoKeys(key):
        r"""Split the argument for get/set/in into a hierarchical list.

        Parameters
        ----------
        key : `str` or iterable
            Argument given to get/set/in. If an iterable is provided it will
            be converted to a list.  If the first character of the string
            is not an alphanumeric character then it will be used as the
            delimiter for the purposes of splitting the remainder of the
            string. If the delimiter is also in one of the keys then it
            can be escaped using ``\``. There is no default delimiter.
            An empty string is returned as a single (empty) key.

        Returns
        -------
        keys : `list`
            Hierarchical keys as a `list`.

        Raises
        ------
        ValueError
            Raised if an escaped delimiter is itself escaped, or if the
            carriage-return placeholder character clashes with the key or
            delimiter while escaping is in use.
        """
        if isinstance(key, str):
            # An empty string has no first character to act as a delimiter.
            if not key or key[0].isalnum():
                return [key, ]
            d = key[0]
            key = key[1:]
            escaped = f"\\{d}"
            temp = None
            if escaped in key:
                # Complain at the attempt to escape the escape
                doubled = fr"\{escaped}"
                if doubled in key:
                    raise ValueError(f"Escaping an escaped delimiter ({doubled} in {key})"
                                     " is not yet supported.")
                # Replace with a character that won't be in the string
                temp = "\r"
                if temp in key or d == temp:
                    raise ValueError(f"Can not use character {temp!r} in hierarchical key or as"
                                     " delimiter if escaping the delimiter")
                key = key.replace(escaped, temp)
            hierarchy = key.split(d)
            if temp:
                # Put the (now unescaped) delimiter back into the keys.
                hierarchy = [h.replace(temp, d) for h in hierarchy]
            return hierarchy
        elif isinstance(key, collections.abc.Iterable):
            return list(key)
        else:
            # Not sure what this is so try it anyway
            return [key, ]

    def _getKeyHierarchy(self, name):
        """Retrieve the key hierarchy for accessing the Config

        Parameters
        ----------
        name : `str` or `tuple`
            Delimited string or `tuple` of hierarchical keys.

        Returns
        -------
        hierarchy : `list` of `str`
            Hierarchy to use as a `list`. If the name is available directly
            as a key in the Config it will be used regardless of the presence
            of any nominal delimiter.
        """
        try:
            direct = name in self._data
        except TypeError:
            # Unhashable (e.g. a list of keys) so it can not be a direct key.
            direct = False
        if direct:
            return [name, ]
        return self._splitIntoKeys(name)

    def _findInHierarchy(self, keys, create=False):
        """Look for hierarchy of keys in Config

        Parameters
        ----------
        keys : `list` or `tuple`
            Keys to search in hierarchy.
        create : `bool`, optional
            If `True`, if a part of the hierarchy does not exist, insert an
            empty `dict` into the hierarchy.

        Returns
        -------
        hierarchy : `list`
            List of the value corresponding to each key in the supplied
            hierarchy. Only keys that exist in the hierarchy will have
            a value.
        complete : `bool`
            `True` if the full hierarchy exists and the final element
            in ``hierarchy`` is the value of relevant value.
        """
        d = self._data

        def checkNextItem(k, d, create):
            """See if k is in d and if it is return the new child"""
            nextVal = None
            isThere = False
            if d is None or isinstance(d, str):
                # We have gone past the end of the hierarchy. Strings are
                # leaves and are never indexed into (matching nameTuples).
                pass
            elif isinstance(d, collections.abc.Sequence):
                # Check sequence first because for lists
                # __contains__ checks whether value is found in list
                # not whether the index exists in list. When we traverse
                # the hierarchy we are interested in the index.
                try:
                    nextVal = d[int(k)]
                    isThere = True
                except IndexError:
                    pass
                except ValueError:
                    isThere = k in d
            else:
                try:
                    present = k in d
                except TypeError:
                    # Scalar leaf (e.g. a number): nothing can live below it,
                    # so report "not found" instead of crashing the lookup.
                    present = False
                if present:
                    nextVal = d[k]
                    isThere = True
                elif create:
                    d[k] = {}
                    nextVal = d[k]
                    isThere = True
            return nextVal, isThere

        hierarchy = []
        complete = True
        for k in keys:
            d, isThere = checkNextItem(k, d, create)
            if isThere:
                hierarchy.append(d)
            else:
                complete = False
                break

        return hierarchy, complete

    def __getitem__(self, name):
        # Override the split for the simple case where there is an exact
        # match.  This allows `Config.items()` to work via a simple
        # __iter__ implementation that returns top level keys of
        # self._data.
        keys = self._getKeyHierarchy(name)

        hierarchy, complete = self._findInHierarchy(keys)
        if not complete:
            raise KeyError(f"{name} not found")
        data = hierarchy[-1]

        if isinstance(data, collections.abc.Mapping):
            data = Config(data)
            # Ensure that child configs inherit the parent internal delimiter
            if self._D != Config._D:
                data._D = self._D
        return data

    def __setitem__(self, name, value):
        keys = self._getKeyHierarchy(name)
        last = keys.pop()
        if isinstance(value, Config):
            # Store plain data, decoupled from the supplied Config.
            value = copy.deepcopy(value._data)

        hierarchy, complete = self._findInHierarchy(keys, create=True)
        if hierarchy:
            data = hierarchy[-1]
        else:
            data = self._data

        try:
            data[last] = value
        except TypeError:
            # Lists need an integer index; delimited strings supply a str.
            data[int(last)] = value

    def __contains__(self, key):
        keys = self._getKeyHierarchy(key)
        hierarchy, complete = self._findInHierarchy(keys)
        return complete

    def __delitem__(self, key):
        keys = self._getKeyHierarchy(key)
        last = keys.pop()
        hierarchy, complete = self._findInHierarchy(keys)
        if not complete:
            raise KeyError(f"{key} not found in Config")

        if hierarchy:
            data = hierarchy[-1]
        else:
            data = self._data

        try:
            del data[last]
        except TypeError:
            # Same integer fallback as __setitem__ for list indices that
            # arrived as strings.
            del data[int(last)]

    def update(self, other):
        """Like dict.update, but will add or modify keys in nested dicts,
        instead of overwriting the nested dict entirely.

        For example, given::

            foo = {"a": {"b": 1}}
            foo.update({"a": {"c": 2}})

        - If foo is a dict, then after the update foo == {"a": {"c": 2}}
        - But if foo is a Config, then after the update
          foo == {"a": {"b": 1, "c": 2}}

        Parameters
        ----------
        other : `dict` or `Config`
            Source of configuration.

        Raises
        ------
        RuntimeError
            Raised if ``other`` is not a mapping, or if a nested mapping
            would have to be merged into an existing non-mapping value.
        """
        def doUpdate(d, u):
            if not isinstance(u, collections.abc.Mapping) or \
                    not isinstance(d, collections.abc.Mapping):
                # Report the argument that is actually at fault.
                culprit = d if isinstance(u, collections.abc.Mapping) else u
                raise RuntimeError("Only call update with Mapping, not {}".format(type(culprit)))
            for k, v in u.items():
                if isinstance(v, collections.abc.Mapping):
                    d[k] = doUpdate(d.get(k, {}), v)
                else:
                    d[k] = v
            return d
        doUpdate(self._data, other)

    def merge(self, other):
        """Like Config.update, but will add keys & values from other that
        DO NOT EXIST in self.

        Keys and values that already exist in self will NOT be overwritten.

        Parameters
        ----------
        other : `dict` or `Config`
            Source of configuration. It is not modified.

        Raises
        ------
        TypeError
            Raised if ``other`` is not a mapping.
        """
        if not isinstance(other, collections.abc.Mapping):
            raise TypeError(f"Can only merge a Mapping into a Config, not {type(other)}")

        # Work on an independent Config so that plain dicts are supported
        # and ``other`` is left untouched.
        if isinstance(other, Config):
            otherCopy = Config(other)
        else:
            otherCopy = Config(copy.deepcopy(other))
        otherCopy.update(self)
        self._data = otherCopy._data

    def nameTuples(self, topLevelOnly=False):
        """Get tuples representing the name hierarchies of all keys.

        The tuples returned from this method are guaranteed to be usable
        to access items in the configuration object.

        Parameters
        ----------
        topLevelOnly : `bool`, optional
            If False, the default, a full hierarchy of names is returned.
            If True, only the top level are returned.

        Returns
        -------
        names : `list` of `tuple` of `str`
            List of all names present in the `Config` where each element
            in the list is a `tuple` of strings representing the hierarchy.
            Elements of sequences are addressed by their integer index.
        """
        if topLevelOnly:
            return [(k,) for k in self]

        def getKeysAsTuples(d, keys, base):
            if isinstance(d, collections.abc.Sequence):
                theseKeys = range(len(d))
            else:
                theseKeys = d.keys()
            for key in theseKeys:
                val = d[key]
                levelKey = base + (key,) if base is not None else (key,)
                keys.append(levelKey)
                # Descend into containers, but never into strings.
                if isinstance(val, (collections.abc.Mapping, collections.abc.Sequence)) \
                        and not isinstance(val, str):
                    getKeysAsTuples(val, keys, levelKey)
        keys = []
        getKeysAsTuples(self._data, keys, None)
        return keys

    def names(self, topLevelOnly=False, delimiter=None):
        """Get a delimited name of all the keys in the hierarchy.

        The values returned from this method are guaranteed to be usable
        to access items in the configuration object.

        Parameters
        ----------
        topLevelOnly : `bool`, optional
            If False, the default, a full hierarchy of names is returned.
            If True, only the top level are returned.
        delimiter : `str`, optional
            Delimiter to use when forming the keys.  If the delimiter is
            present in any of the keys, it will be escaped in the returned
            names.  If `None` given a delimiter will be automatically provided.
            The delimiter can not be alphanumeric.

        Returns
        -------
        names : `list` of `str`
            List of all names present in the `Config`.

        Notes
        -----
        This is different than the built-in method `dict.keys`, which will
        return only the first level keys.

        Raises
        ------
        ValueError:
            The supplied delimiter is alphanumeric.
        """
        if topLevelOnly:
            return list(self.keys())

        # Get all the tuples of hierarchical keys
        nameTuples = self.nameTuples()

        if delimiter is not None and delimiter.isalnum():
            raise ValueError(f"Supplied delimiter ({delimiter!r}) must not be alphanumeric.")

        if delimiter is None:
            # Start with something, and ensure it does not need to be
            # escaped (it is much easier to understand if not escaped)
            delimiter = self._D

            # Form big string for easy check of delimiter clash
            combined = "".join("".join(str(s) for s in k) for k in nameTuples)

            # Try a delimiter and keep trying until we get something that
            # works.
            ntries = 0
            while delimiter in combined:
                log.debug("Delimiter '%s' could not be used. Trying another.", delimiter)
                ntries += 1

                if ntries > 100:
                    raise ValueError(f"Unable to determine a delimiter for Config {self}")

                # try another one
                while True:
                    delimiter = chr(ord(delimiter)+1)
                    if not delimiter.isalnum():
                        break

        log.debug("Using delimiter %r", delimiter)

        # Form the keys, escaping the delimiter if necessary
        strings = [delimiter + delimiter.join(str(s).replace(delimiter, f"\\{delimiter}") for s in k)
                   for k in nameTuples]
        return strings

    def asArray(self, name):
        """Get a value as an array.

        May contain one or more elements.

        Parameters
        ----------
        name : `str`
            Key to use to retrieve value.

        Returns
        -------
        array : `collections.abc.Sequence`
            The value corresponding to name, but guaranteed to be returned
            as a list with at least one element. If the value is a
            `~collections.abc.Sequence` (and not a `str`) the value itself
            will be returned, else the value will be the first element.
        """
        val = self.get(name)
        if isinstance(val, str) or not isinstance(val, collections.abc.Sequence):
            val = [val]
        return val

    def __eq__(self, other):
        if isinstance(other, Config):
            other = other._data
        return self._data == other

    def __ne__(self, other):
        if isinstance(other, Config):
            other = other._data
        return self._data != other

    #######
    # i/o #

    def dump(self, output):
        """Writes the config to a yaml stream.

        Parameters
        ----------
        output
            The YAML stream to use for output.
        """
        # First a set of known keys is handled and written to the stream in a
        # specific order for readability.
        # After the expected/ordered keys are written to the stream the
        # remainder of the keys are written to the stream.
        data = copy.copy(self._data)
        # No keys are currently singled out for ordered output.
        keys = []
        for key in keys:
            try:
                yaml.safe_dump({key: data.pop(key)}, output, default_flow_style=False)
                output.write("\n")
            except KeyError:
                pass
        if data:
            yaml.safe_dump(data, output, default_flow_style=False)

    def dumpToUri(self, uri, updateFile=True, defaultFileName="butler.yaml",
                  overwrite=True):
        """Writes the config to location pointed to by given URI.

        Currently supports 's3' and 'file' URI schemes.

        Parameters
        ----------
        uri: `str` or `ButlerURI`
            URI of location where the Config will be written.
        updateFile : bool, optional
            If True and uri does not end on a filename with extension, will
            append `defaultFileName` to the target uri. True by default.
        defaultFileName : `str`, optional
            The file name that will be appended to target uri if updateFile is
            True and uri does not end on a file with an extension.
        overwrite : bool, optional
            If True the configuration will be written even if it already
            exists at that location.

        Raises
        ------
        ValueError
            Raised if the URI scheme is not supported.
        """
        if isinstance(uri, str):
            uri = ButlerURI(uri)

        if not uri.scheme or uri.scheme == "file":
            if os.path.isdir(uri.path) and updateFile:
                uri = ButlerURI(os.path.join(uri.ospath, defaultFileName))
            self.dumpToFile(uri.ospath, overwrite=overwrite)
        elif uri.scheme == "s3":
            if not uri.dirLike and "." not in uri.basename():
                uri = ButlerURI(uri.geturl(), forceDirectory=True)
            uri.updateFile(defaultFileName)
            self.dumpToS3File(uri, overwrite=overwrite)
        else:
            raise ValueError(f"Unrecognized URI scheme: {uri.scheme}")

    def dumpToFile(self, path, *, overwrite=True):
        """Writes the config to a file.

        Parameters
        ----------
        path : `str`
            Path to the file to use for output.
        overwrite : `bool`, optional
            If True any existing file will be over written.

        Notes
        -----
        The name of the config file is stored in the Config object.

        Raises
        ------
        FileExistsError
            Raised if the file already exists but overwrite is False.
        """
        # Mode "x" makes open() fail if the file already exists.
        mode = "w" if overwrite else "x"
        with open(path, mode) as f:
            self.dump(f)
        self.configFile = path

    def dumpToS3File(self, uri, *, overwrite=True):
        """Writes the config to a file in S3 Bucket.

        Parameters
        ----------
        uri : `ButlerURI`
            S3 URI where the configuration should be stored.
        overwrite : `bool`, optional
            If False, a check will be made to see if the key already
            exists.

        Raises
        ------
        ModuleNotFoundError
            Raised if boto3 is not available.
        ValueError
            Raised if the URI does not use the ``s3`` scheme.
        FileExistsError
            Raised if the configuration already exists at this location
            and overwrite is set to `False`.
        """
        if boto3 is None:
            raise ModuleNotFoundError("Could not find boto3. "
                                      "Are you sure it is installed?")

        if uri.scheme != "s3":
            raise ValueError(f"Must provide S3 URI not {uri}")

        s3 = boto3.client("s3")

        if not overwrite:
            from .s3utils import s3CheckFileExists
            if s3CheckFileExists(uri, client=s3)[0]:
                raise FileExistsError(f"Config already exists at {uri}")

        bucket = uri.netloc
        key = uri.relativeToPathRoot

        # Serialize to memory first, then upload the text in one request.
        with io.StringIO() as stream:
            self.dump(stream)
            s3.put_object(Bucket=bucket, Key=key, Body=stream.getvalue())

    @staticmethod
    def updateParameters(configType, config, full, toUpdate=None, toCopy=None, overwrite=True):
        """Generic helper function for updating specific config parameters.

        Allows for named parameters to be set to new values in bulk, and
        for other values to be set by copying from a reference config.

        Assumes that the supplied config is compatible with ``configType``
        and will attach the updated values to the supplied config by
        looking for the related component key.  It is assumed that
        ``config`` and ``full`` are from the same part of the
        configuration hierarchy.

        Parameters
        ----------
        configType : `ConfigSubset`
            Config type to use to extract relevant items from ``config``.
        config : `Config`
            A `Config` to update. Only the subset understood by
            the supplied `ConfigSubset` will be modified. Default values
            will not be inserted and the content will not be validated
            since mandatory keys are allowed to be missing until
            populated later by merging.
        full : `Config`
            A complete config with all defaults expanded that can be
            converted to a ``configType``. Read-only and will not be
            modified by this method. Values are read from here if
            ``toCopy`` is defined.

            Repository-specific options that should not be obtained
            from defaults when Butler instances are constructed
            should be copied from ``full`` to ``config``.
        toUpdate : `dict`, optional
            A `dict` defining the keys to update and the new value to use.
            The keys and values can be any supported by `Config`
            assignment.
        toCopy : `tuple`, optional
            `tuple` of keys whose values should be copied from ``full``
            into ``config``.
        overwrite : `bool`, optional
            If `False`, do not modify a value in ``config`` if the key
            already exists.  Default is always to overwrite.

        Raises
        ------
        ValueError
            Neither ``toUpdate`` nor ``toCopy`` were defined.
        """
        if toUpdate is None and toCopy is None:
            raise ValueError("One of toUpdate or toCopy parameters must be set.")

        # If this is a parent configuration then we need to ensure that
        # the supplied config has the relevant component key in it.
        # If this is a parent configuration we add in the stub entry
        # so that the ConfigSubset constructor will do the right thing.
        # We check full for this since that is guaranteed to be complete.
        if configType.component in full and configType.component not in config:
            config[configType.component] = {}

        # Extract the part of the config we wish to update
        localConfig = configType(config, mergeDefaults=False, validate=False)

        if toUpdate:
            for key, value in toUpdate.items():
                if key in localConfig and not overwrite:
                    log.debug("Not overriding key '%s' with value '%s' in config %s",
                              key, value, localConfig.__class__.__name__)
                else:
                    localConfig[key] = value

        if toCopy:
            localFullConfig = configType(full, mergeDefaults=False)
            for key in toCopy:
                if key in localConfig and not overwrite:
                    log.debug("Not overriding key '%s' from defaults in config %s",
                              key, localConfig.__class__.__name__)
                else:
                    localConfig[key] = localFullConfig[key]

        # Reattach to parent if this is a child config
        if configType.component in config:
            config[configType.component] = localConfig
        else:
            config.update(localConfig)
class ConfigSubset(Config):
    """Config representing a subset of a more general configuration.

    Subclasses define their own component and when given a configuration
    that includes that component, the resulting configuration only includes
    the subset. For example, your config might contain ``dimensions`` if it's
    part of a global config and that subset will be stored. If ``dimensions``
    can not be found it is assumed that the entire contents of the
    configuration should be used.

    Default values are read from the environment or supplied search paths
    using the default configuration file name specified in the subclass.
    This allows a configuration class to be instantiated without any
    additional arguments.

    Additional validation can be specified to check for keys that are
    mandatory in the configuration.

    Parameters
    ----------
    other : `Config` or `str` or `dict`
        Argument specifying the configuration information as understood
        by `Config`.
    validate : `bool`, optional
        If `True` required keys will be checked to ensure configuration
        consistency.
    mergeDefaults : `bool`, optional
        If `True` defaults will be read and the supplied config will
        be combined with the defaults, with the supplied values taking
        precedence.
    searchPaths : `list` or `tuple`, optional
        Explicit additional paths to search for defaults. They should
        be supplied in priority order. These paths have higher priority
        than those read from the environment in
        `ConfigSubset.defaultSearchPaths()`.

    Raises
    ------
    RuntimeError
        Raised if the class named by the ``cls`` entry can not be imported.
    KeyError
        Raised if ``validate`` is `True` and a required key is missing.
    """

    component: ClassVar[Optional[str]] = None
    """Component to use from supplied config. Can be None. If specified the
    key is not required. Can be a full dot-separated path to a component.
    """

    requiredKeys: ClassVar[Sequence[str]] = ()
    """Keys that are required to be specified in the configuration.
    """

    defaultConfigFile: ClassVar[Optional[str]] = None
    """Name of the file containing defaults for this config class.
    """

    def __init__(self, other=None, validate=True, mergeDefaults=True, searchPaths=None):

        # Create a blank object to receive the defaults.
        # Once we have the defaults we then update with the external values.
        super().__init__()

        # Create a standard Config rather than subset
        externalConfig = Config(other)

        # Select the part we need from it.
        # To simplify the use of !include we also check for the existence of
        # component.component (since the included files can themselves
        # include the component name).
        if self.component is not None:
            doubled = (self.component, self.component)
            # Must check for double depth first
            if doubled in externalConfig:
                externalConfig = externalConfig[doubled]
            elif self.component in externalConfig:
                # NOTE(review): this indexes ``_data`` directly with the
                # component name; presumably that only works when the
                # component is a single top-level key -- confirm behavior
                # for dot-separated component paths.
                externalConfig._data = externalConfig._data[self.component]

        # Default files read to create this configuration
        self.filesRead = []

        # Assume we are not looking up child configurations
        containerKey = None

        # Sometimes we do not want to merge with defaults.
        if mergeDefaults:

            # Supplied search paths have highest priority
            fullSearchPath = []
            if searchPaths:
                fullSearchPath.extend(searchPaths)

            # Read default paths from environment
            fullSearchPath.extend(self.defaultSearchPaths())

            # There are two places to find defaults for this particular config
            # - The "defaultConfigFile" defined in the subclass
            # - The class specified in the "cls" element in the config.
            #   Read cls after merging in case it changes.
            if self.defaultConfigFile is not None:
                self._updateWithConfigsFromPath(fullSearchPath, self.defaultConfigFile)

            # Can have a class specification in the external config (priority)
            # or from the defaults.
            pytype = None
            if "cls" in externalConfig:
                pytype = externalConfig["cls"]
            elif "cls" in self:
                pytype = self["cls"]

            if pytype is not None:
                try:
                    cls = doImport(pytype)
                except ImportError as e:
                    raise RuntimeError(f"Failed to import cls '{pytype}' for config {type(self)}") from e
                defaultsFile = cls.defaultConfigFile
                if defaultsFile is not None:
                    self._updateWithConfigsFromPath(fullSearchPath, defaultsFile)

                # Get the container key in case we need it; classes that do
                # not define one leave it as None.
                containerKey = getattr(cls, "containerKey", None)

        # Now update this object with the external values so that the external
        # values always override the defaults
        self.update(externalConfig)

        # If this configuration has child configurations of the same
        # config class, we need to expand those defaults as well.
        if mergeDefaults and containerKey is not None and containerKey in self:
            for idx, subConfig in enumerate(self[containerKey]):
                self[containerKey, idx] = type(self)(other=subConfig, validate=validate,
                                                     mergeDefaults=mergeDefaults,
                                                     searchPaths=searchPaths)

        if validate:
            self.validate()

    @classmethod
    def defaultSearchPaths(cls):
        """Read the environment to determine search paths to use for global
        defaults.

        Global defaults, at lowest priority, are found in the ``config``
        directory of the butler source tree. Additional defaults can be
        defined using the environment variable ``$DAF_BUTLER_CONFIG_PATH``
        which is a PATH-like variable where paths at the front of the list
        have priority over those later. Empty entries in that variable
        (for example from a trailing separator) are ignored.

        Returns
        -------
        paths : `list`
            Returns a list of paths to search. The returned order is in
            priority with the highest priority paths first. The butler config
            directory will always be at the end of the list.
        """
        # We can pick up defaults from multiple search paths.
        # We fill defaults by using the butler config path and then
        # the config path environment variable in reverse order.
        defaultsPaths = []

        envValue = os.environ.get(CONFIG_PATH)
        if envValue:
            # Skip empty entries: joining "" with a file name yields a path
            # relative to the current working directory, which would let an
            # unrelated local file be picked up as a default.
            defaultsPaths.extend(path for path in envValue.split(os.pathsep) if path)

        # Find the butler configs
        defaultsPaths.append(os.path.join(lsst.utils.getPackageDir("daf_butler"), "config"))

        return defaultsPaths

    def _updateWithConfigsFromPath(self, searchPaths, configFile):
        """Search the supplied paths, merging the configuration values.

        The values read will override values currently stored in the object.
        Every file found in the path will be read, such that the earlier
        path entries have higher priority.

        Parameters
        ----------
        searchPaths : `list`
            Paths to search for the supplied configFile. This path
            is the priority order, such that files read from the
            first path entry will be selected over those read from
            a later path.
        configFile : `str`
            File to locate in path. If absolute path it will be read
            directly and the search path will not be used.
        """
        if os.path.isabs(configFile):
            # A missing absolute file is silently skipped.
            if os.path.exists(configFile):
                self.filesRead.append(configFile)
                self._updateWithOtherConfigFile(configFile)
        else:
            # Reverse order so that high priority entries
            # update the object last.
            for pathDir in reversed(searchPaths):
                candidate = os.path.join(pathDir, configFile)
                if os.path.exists(candidate):
                    self.filesRead.append(candidate)
                    self._updateWithOtherConfigFile(candidate)

    def _updateWithOtherConfigFile(self, file):
        """Read in some defaults and update.

        Update the configuration by reading the supplied file as a config
        of this class, and merging such that these values override the
        current values. Contents of the external config are not validated.

        Parameters
        ----------
        file : `Config`, `str`, or `dict`
            Entity that can be converted to a `ConfigSubset`.
        """
        # Use this class to read the defaults so that subsetting can happen
        # correctly. Defaults are not merged here to avoid recursing back
        # into the default search.
        externalConfig = type(self)(file, validate=False, mergeDefaults=False)
        self.update(externalConfig)

    def validate(self):
        """Check that mandatory keys are present in this configuration.

        Ignored if ``requiredKeys`` is empty.

        Raises
        ------
        KeyError
            Raised if any key listed in ``requiredKeys`` is not present.
        """
        # Keys are looked up directly in the underlying ``_data`` container.
        missing = [k for k in self.requiredKeys if k not in self._data]
        if missing:
            raise KeyError(f"Mandatory keys ({missing}) missing from supplied configuration for {type(self)}")