Coverage for python/lsst/daf/butler/core/configSupport.py: 21%
124 statements
« prev ^ index » next coverage.py v6.5.0, created at 2022-11-09 02:51 -0800
« prev ^ index » next coverage.py v6.5.0, created at 2022-11-09 02:51 -0800
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24"""Support for configuration snippets"""
26__all__ = ("LookupKey", "processLookupConfigs", "processLookupConfigList")
28import logging
29import re
30from collections.abc import Mapping
31from typing import TYPE_CHECKING, Any, Dict, Iterable, Optional, Set, Union
33from .dimensions import DimensionGraph
35if TYPE_CHECKING: 35 ↛ 36line 35 didn't jump to line 36, because the condition on line 35 was never true
36 from .config import Config
37 from .dimensions import Dimension, DimensionUniverse
# Module-level logger named after this module.
log = logging.getLogger(__name__)

# Pattern for ``key<value>`` strings such as ``instrument<HSC>``.
# Group 1 is the key, restricted to lowercase ASCII letters and underscores
# (no digits); group 2 is everything between the ``<`` that follows the key
# and the final ``>`` at the end of the string (greedy, so it may itself
# contain ``<`` or ``>``).
DATAID_RE = re.compile(r"([a-z_]+)<(.*)>$")
"""Regex to find dataIds embedded in configurations."""
class LookupKey:
    """Representation of key that can be used to lookup information.

    Look up is based on dataset type name, storage class name, dimensions.

    Parameters
    ----------
    name : `str`, optional
        Primary index string for lookup.  If this string looks like it
        represents dimensions (via ``dim1+dim2+dim3`` syntax) the name
        is converted to a `DimensionGraph` and stored in ``dimensions``
        property.
    dimensions : `DimensionGraph`, optional
        Dimensions that are relevant for lookup. Should not be specified
        if ``name`` is also specified.
    dataId : `dict`, optional
        Keys and values from a dataId that should control lookups.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to expand and validate ``name`` or
        ``dimensions``. Required if the key represents dimensions and a
        full `DimensionGraph` is not provided.  It is remembered so that
        `clone` can re-parse names that use the ``+`` syntax.

    Raises
    ------
    ValueError
        Raised if neither or both of ``name`` and ``dimensions`` are given,
        if ``name`` is not a string, or if a universe is needed to interpret
        the supplied values but none was provided.
    """

    def __init__(
        self,
        name: Optional[str] = None,
        dimensions: Optional[Iterable[Union[str, Dimension]]] = None,
        dataId: Optional[Dict[str, Any]] = None,
        *,
        universe: Optional[DimensionUniverse] = None,
    ):
        if name is None and dimensions is None:
            raise ValueError("At least one of name or dimensions must be given")

        if name is not None and dimensions is not None:
            raise ValueError("Can only accept one of name or dimensions")

        self._dimensions = None
        self._name = None
        # Kept so that clone() can rebuild keys whose name uses "+" syntax.
        self._universe = universe

        if name is not None:

            if not isinstance(name, str):
                raise ValueError(f"Supplied name must be str not: '{name}'")

            if "+" in name:
                if universe is None:
                    raise ValueError(f"Cannot construct LookupKey for {name} without dimension universe.")

                # If we are given a single dimension we use the "+" to
                # indicate this but have to filter out the empty value
                dimension_names = [n for n in name.split("+") if n]
                try:
                    self._dimensions = universe.extract(dimension_names)
                except KeyError:
                    # One or more of the dimensions is not known to the
                    # universe. This could be a typo or it could be that
                    # a config is being used that is not compatible with
                    # this universe. Use the name directly as a lookup key
                    # but issue a warning. This will be potentially annoying
                    # in the scenario where a lookup key comes from a
                    # default config but the users are using an external
                    # universe.
                    unknown = [dim for dim in dimension_names if universe.get(dim) is None]
                    log.debug(
                        "A LookupKey '%s' uses unknown dimensions: %s. Possible typo?"
                        " Using the name explicitly.",
                        name,
                        unknown,
                    )
                    self._name = name
            else:
                self._name = name

        elif dimensions is not None:
            if not isinstance(dimensions, DimensionGraph):
                if universe is None:
                    raise ValueError(
                        f"Cannot construct LookupKey for dimensions={dimensions} without universe."
                    )
                else:
                    self._dimensions = universe.extract(dimensions)
            else:
                self._dimensions = dimensions
        else:
            # mypy cannot work this out on its own
            raise ValueError("Name was None but dimensions is also None")

        # The dataId is converted to a frozenset of key/value
        # tuples so that it is not mutable
        self._dataId = frozenset(dataId.items()) if dataId is not None else None

    def __str__(self) -> str:
        # Compare against None rather than relying on truthiness so that an
        # (accepted) empty-string name does not look like a missing name.
        if self._name is not None:
            name = self._name
        elif self._dimensions is not None:
            name = "+".join(self._dimensions.names)
        else:
            raise RuntimeError("Internal error since name and dimensions are both None")

        # For the simple case return the simple string
        if not self._dataId:
            return name

        return f"{name} ({self.dataId})"

    def __repr__(self) -> str:
        # Collect only the populated constructor arguments and join them so
        # that no dangling separator is left behind.
        params = []
        if self._name is not None:
            params.append(f"name={self._name!r}")
        if self._dimensions is not None:
            params.append(f"dimensions={self._dimensions!r}")
        if self._dataId:
            params.append(f"dataId={self.dataId!r}")

        return f"{self.__class__.__name__}({', '.join(params)})"

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, type(self)):
            return False
        return (
            self._name == other._name
            and self._dimensions == other._dimensions
            and self._dataId == other._dataId
        )

    @property
    def name(self) -> Optional[str]:
        """Primary name string to use as lookup (`str`)."""
        return self._name

    @property
    def dimensions(self) -> Optional[DimensionGraph]:
        """Dimensions associated with lookup (`DimensionGraph`)."""
        return self._dimensions

    @property
    def dataId(self) -> Optional[Dict[str, Any]]:
        """Return dict of keys/values that are important for dataId lookup.

        (`dict` or `None`)
        """
        # A fresh dict is built on every access so callers cannot mutate the
        # internal (frozen) representation.
        if self._dataId is None:
            return None
        return dict(self._dataId)

    def __hash__(self) -> int:
        """Hash the lookup to allow use as a key in a dict."""
        return hash((self._name, self._dimensions, self._dataId))

    def clone(
        self,
        name: Optional[str] = None,
        dimensions: Optional[DimensionGraph] = None,
        dataId: Optional[Dict[str, Any]] = None,
    ) -> LookupKey:
        """Clone the object, overriding some options.

        Used to create a new instance of the object whilst updating
        some of it.

        Parameters
        ----------
        name : `str`, optional
            Primary index string for lookup.  Will override ``dimensions``
            if ``dimensions`` are set.
        dimensions : `DimensionGraph`, optional
            Dimensions that are relevant for lookup. Will override ``name``
            if ``name`` is already set.
        dataId : `dict`, optional
            Keys and values from a dataId that should control lookups.

        Returns
        -------
        clone : `LookupKey`
            Copy with updates.

        Raises
        ------
        ValueError
            Raised if both ``name`` and ``dimensions`` are supplied.
        """
        if name is not None and dimensions is not None:
            raise ValueError("Both name and dimensions can not be set")

        # if neither name nor dimensions are specified we copy from current
        # object. Otherwise we'll use the supplied values
        if name is None and dimensions is None:
            name = self._name
            dimensions = self._dimensions

        # Make sure we use the dict form for the constructor
        if dataId is None and self._dataId is not None:
            dataId = self.dataId

        # Forward the universe given at construction: without it a name
        # containing "+" could not be re-parsed by the constructor.
        return self.__class__(name=name, dimensions=dimensions, dataId=dataId, universe=self._universe)
def processLookupConfigs(
    config: Config, *, allow_hierarchy: bool = False, universe: Optional[DimensionUniverse] = None
) -> Dict[LookupKey, Union[str, Dict[str, Any]]]:
    """Process sections of configuration relating to lookups.

    Can be by dataset type name, storage class name, dimensions, or values
    of dimensions.

    Parameters
    ----------
    config : `Config`
        A `Config` representing a configuration mapping keys to values where
        the keys can be dataset type names, storage class names, dimensions
        or dataId components.
    allow_hierarchy : `bool`, optional
        If `True`, keys that refer to a hierarchy that does not look like
        a DataID specification are allowed and the full hierarchy, as a dict,
        will be returned in the value for the lookup key.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to expand and validate any used
        in lookup keys.

    Returns
    -------
    contents : `dict` of `LookupKey` to `str` or `dict`
        A `dict` with keys constructed from the configuration keys. Values
        are passed through unchanged from the configuration; they are
        hierarchies only when ``allow_hierarchy`` is `True`. It is assumed
        the caller will convert the values to the required form.

    Raises
    ------
    RuntimeError
        Raised if a key maps to a hierarchy, is not of the form
        ``key<value>``, and ``allow_hierarchy`` is `False`.

    Notes
    -----
    The configuration is a mapping where the keys correspond to names
    that can refer to dataset type or storage class names, or can use a
    special syntax to refer to dimensions or dataId values.

    Dimensions are indicated by using dimension names separated by a ``+``.
    If a single dimension is specified this is also supported so long as
    a ``+`` is found.  Dimensions are normalized before use such that if
    ``physical_filter+visit`` is defined, then an implicit ``instrument``
    will automatically be added.

    DataID overrides can be specified using the form: ``field<value>`` to
    indicate a subhierarchy.  All keys within that new hierarchy will take
    precedence over equivalent values in the root hierarchy.

    Currently only a single dataId field can be specified for a key.
    For example with a config such as:

    .. code-block:: yaml

       something:
         calexp: value1
         instrument<HSC>:
           calexp: value2

    Requesting the match for ``calexp`` would return ``value1`` unless
    a `DatasetRef` is used with a dataId containing the key ``instrument``
    and value ``HSC``.
    """
    contents: Dict[LookupKey, Union[str, Dict[str, Any]]] = {}
    for name, value in config.items():
        if not isinstance(value, Mapping):
            # Plain entry: the config key itself is the lookup key.
            contents[LookupKey(name=name, universe=universe)] = value
            continue

        # A hierarchy normally indicates a dataId component -- check the
        # format.  The config key is only turned into a LookupKey where it
        # is actually used as one: a ``key<value>`` selector is not a lookup
        # name and must not be parsed as such (its value may contain "+").
        kv = DATAID_RE.match(name)
        if kv:
            dataId = {kv.group(1): kv.group(2)}
            for subKey, subStr in value.items():
                contents[LookupKey(name=subKey, dataId=dataId, universe=universe)] = subStr
        elif allow_hierarchy:
            contents[LookupKey(name=name, universe=universe)] = value
        else:
            raise RuntimeError(f"Hierarchical key '{name}' not in form 'key<value>'")

    return contents
def processLookupConfigList(
    config: Iterable[Union[str, Mapping]], *, universe: Optional[DimensionUniverse] = None
) -> Set[LookupKey]:
    """Process sections of configuration relating to lookups.

    Can be by dataset type name, storage class name, dimensions, or values
    of dimensions.

    Parameters
    ----------
    config : `list` of `str` or `dict`
        Contents of a configuration listing keys that can be
        dataset type names, storage class names, dimensions
        or dataId components.  DataId components are represented as entries
        in the `list` of `dicts` with a single key with a value of a `list`
        of new keys.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to expand and validate any used
        in lookup keys.

    Returns
    -------
    lookups : `set` of `LookupKey`
        All the entries in the input list converted to `LookupKey` and
        returned in a `set`.

    Raises
    ------
    RuntimeError
        Raised if a `dict` entry does not contain exactly one key, or if
        that key is not of the form ``key<value>``.

    Notes
    -----
    Keys are parsed as described in `processLookupConfigs`.
    """
    contents: Set[LookupKey] = set()

    for name in config:
        if not isinstance(name, Mapping):
            contents.add(LookupKey(name=name, universe=universe))
            continue

        if len(name) != 1:
            raise RuntimeError(f"Config dict entry {name} must contain exactly one key")

        # Exactly one item is present, so unpack it directly.
        ((dataIdLookUp, subKeys),) = name.items()
        kv = DATAID_RE.match(dataIdLookUp)
        if not kv:
            # Report the offending key rather than the whole dict entry.
            raise RuntimeError(f"Hierarchical key '{dataIdLookUp}' not in form 'key<value>'")

        dataId = {kv.group(1): kv.group(2)}
        contents.update(LookupKey(name=subKey, dataId=dataId, universe=universe) for subKey in subKeys)

    return contents