Coverage for python/lsst/daf/butler/core/configSupport.py : 21%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""Support for configuration snippets"""

# The docstring must be the first statement of the module to be picked up as
# ``__doc__``; a ``from __future__`` import is allowed to follow it.
from __future__ import annotations
26__all__ = ("LookupKey", "processLookupConfigs",
27 "processLookupConfigList")
29import logging
30import re
31from collections.abc import Mapping
33from typing import (
34 TYPE_CHECKING,
35 Any,
36 Dict,
37 Iterable,
38 Optional,
39 Set,
40 Union,
41)
43from .dimensions import DimensionGraph
45if TYPE_CHECKING: 45 ↛ 46line 45 didn't jump to line 46, because the condition on line 45 was never true
46 from .dimensions import DimensionUniverse, Dimension
47 from .config import Config
49log = logging.getLogger(__name__)
51DATAID_RE = re.compile(r"([a-z_]+)<(.*)>$")
52"""Regex to find dataIds embedded in configurations."""
class LookupKey:
    """Representation of key that can be used to lookup information.

    Look up is based on dataset type name, storage class name, dimensions.

    Parameters
    ----------
    name : `str`, optional
        Primary index string for lookup.  If this string looks like it
        represents dimensions (via ``dim1+dim2+dim3`` syntax) the name
        is converted to a `DimensionGraph` and stored in ``dimensions``
        property.
    dimensions : `DimensionGraph`, optional
        Dimensions that are relevant for lookup. Should not be specified
        if ``name`` is also specified.
    dataId : `dict`, optional
        Keys and values from a dataId that should control lookups.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to expand and validate ``name`` or
        ``dimensions``.  Required if the key represents dimensions and a
        full `DimensionGraph` is not provided.

    Raises
    ------
    ValueError
        Raised if neither or both of ``name`` and ``dimensions`` are given,
        if ``name`` is not a `str`, or if dimensions have to be constructed
        but no ``universe`` was supplied.
    """

    def __init__(self, name: Optional[str] = None,
                 dimensions: Optional[Iterable[Union[str, Dimension]]] = None,
                 dataId: Optional[Dict[str, Any]] = None, *, universe: Optional[DimensionUniverse] = None):
        if name is None and dimensions is None:
            raise ValueError("At least one of name or dimensions must be given")

        if name is not None and dimensions is not None:
            raise ValueError("Can only accept one of name or dimensions")

        # Exactly one of these ends up being set below.
        self._dimensions = None
        self._name = None

        if name is not None:

            if not isinstance(name, str):
                raise ValueError(f"Supplied name must be str not: '{name}'")

            if "+" in name:
                # If we are given a single dimension we use the "+" to
                # indicate this but have to filter out the empty value
                dimension_names = [n for n in name.split("+") if n]
                if universe is None:
                    raise ValueError(f"Cannot construct LookupKey for {name} without dimension universe.")
                else:
                    self._dimensions = universe.extract(dimension_names)
            else:
                self._name = name

        elif dimensions is not None:
            if not isinstance(dimensions, DimensionGraph):
                if universe is None:
                    raise ValueError(f"Cannot construct LookupKey for dimensions={dimensions} "
                                     "without universe.")
                else:
                    self._dimensions = universe.extract(dimensions)
            else:
                self._dimensions = dimensions
        else:
            # mypy cannot work this out on its own
            raise ValueError("Name was None but dimensions is also None")

        # The dataId is converted to a frozenset of key/value
        # tuples so that it is not mutable
        self._dataId = frozenset(dataId.items()) if dataId is not None else None

    def __str__(self) -> str:
        # For the simple case return the simple string.  Compare against
        # None explicitly: an empty-string name is accepted by the
        # constructor and must not be mistaken for "no name".
        if self._name is not None:
            name = self._name
        elif self._dimensions is not None:
            name = "+".join(self._dimensions.names)
        else:
            raise RuntimeError("Internal error since name and dimensions are both None")

        if not self._dataId:
            return name

        return f"{name} ({self.dataId})"

    def __repr__(self) -> str:
        params = ""
        # Explicit None checks so that an empty name or an empty set of
        # dimensions is still reported rather than silently dropped.
        if self._name is not None:
            params += f"name={self._name!r},"
        if self._dimensions is not None:
            params += f"dimensions={self._dimensions!r},"
        if self._dataId:
            params += "dataId={" + ",".join(f"'{k}': {v!r}" for k, v in self._dataId) + "}"

        return f"{self.__class__.__name__}({params})"

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, type(self)):
            return False
        return (self._name == other._name
                and self._dimensions == other._dimensions
                and self._dataId == other._dataId)

    @property
    def name(self) -> Optional[str]:
        """Primary name string to use as lookup (`str`)."""
        return self._name

    @property
    def dimensions(self) -> Optional[DimensionGraph]:
        """Dimensions associated with lookup (`DimensionGraph`)."""
        return self._dimensions

    @property
    def dataId(self) -> Optional[Dict[str, Any]]:
        """Return dict of keys/values that are important for dataId lookup.

        (`dict` or `None`)
        """
        # A fresh dict is built on every access so callers can not modify
        # the immutable internal representation.
        if self._dataId is not None:
            return dict(self._dataId)
        else:
            return None

    def __hash__(self) -> int:
        """Hash the lookup to allow use as a key in a dict."""
        return hash((self._name, self._dimensions, self._dataId))

    def clone(self, name: Optional[str] = None, dimensions: Optional[DimensionGraph] = None,
              dataId: Optional[Dict[str, Any]] = None) -> LookupKey:
        """Clone the object, overriding some options.

        Used to create a new instance of the object whilst updating
        some of it.

        Parameters
        ----------
        name : `str`, optional
            Primary index string for lookup.  Will override ``dimensions``
            if ``dimensions`` are set.
        dimensions : `DimensionGraph`, optional
            Dimensions that are relevant for lookup. Will override ``name``
            if ``name`` is already set.
        dataId : `dict`, optional
            Keys and values from a dataId that should control lookups.

        Returns
        -------
        clone : `LookupKey`
            Copy with updates.

        Raises
        ------
        ValueError
            Raised if both ``name`` and ``dimensions`` are supplied.

        Notes
        -----
        No dimension universe is forwarded to the new instance, so a
        ``name`` using the ``dim1+dim2`` syntax can not be given here.
        """
        if name is not None and dimensions is not None:
            raise ValueError("Both name and dimensions can not be set")

        # if neither name nor dimensions are specified we copy from current
        # object.  Otherwise we'll use the supplied values
        if name is None and dimensions is None:
            name = self._name
            dimensions = self._dimensions

        # Make sure we use the dict form for the constructor
        if dataId is None and self._dataId is not None:
            dataId = self.dataId

        return self.__class__(name=name, dimensions=dimensions, dataId=dataId)
def processLookupConfigs(config: Config, *,
                         allow_hierarchy: bool = False,
                         universe: Optional[DimensionUniverse] = None) -> Dict[LookupKey,
                                                                              Union[str, Dict[str, Any]]]:
    """Process sections of configuration relating to lookups.

    Can be by dataset type name, storage class name, dimensions, or values
    of dimensions.

    Parameters
    ----------
    config : `Config`
        A `Config` representing a configuration mapping keys to values where
        the keys can be dataset type names, storage class names, dimensions
        or dataId components.
    allow_hierarchy : `bool`, optional
        If `True`, keys that refer to a hierarchy that does not look like
        a DataID specification are allowed and the full hierarchy, as a dict,
        will be returned in the value for the lookup key.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to expand and validate any used
        in lookup keys.

    Returns
    -------
    contents : `dict` of `LookupKey` to `str` or `dict`
        A `dict` with keys constructed from the configuration keys and values
        being simple strings, or the full sub-hierarchy when
        ``allow_hierarchy`` is `True` and the key is not a dataId override.
        It is assumed the caller will convert the values to the required
        form.

    Raises
    ------
    RuntimeError
        Raised if a key refers to a hierarchy, is not of the form
        ``field<value>`` and ``allow_hierarchy`` is `False`.
    ValueError
        Raised by `LookupKey` if a key can not be converted, for example
        if it specifies dimensions and no ``universe`` was given.

    Notes
    -----
    The configuration is a mapping where the keys correspond to names
    that can refer to dataset type or storage class names, or can use a
    special syntax to refer to dimensions or dataId values.

    Dimensions are indicated by using dimension names separated by a ``+``.
    If a single dimension is specified this is also supported so long as
    a ``+`` is found.  Dimensions are normalized before use such that if
    ``physical_filter+visit`` is defined, then an implicit ``instrument``
    will automatically be added.

    DataID overrides can be specified using the form: ``field<value>`` to
    indicate a subhierarchy.  All keys within that new hierarchy will take
    precedence over equivalent values in the root hierarchy.

    Currently only a single dataId field can be specified for a key.
    For example with a config such as:

    .. code-block:: yaml

       something:
         calexp: value1
         instrument<HSC>:
           calexp: value2

    Requesting the match for ``calexp`` would return ``value1`` unless
    a `DatasetRef` is used with a dataId containing the key ``instrument``
    and value ``HSC``.

    The values of the mapping are stored as strings.
    """
    contents: Dict[LookupKey, Union[str, Dict[str, Any]]] = {}
    for name, value in config.items():
        if isinstance(value, Mapping):
            # indicates a dataId component -- check the format.  This is
            # done before any LookupKey is built from ``name`` so that a
            # dataId value containing "+" (``field<a+b>``) is not mistaken
            # for a list of dimensions.
            kv = DATAID_RE.match(name) if isinstance(name, str) else None
            if kv:
                # LookupKey freezes the dataId so sharing one dict between
                # all the sub keys is safe.
                dataId = {kv.group(1): kv.group(2)}
                for subKey, subStr in value.items():
                    lookup = LookupKey(name=subKey, dataId=dataId, universe=universe)
                    contents[lookup] = subStr
                continue
            if not allow_hierarchy:
                raise RuntimeError(f"Hierarchical key '{name}' not in form 'key<value>'")

        # Either a simple value or a hierarchy that is stored unmodified.
        contents[LookupKey(name=name, universe=universe)] = value

    return contents
def processLookupConfigList(config: Iterable[Union[str, Mapping]],
                            *, universe: Optional[DimensionUniverse] = None) -> Set[LookupKey]:
    """Process sections of configuration relating to lookups.

    Can be by dataset type name, storage class name, dimensions, or values
    of dimensions.

    Parameters
    ----------
    config : `list` of `str` or `dict`
        Contents of a configuration listing keys that can be
        dataset type names, storage class names, dimensions
        or dataId components.  DataId components are represented as entries
        in the `list` of `dicts` with a single key with a value of a `list`
        of new keys.
    universe : `DimensionUniverse`, optional
        Set of all known dimensions, used to expand and validate any used
        in lookup keys.

    Returns
    -------
    lookups : `set` of `LookupKey`
        All the entries in the input list converted to `LookupKey` and
        returned in a `set`.

    Raises
    ------
    RuntimeError
        Raised if a `dict` entry does not have exactly one key, or if that
        key is not of the form ``field<value>``.
    ValueError
        Raised by `LookupKey` if an entry can not be converted, for example
        if it specifies dimensions and no ``universe`` was given.

    Notes
    -----
    Keys are parsed as described in `processLookupConfigs`.
    """
    contents: Set[LookupKey] = set()

    for name in config:
        if not isinstance(name, Mapping):
            contents.add(LookupKey(name=name, universe=universe))
            continue

        if len(name) != 1:
            raise RuntimeError(f"Config dict entry {name} must contain exactly one key")

        # Exactly one item is present so it can be unpacked directly.
        (dataIdLookUp, subKeys), = name.items()

        kv = DATAID_RE.match(dataIdLookUp)
        if not kv:
            # Report the offending key rather than the whole dict entry.
            raise RuntimeError(f"Hierarchical key '{dataIdLookUp}' not in form 'key<value>'")

        # A lone string would otherwise be iterated character by character.
        if isinstance(subKeys, str):
            subKeys = [subKeys]

        # LookupKey freezes the dataId so sharing one dict between all the
        # sub keys is safe.
        dataId = {kv.group(1): kv.group(2)}
        for subKey in subKeys:
            contents.add(LookupKey(name=subKey, dataId=dataId, universe=universe))

    return contents