Coverage for python/lsst/daf/butler/registry/wildcards.py: 22%
171 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-16 10:44 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
27from __future__ import annotations
29__all__ = (
30 "CategorizedWildcard",
31 "CollectionWildcard",
32 "DatasetTypeWildcard",
33)
35import contextlib
36import dataclasses
37import re
38from collections.abc import Callable, Iterable, Mapping
39from types import EllipsisType
40from typing import Any
42from lsst.utils.iteration import ensure_iterable
44from .._dataset_type import DatasetType
45from ..utils import globToRegex
46from ._exceptions import CollectionExpressionError, DatasetTypeExpressionError
49@dataclasses.dataclass
50class CategorizedWildcard:
51 """The results of preprocessing a wildcard expression to separate match
52 patterns from strings.
54 The `fromExpression` method should almost always be used to construct
55 instances, as the regular constructor performs no checking of inputs (and
56 that can lead to confusing error messages downstream).
57 """
59 @classmethod
60 def fromExpression(
61 cls,
62 expression: Any,
63 *,
64 allowAny: bool = True,
65 allowPatterns: bool = True,
66 coerceUnrecognized: Callable[[Any], tuple[str, Any] | str] | None = None,
67 coerceItemValue: Callable[[Any], Any] | None = None,
68 defaultItemValue: Any | None = None,
69 ) -> CategorizedWildcard | EllipsisType:
70 """Categorize a wildcard expression.
72 Parameters
73 ----------
74 expression : `~typing.Any`
75 The expression to categorize. May be any of:
77 - `str` (including glob patterns if ``allowPatterns`` is `True`);
78 - `re.Pattern` (only if ``allowPatterns`` is `True`);
79 - objects recognized by ``coerceUnrecognized`` (if provided);
80 - two-element tuples of (`str`, value) where value is recognized
81 by ``coerceItemValue`` (if provided);
82 - a non-`str`, non-mapping iterable containing any of the above;
83 - the special value `...` (only if ``allowAny`` is `True`), which
84 matches anything;
85 - a mapping from `str` to a value are recognized by
86 ``coerceItemValue`` (if provided);
87 - a `CategorizedWildcard` instance (passed through unchanged if
88 it meets the requirements specified by keyword arguments).
89 allowAny : `bool`, optional
90 If `False` (`True` is default) raise `TypeError` if `...` is
91 encountered.
92 allowPatterns : `bool`, optional
93 If `False` (`True` is default) raise `TypeError` if a `re.Pattern`
94 is encountered, or if ``expression`` is a `CategorizedWildcard`
95 with `patterns` not empty.
96 coerceUnrecognized : `~collections.abc.Callable`, optional
97 A callback that takes a single argument of arbitrary type and
98 returns either a `str` - appended to `strings` - or a `tuple` of
99 (`str`, `Any`) to be appended to `items`. This will be called on
100 objects of unrecognized type. Exceptions will be reraised as
101 `TypeError` (and chained).
102 coerceItemValue : `~collections.abc.Callable`, optional
103 If provided, ``expression`` may be a mapping from `str` to any
104 type that can be passed to this function; the result of that call
105 will be stored instead as the value in ``self.items``.
106 defaultItemValue : `Any`, optional
107 If provided, combine this value with any string values encountered
108 (including any returned by ``coerceUnrecognized``) to form a
109 `tuple` and add it to `items`, guaranteeing that `strings` will be
110 empty. Patterns are never added to `items`.
112 Returns
113 -------
114 categorized : `CategorizedWildcard` or ``...``.
115 The struct describing the wildcard. ``...`` is passed through
116 unchanged.
118 Raises
119 ------
120 TypeError
121 Raised if an unsupported type is found in the expression.
122 """
123 assert expression is not None
124 # See if we were given ...; just return that if we were.
125 if expression is ...:
126 if not allowAny:
127 raise TypeError("This expression may not be unconstrained.")
128 return ...
129 if isinstance(expression, cls):
130 # This is already a CategorizedWildcard. Make sure it meets the
131 # reqs. implied by the kwargs we got.
132 if not allowPatterns and expression.patterns:
133 raise TypeError(
134 f"Regular expression(s) {expression.patterns} are not allowed in this context."
135 )
136 if defaultItemValue is not None and expression.strings:
137 if expression.items:
138 raise TypeError(
139 "Incompatible preprocessed expression: an ordered sequence of str is "
140 "needed, but the original order was lost in the preprocessing."
141 )
142 return cls(
143 strings=[],
144 patterns=expression.patterns,
145 items=[(k, defaultItemValue) for k in expression.strings],
146 )
147 elif defaultItemValue is None and expression.items:
148 if expression.strings:
149 raise TypeError(
150 "Incompatible preprocessed expression: an ordered sequence of items is "
151 "needed, but the original order was lost in the preprocessing."
152 )
153 return cls(strings=[k for k, _ in expression.items], patterns=expression.patterns, items=[])
154 else:
155 # Original expression was created with keyword arguments that
156 # were at least as restrictive as what we just got; pass it
157 # through.
158 return expression
160 # If we get here, we know we'll be creating a new instance.
161 # Initialize an empty one now.
162 self = cls(strings=[], patterns=[], items=[])
164 # If mappings are allowed, see if we were given a single mapping by
165 # trying to get items.
166 if coerceItemValue is not None:
167 rawItems = None
168 with contextlib.suppress(AttributeError):
169 rawItems = expression.items()
171 if rawItems is not None:
172 for k, v in rawItems:
173 try:
174 self.items.append((k, coerceItemValue(v)))
175 except Exception as err:
176 raise TypeError(f"Could not coerce mapping value '{v}' for key '{k}'.") from err
177 return self
179 # Not ..., a CategorizedWildcard instance, or a mapping. Just
180 # process scalars or an iterable. We put the body of the loop inside
181 # a local function so we can recurse after coercion.
183 def process(element: Any, alreadyCoerced: bool = False) -> EllipsisType | None:
184 if isinstance(element, str):
185 if defaultItemValue is not None:
186 self.items.append((element, defaultItemValue))
187 return None
188 else:
189 # This returns a list but we know we only passed in
190 # single value.
191 converted = globToRegex(element)
192 if converted is ...:
193 return ...
194 element = converted[0]
195 # Let regex and ... go through to the next check
196 if isinstance(element, str):
197 self.strings.append(element)
198 return None
199 if allowPatterns and isinstance(element, re.Pattern):
200 self.patterns.append(element)
201 return None
202 if alreadyCoerced:
203 try:
204 k, v = element
205 except TypeError:
206 raise TypeError(
207 f"Object '{element!r}' returned by coercion function must be `str` or `tuple`."
208 ) from None
209 else:
210 self.items.append((k, v))
211 return None
212 if coerceItemValue is not None:
213 try:
214 k, v = element
215 except TypeError:
216 pass
217 else:
218 if not isinstance(k, str):
219 raise TypeError(f"Item key '{k}' is not a string.")
220 try:
221 v = coerceItemValue(v)
222 except Exception as err:
223 raise TypeError(f"Could not coerce tuple item value '{v}' for key '{k}'.") from err
224 self.items.append((k, v))
225 return None
226 if coerceUnrecognized is not None:
227 try:
228 # This should be safe but flake8 cant tell that the
229 # function will be re-declared next function call
230 process(coerceUnrecognized(element), alreadyCoerced=True) # noqa: F821
231 except Exception as err:
232 raise TypeError(f"Could not coerce expression element '{element!r}'.") from err
233 else:
234 extra = "."
235 if isinstance(element, re.Pattern):
236 extra = " and patterns are not allowed."
237 raise TypeError(f"Unsupported object in wildcard expression: '{element!r}'{extra}")
238 return None
240 for element in ensure_iterable(expression):
241 retval = process(element)
242 if retval is ...:
243 # One of the globs matched everything
244 if not allowAny:
245 raise TypeError("This expression may not be unconstrained.")
246 return ...
247 del process
248 return self
250 strings: list[str]
251 """Explicit string values found in the wildcard (`list` [ `str` ]).
252 """
254 patterns: list[re.Pattern]
255 """Regular expression patterns found in the wildcard
256 (`list` [ `re.Pattern` ]).
257 """
259 items: list[tuple[str, Any]]
260 """Two-item tuples that relate string values to other objects
261 (`list` [ `tuple` [ `str`, `Any` ] ]).
262 """
265@dataclasses.dataclass(frozen=True)
266class CollectionWildcard:
267 """A validated wildcard for collection names.
269 The `from_expression` method should almost always be used to construct
270 instances, as the regular constructor performs no checking of inputs (and
271 that can lead to confusing error messages downstream).
273 Notes
274 -----
275 `CollectionWildcard` is expected to be rarely used outside of `Registry`
276 (which uses it to back several of its "query" methods that take general
277 expressions for collections), but it may occasionally be useful outside
278 `Registry` as a way to preprocess expressions that contain single-pass
279 iterators into a form that can be used to call those `Registry` methods
280 multiple times.
281 """
283 strings: tuple[str, ...] = ()
284 """An an ordered list of explicitly-named collections. (`tuple` [ `str` ]).
285 """
287 patterns: tuple[re.Pattern, ...] | EllipsisType = ...
288 """Regular expression patterns to match against collection names, or the
289 special value ``...`` indicating all collections.
291 `...` must be accompanied by ``strings=()``.
292 """
294 def __post_init__(self) -> None:
295 if self.patterns is ... and self.strings:
296 raise ValueError(
297 f"Collection wildcard matches any string, but still has explicit strings {self.strings}."
298 )
300 @classmethod
301 def from_expression(cls, expression: Any, require_ordered: bool = False) -> CollectionWildcard:
302 """Process a general expression to construct a `CollectionWildcard`
303 instance.
305 Parameters
306 ----------
307 expression : `~typing.Any`
308 May be:
310 - a `str` collection name;
311 - an `re.Pattern` instance to match (with `re.Pattern.fullmatch`)
312 against collection names;
313 - any iterable containing any of the above;
314 - another `CollectionWildcard` instance (passed through unchanged).
316 Duplicate collection names will be removed (preserving the first
317 appearance of each collection name).
318 require_ordered : `bool`, optional
319 If `True` (`False` is default) require the expression to be
320 ordered, and raise `CollectionExpressionError` if it is not.
322 Returns
323 -------
324 wildcard : `CollectionWildcard`
325 A `CollectionWildcard` instance.
327 Raises
328 ------
329 CollectionExpressionError
330 Raised if the patterns has regular expression, glob patterns, or
331 the ``...`` wildcard, and ``require_ordered=True``.
332 """
333 if isinstance(expression, cls):
334 return expression
335 if expression is ...:
336 return cls()
337 wildcard = CategorizedWildcard.fromExpression(
338 expression,
339 allowAny=True,
340 allowPatterns=True,
341 )
342 if wildcard is ...:
343 return cls()
344 result = cls(
345 strings=tuple(wildcard.strings),
346 patterns=tuple(wildcard.patterns),
347 )
348 if require_ordered:
349 result.require_ordered()
350 return result
352 @classmethod
353 def from_names(cls, names: Iterable[str]) -> CollectionWildcard:
354 """Construct from an iterable of explicit collection names.
356 Parameters
357 ----------
358 names : `~collections.abc.Iterable` [ `str` ]
359 Iterable of collection names.
361 Returns
362 -------
363 wildcard : ~CollectionWildcard`
364 A `CollectionWildcard` instance. `require_ordered` is guaranteed
365 to succeed and return the given names in order.
366 """
367 return cls(strings=tuple(names), patterns=())
369 def require_ordered(self) -> tuple[str, ...]:
370 """Require that this wildcard contains no patterns, and return the
371 ordered tuple of names that it does hold.
373 Returns
374 -------
375 names : `tuple` [ `str` ]
376 Ordered tuple of collection names.
378 Raises
379 ------
380 CollectionExpressionError
381 Raised if the patterns has regular expression, glob patterns, or
382 the ``...`` wildcard.
383 """
384 if self.patterns:
385 raise CollectionExpressionError(
386 f"An ordered collection expression is required; got patterns {self.patterns}."
387 )
388 return self.strings
390 def empty(self) -> bool:
391 """Return true if both ``strings`` and ``patterns`` are empty."""
392 # bool(Ellipsis) is True
393 return not self.strings and not self.patterns
395 def __str__(self) -> str:
396 if self.patterns is ...:
397 return "..."
398 else:
399 terms = list(self.strings)
400 terms.extend(str(p) for p in self.patterns)
401 return "[{}]".format(", ".join(terms))
404@dataclasses.dataclass
405class DatasetTypeWildcard:
406 """A validated expression that resolves to one or more dataset types.
408 The `from_expression` method should almost always be used to construct
409 instances, as the regular constructor performs no checking of inputs (and
410 that can lead to confusing error messages downstream).
411 """
413 values: Mapping[str, DatasetType | None] = dataclasses.field(default_factory=dict)
414 """A mapping with `str` dataset type name keys and optional `DatasetType`
415 instances.
416 """
418 patterns: tuple[re.Pattern, ...] | EllipsisType = ...
419 """Regular expressions to be matched against dataset type names, or the
420 special value ``...`` indicating all dataset types.
422 Any pattern matching a dataset type is considered an overall match for
423 the expression.
424 """
426 @classmethod
427 def from_expression(cls, expression: Any) -> DatasetTypeWildcard:
428 """Construct an instance by analyzing the given expression.
430 Parameters
431 ----------
432 expression : `~typing.Any`
433 Expression to analyze. May be any of the following:
435 - a `str` dataset type name;
436 - a `DatasetType` instance;
437 - a `re.Pattern` to match against dataset type names;
438 - an iterable whose elements may be any of the above (any dataset
439 type matching any element in the list is an overall match);
440 - an existing `DatasetTypeWildcard` instance;
441 - the special ``...`` ellipsis object, which matches any dataset
442 type.
444 Returns
445 -------
446 query : `DatasetTypeWildcard`
447 An instance of this class (new unless an existing instance was
448 passed in).
450 Raises
451 ------
452 DatasetTypeExpressionError
453 Raised if the given expression does not have one of the allowed
454 types.
455 """
456 if isinstance(expression, cls):
457 return expression
458 try:
459 wildcard = CategorizedWildcard.fromExpression(
460 expression, coerceUnrecognized=lambda d: (d.name, d)
461 )
462 except TypeError as err:
463 raise DatasetTypeExpressionError(f"Invalid dataset type expression: {expression!r}.") from err
464 if wildcard is ...:
465 return cls()
466 values: dict[str, DatasetType | None] = {}
467 for name in wildcard.strings:
468 values[name] = None
469 for name, item in wildcard.items:
470 if not isinstance(item, DatasetType):
471 raise DatasetTypeExpressionError(
472 f"Invalid value '{item}' of type {type(item)} in dataset type expression; "
473 "expected str, re.Pattern, DatasetType objects, iterables thereof, or '...'."
474 )
475 values[name] = item
476 return cls(values, patterns=tuple(wildcard.patterns))
478 def __str__(self) -> str:
479 if self.patterns is ...:
480 return "..."
481 else:
482 terms = list(self.values.keys())
483 terms.extend(str(p) for p in self.patterns)
484 return "[{}]".format(", ".join(terms))