Coverage for python/lsst/daf/butler/registry/wildcards.py: 21%
209 statements
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-17 09:33 +0000
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-17 09:33 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
# Names exported by a star-import of this module; each entry is a class
# defined further down in this file.
23__all__ = (
24 "CategorizedWildcard",
25 "CollectionWildcard",
26 "CollectionSearch",
27 "DatasetTypeWildcard",
28)
30import dataclasses
31import re
32from collections.abc import Callable, Iterable, Iterator, Mapping, Sequence
33from typing import Any
35from deprecated.sphinx import deprecated
36from lsst.utils.ellipsis import Ellipsis, EllipsisType
37from lsst.utils.iteration import ensure_iterable
38from pydantic import BaseModel
40from ..core import DatasetType
41from ..core.utils import globToRegex
42from ._exceptions import CollectionExpressionError, DatasetTypeExpressionError
45@dataclasses.dataclass
46class CategorizedWildcard:
47 """The results of preprocessing a wildcard expression to separate match
48 patterns from strings.
50 The `fromExpression` method should almost always be used to construct
51 instances, as the regular constructor performs no checking of inputs (and
52 that can lead to confusing error messages downstream).
53 """
55 @classmethod
56 def fromExpression(
57 cls,
58 expression: Any,
59 *,
60 allowAny: bool = True,
61 allowPatterns: bool = True,
62 coerceUnrecognized: Callable[[Any], tuple[str, Any] | str] | None = None,
63 coerceItemValue: Callable[[Any], Any] | None = None,
64 defaultItemValue: Any | None = None,
65 ) -> CategorizedWildcard | EllipsisType:
66 """Categorize a wildcard expression.
68 Parameters
69 ----------
70 expression
71 The expression to categorize. May be any of:
72 - `str` (including glob patterns if ``allowPatterns`` is `True`);
73 - `re.Pattern` (only if ``allowPatterns`` is `True`);
74 - objects recognized by ``coerceUnrecognized`` (if provided);
75 - two-element tuples of (`str`, value) where value is recognized
76 by ``coerceItemValue`` (if provided);
77 - a non-`str`, non-mapping iterable containing any of the above;
78 - the special value `...` (only if ``allowAny`` is `True`), which
79 matches anything;
80 - a mapping from `str` to a value are recognized by
81 ``coerceItemValue`` (if provided);
82 - a `CategorizedWildcard` instance (passed through unchanged if
83 it meets the requirements specified by keyword arguments).
84 allowAny: `bool`, optional
85 If `False` (`True` is default) raise `TypeError` if `...` is
86 encountered.
87 allowPatterns: `bool`, optional
88 If `False` (`True` is default) raise `TypeError` if a `re.Pattern`
89 is encountered, or if ``expression`` is a `CategorizedWildcard`
90 with `patterns` not empty.
91 coerceUnrecognized: `Callable`, optional
92 A callback that takes a single argument of arbitrary type and
93 returns either a `str` - appended to `strings` - or a `tuple` of
94 (`str`, `Any`) to be appended to `items`. This will be called on
95 objects of unrecognized type. Exceptions will be reraised as
96 `TypeError` (and chained).
97 coerceItemValue: `Callable`, optional
98 If provided, ``expression`` may be a mapping from `str` to any
99 type that can be passed to this function; the result of that call
100 will be stored instead as the value in ``self.items``.
101 defaultItemValue: `Any`, optional
102 If provided, combine this value with any string values encountered
103 (including any returned by ``coerceUnrecognized``) to form a
104 `tuple` and add it to `items`, guaranteeing that `strings` will be
105 empty. Patterns are never added to `items`.
107 Returns
108 -------
109 categorized : `CategorizedWildcard` or ``...``.
110 The struct describing the wildcard. ``...`` is passed through
111 unchanged.
113 Raises
114 ------
115 TypeError
116 Raised if an unsupported type is found in the expression.
117 """
118 assert expression is not None
119 # See if we were given ...; just return that if we were.
120 if expression is Ellipsis:
121 if not allowAny:
122 raise TypeError("This expression may not be unconstrained.")
123 return Ellipsis
124 if isinstance(expression, cls):
125 # This is already a CategorizedWildcard. Make sure it meets the
126 # reqs. implied by the kwargs we got.
127 if not allowPatterns and expression.patterns:
128 raise TypeError(
129 f"Regular expression(s) {expression.patterns} are not allowed in this context."
130 )
131 if defaultItemValue is not None and expression.strings:
132 if expression.items:
133 raise TypeError(
134 "Incompatible preprocessed expression: an ordered sequence of str is "
135 "needed, but the original order was lost in the preprocessing."
136 )
137 return cls(
138 strings=[],
139 patterns=expression.patterns,
140 items=[(k, defaultItemValue) for k in expression.strings],
141 )
142 elif defaultItemValue is None and expression.items:
143 if expression.strings:
144 raise TypeError(
145 "Incompatible preprocessed expression: an ordered sequence of items is "
146 "needed, but the original order was lost in the preprocessing."
147 )
148 return cls(strings=[k for k, _ in expression.items], patterns=expression.patterns, items=[])
149 else:
150 # Original expression was created with keyword arguments that
151 # were at least as restrictive as what we just got; pass it
152 # through.
153 return expression
155 # If we get here, we know we'll be creating a new instance.
156 # Initialize an empty one now.
157 self = cls(strings=[], patterns=[], items=[])
159 # If mappings are allowed, see if we were given a single mapping by
160 # trying to get items.
161 if coerceItemValue is not None:
162 rawItems = None
163 try:
164 rawItems = expression.items()
165 except AttributeError:
166 pass
167 if rawItems is not None:
168 for k, v in rawItems:
169 try:
170 self.items.append((k, coerceItemValue(v)))
171 except Exception as err:
172 raise TypeError(f"Could not coerce mapping value '{v}' for key '{k}'.") from err
173 return self
175 # Not ..., a CategorizedWildcard instance, or a mapping. Just
176 # process scalars or an iterable. We put the body of the loop inside
177 # a local function so we can recurse after coercion.
179 def process(element: Any, alreadyCoerced: bool = False) -> EllipsisType | None:
180 if isinstance(element, str):
181 if defaultItemValue is not None:
182 self.items.append((element, defaultItemValue))
183 return None
184 else:
185 # This returns a list but we know we only passed in
186 # single value.
187 converted = globToRegex(element)
188 if converted is Ellipsis:
189 return Ellipsis
190 element = converted[0]
191 # Let regex and ... go through to the next check
192 if isinstance(element, str):
193 self.strings.append(element)
194 return None
195 if allowPatterns and isinstance(element, re.Pattern):
196 self.patterns.append(element)
197 return None
198 if alreadyCoerced:
199 try:
200 k, v = element
201 except TypeError:
202 raise TypeError(
203 f"Object '{element!r}' returned by coercion function must be `str` or `tuple`."
204 ) from None
205 else:
206 self.items.append((k, v))
207 return None
208 if coerceItemValue is not None:
209 try:
210 k, v = element
211 except TypeError:
212 pass
213 else:
214 if not isinstance(k, str):
215 raise TypeError(f"Item key '{k}' is not a string.")
216 try:
217 v = coerceItemValue(v)
218 except Exception as err:
219 raise TypeError(f"Could not coerce tuple item value '{v}' for key '{k}'.") from err
220 self.items.append((k, v))
221 return None
222 if coerceUnrecognized is not None:
223 try:
224 # This should be safe but flake8 cant tell that the
225 # function will be re-declared next function call
226 process(coerceUnrecognized(element), alreadyCoerced=True) # noqa: F821
227 except Exception as err:
228 raise TypeError(f"Could not coerce expression element '{element!r}'.") from err
229 else:
230 extra = "."
231 if isinstance(element, re.Pattern):
232 extra = " and patterns are not allowed."
233 raise TypeError(f"Unsupported object in wildcard expression: '{element!r}'{extra}")
234 return None
236 for element in ensure_iterable(expression):
237 retval = process(element)
238 if retval is Ellipsis:
239 # One of the globs matched everything
240 if not allowAny:
241 raise TypeError("This expression may not be unconstrained.")
242 return Ellipsis
243 del process
244 return self
246 strings: list[str]
247 """Explicit string values found in the wildcard (`list` [ `str` ]).
248 """
250 patterns: list[re.Pattern]
251 """Regular expression patterns found in the wildcard
252 (`list` [ `re.Pattern` ]).
253 """
255 items: list[tuple[str, Any]]
256 """Two-item tuples that relate string values to other objects
257 (`list` [ `tuple` [ `str`, `Any` ] ]).
258 """
@deprecated(
    reason="Tuples of string collection names are now preferred.  Will be removed after v26.",
    version="v25.0",
    category=FutureWarning,
)
class CollectionSearch(BaseModel, Sequence[str]):
    """An ordered search path of collections.

    The `fromExpression` method should almost always be used to construct
    instances, as the regular constructor performs no checking of inputs (and
    that can lead to confusing error messages downstream).

    Parameters
    ----------
    collections : `tuple` [ `str` ]
        Tuple of collection names, ordered from the first searched to the last
        searched.

    Notes
    -----
    A `CollectionSearch` is used to find a single dataset (or set of datasets
    with different dataset types or data IDs) according to its dataset type and
    data ID, giving preference to collections in the order in which they are
    specified.  A `CollectionWildcard` can be constructed from a broader range
    of expressions but does not order the collections to be searched.

    `CollectionSearch` is an immutable sequence of `str` collection names.

    A `CollectionSearch` instance constructed properly (e.g. via
    `fromExpression`) is a unique representation of a particular search path;
    it is exactly the same internally and compares as equal to any
    `CollectionSearch` constructed from an equivalent expression, regardless of
    how different the original expressions appear.
    """

    __root__: tuple[str, ...]

    @classmethod
    def fromExpression(cls, expression: Any) -> CollectionSearch:
        """Process a general expression to construct a `CollectionSearch`
        instance.

        Parameters
        ----------
        expression
            May be:
             - a `str` collection name;
             - an iterable of `str` collection names;
             - another `CollectionSearch` instance (passed through
               unchanged).

            Duplicate entries will be removed (preserving the first appearance
            of each collection name).

        Returns
        -------
        collections : `CollectionSearch`
            A `CollectionSearch` instance.

        Raises
        ------
        CollectionExpressionError
            Raised if the expression cannot be categorized as a sequence of
            plain collection names (patterns and ``...`` are not allowed).
        """
        # First see if this is already a CollectionSearch; just pass that
        # through unchanged.  This lets us standardize expressions (and turn
        # single-pass iterators into multi-pass iterables) in advance and pass
        # them down to other routines that accept arbitrary expressions.
        if isinstance(expression, cls):
            return expression
        try:
            wildcard = CategorizedWildcard.fromExpression(
                expression,
                allowAny=False,
                allowPatterns=False,
            )
        except TypeError as err:
            raise CollectionExpressionError(str(err)) from None
        assert wildcard is not Ellipsis
        assert not wildcard.patterns
        assert not wildcard.items
        # dict preserves insertion order, so this drops repeats in linear time
        # while keeping the first appearance of each name.
        return cls(__root__=tuple(dict.fromkeys(wildcard.strings)))

    def explicitNames(self) -> Iterator[str]:
        """Iterate over collection names that were specified explicitly."""
        yield from self.__root__

    def __iter__(self) -> Iterator[str]:  # type: ignore
        yield from self.__root__

    def __len__(self) -> int:
        return len(self.__root__)

    def __getitem__(self, index: Any) -> str:
        return self.__root__[index]

    def __eq__(self, other: Any) -> bool:
        # Only another CollectionSearch with the same ordered names is equal.
        if isinstance(other, CollectionSearch):
            return self.__root__ == other.__root__
        return False

    def __str__(self) -> str:
        return "[{}]".format(", ".join(self))

    def __repr__(self) -> str:
        return f"CollectionSearch({self.__root__!r})"
368@dataclasses.dataclass(frozen=True)
369class CollectionWildcard:
370 """A validated wildcard for collection names
372 The `from_expression` method should almost always be used to construct
373 instances, as the regular constructor performs no checking of inputs (and
374 that can lead to confusing error messages downstream).
376 Notes
377 -----
378 `CollectionWildcard` is expected to be rarely used outside of `Registry`
379 (which uses it to back several of its "query" methods that take general
380 expressions for collections), but it may occasionally be useful outside
381 `Registry` as a way to preprocess expressions that contain single-pass
382 iterators into a form that can be used to call those `Registry` methods
383 multiple times.
384 """
386 strings: tuple[str, ...] = ()
387 """An an ordered list of explicitly-named collections. (`tuple` [ `str` ]).
388 """
390 patterns: tuple[re.Pattern, ...] | EllipsisType = Ellipsis
391 """Regular expression patterns to match against collection names, or the
392 special value ``...`` indicating all collections.
394 `...` must be accompanied by ``strings=()``.
395 """
397 def __post_init__(self) -> None:
398 if self.patterns is Ellipsis and self.strings:
399 raise ValueError(
400 f"Collection wildcard matches any string, but still has explicit strings {self.strings}."
401 )
403 @classmethod
404 def from_expression(cls, expression: Any, require_ordered: bool = False) -> CollectionWildcard:
405 """Process a general expression to construct a `CollectionWildcard`
406 instance.
408 Parameters
409 ----------
410 expression
411 May be:
412 - a `str` collection name;
413 - an `re.Pattern` instance to match (with `re.Pattern.fullmatch`)
414 against collection names;
415 - any iterable containing any of the above;
416 - another `CollectionWildcard` instance (passed through
417 unchanged).
419 Duplicate collection names will be removed (preserving the first
420 appearance of each collection name).
421 require_ordered : `bool`, optional
422 If `True` (`False` is default) require the expression to be
423 ordered, and raise `CollectionExpressionError` if it is not.
425 Returns
426 -------
427 wildcard : `CollectionWildcard`
428 A `CollectionWildcard` instance.
430 Raises
431 ------
432 CollectionExpressionError
433 Raised if the patterns has regular expression, glob patterns, or
434 the ``...`` wildcard, and ``require_ordered=True``.
435 """
436 if isinstance(expression, cls):
437 return expression
438 if expression is Ellipsis:
439 return cls()
440 wildcard = CategorizedWildcard.fromExpression(
441 expression,
442 allowAny=True,
443 allowPatterns=True,
444 )
445 if wildcard is Ellipsis:
446 return cls()
447 result = cls(
448 strings=tuple(wildcard.strings),
449 patterns=tuple(wildcard.patterns),
450 )
451 if require_ordered:
452 result.require_ordered()
453 return result
455 @classmethod
456 def from_names(cls, names: Iterable[str]) -> CollectionWildcard:
457 """Construct from an iterable of explicit collection names.
459 Parameters
460 ----------
461 names : `Iterable` [ `str` ]
462 Iterable of collection names.
464 Returns
465 -------
466 wildcard : ~CollectionWildcard`
467 A `CollectionWildcard` instance. `require_ordered` is guaranteed
468 to succeed and return the given names in order.
469 """
470 return cls(strings=tuple(names), patterns=())
472 def require_ordered(self) -> tuple[str, ...]:
473 """Require that this wildcard contains no patterns, and return the
474 ordered tuple of names that it does hold.
476 Returns
477 -------
478 names : `tuple` [ `str` ]
479 Ordered tuple of collection names.
481 Raises
482 ------
483 CollectionExpressionError
484 Raised if the patterns has regular expression, glob patterns, or
485 the ``...`` wildcard.
486 """
487 if self.patterns:
488 raise CollectionExpressionError(
489 f"An ordered collection expression is required; got patterns {self.patterns}."
490 )
491 return self.strings
493 def empty(self) -> bool:
494 """Return true if both ``strings`` and ``patterns`` are empty."""
495 # bool(Ellipsis) is True
496 return not self.strings and not self.patterns
498 def __str__(self) -> str:
499 if self.patterns is Ellipsis:
500 return "..."
501 else:
502 terms = list(self.strings)
503 terms.extend(str(p) for p in self.patterns)
504 return "[{}]".format(", ".join(terms))
507@dataclasses.dataclass
508class DatasetTypeWildcard:
509 """A validated expression that resolves to one or more dataset types.
511 The `from_expression` method should almost always be used to construct
512 instances, as the regular constructor performs no checking of inputs (and
513 that can lead to confusing error messages downstream).
514 """
516 values: Mapping[str, DatasetType | None] = dataclasses.field(default_factory=dict)
517 """A mapping with `str` dataset type name keys and optional `DatasetType`
518 instances.
519 """
521 patterns: tuple[re.Pattern, ...] | EllipsisType = Ellipsis
522 """Regular expressions to be matched against dataset type names, or the
523 special value ``...`` indicating all dataset types.
525 Any pattern matching a dataset type is considered an overall match for
526 the expression.
527 """
529 @classmethod
530 def from_expression(cls, expression: Any) -> DatasetTypeWildcard:
531 """Construct an instance by analyzing the given expression.
533 Parameters
534 ----------
535 expression
536 Expression to analyze. May be any of the following:
538 - a `str` dataset type name;
539 - a `DatasetType` instance;
540 - a `re.Pattern` to match against dataset type names;
541 - an iterable whose elements may be any of the above (any dataset
542 type matching any element in the list is an overall match);
543 - an existing `DatasetTypeWildcard` instance;
544 - the special ``...`` ellipsis object, which matches any dataset
545 type.
547 Returns
548 -------
549 query : `DatasetTypeWildcard`
550 An instance of this class (new unless an existing instance was
551 passed in).
553 Raises
554 ------
555 DatasetTypeExpressionError
556 Raised if the given expression does not have one of the allowed
557 types.
558 """
559 if isinstance(expression, cls):
560 return expression
561 try:
562 wildcard = CategorizedWildcard.fromExpression(
563 expression, coerceUnrecognized=lambda d: (d.name, d)
564 )
565 except TypeError as err:
566 raise DatasetTypeExpressionError(f"Invalid dataset type expression: {expression!r}.") from err
567 if wildcard is Ellipsis:
568 return cls()
569 values: dict[str, DatasetType | None] = {}
570 for name in wildcard.strings:
571 values[name] = None
572 for name, item in wildcard.items:
573 if not isinstance(item, DatasetType):
574 raise DatasetTypeExpressionError(
575 f"Invalid value '{item}' of type {type(item)} in dataset type expression; "
576 "expected str, re.Pattern, DatasetType objects, iterables thereof, or '...'."
577 )
578 values[name] = item
579 return cls(values, patterns=tuple(wildcard.patterns))
581 def __str__(self) -> str:
582 if self.patterns is Ellipsis:
583 return "..."
584 else:
585 terms = list(self.values.keys())
586 terms.extend(str(p) for p in self.patterns)
587 return "[{}]".format(", ".join(terms))