Coverage for python/lsst/daf/butler/registry/wildcards.py: 25%
220 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-25 15:14 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-25 15:14 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
# Names exported by ``from ... import *``.
__all__ = ("CategorizedWildcard", "CollectionWildcard", "CollectionSearch", "DatasetTypeWildcard")
30import contextlib
31import dataclasses
32import re
33from collections.abc import Callable, Iterable, Iterator, Mapping, Sequence
34from types import EllipsisType
35from typing import Any
37from deprecated.sphinx import deprecated
38from lsst.daf.butler._compat import PYDANTIC_V2
39from lsst.utils.iteration import ensure_iterable
41from ..core import DatasetType
42from ..core.utils import globToRegex
43from ._exceptions import CollectionExpressionError, DatasetTypeExpressionError
46@dataclasses.dataclass
47class CategorizedWildcard:
48 """The results of preprocessing a wildcard expression to separate match
49 patterns from strings.
51 The `fromExpression` method should almost always be used to construct
52 instances, as the regular constructor performs no checking of inputs (and
53 that can lead to confusing error messages downstream).
54 """
56 @classmethod
57 def fromExpression(
58 cls,
59 expression: Any,
60 *,
61 allowAny: bool = True,
62 allowPatterns: bool = True,
63 coerceUnrecognized: Callable[[Any], tuple[str, Any] | str] | None = None,
64 coerceItemValue: Callable[[Any], Any] | None = None,
65 defaultItemValue: Any | None = None,
66 ) -> CategorizedWildcard | EllipsisType:
67 """Categorize a wildcard expression.
69 Parameters
70 ----------
71 expression
72 The expression to categorize. May be any of:
73 - `str` (including glob patterns if ``allowPatterns`` is `True`);
74 - `re.Pattern` (only if ``allowPatterns`` is `True`);
75 - objects recognized by ``coerceUnrecognized`` (if provided);
76 - two-element tuples of (`str`, value) where value is recognized
77 by ``coerceItemValue`` (if provided);
78 - a non-`str`, non-mapping iterable containing any of the above;
79 - the special value `...` (only if ``allowAny`` is `True`), which
80 matches anything;
81 - a mapping from `str` to a value are recognized by
82 ``coerceItemValue`` (if provided);
83 - a `CategorizedWildcard` instance (passed through unchanged if
84 it meets the requirements specified by keyword arguments).
85 allowAny: `bool`, optional
86 If `False` (`True` is default) raise `TypeError` if `...` is
87 encountered.
88 allowPatterns: `bool`, optional
89 If `False` (`True` is default) raise `TypeError` if a `re.Pattern`
90 is encountered, or if ``expression`` is a `CategorizedWildcard`
91 with `patterns` not empty.
92 coerceUnrecognized: `~collections.abc.Callable`, optional
93 A callback that takes a single argument of arbitrary type and
94 returns either a `str` - appended to `strings` - or a `tuple` of
95 (`str`, `Any`) to be appended to `items`. This will be called on
96 objects of unrecognized type. Exceptions will be reraised as
97 `TypeError` (and chained).
98 coerceItemValue: `~collections.abc.Callable`, optional
99 If provided, ``expression`` may be a mapping from `str` to any
100 type that can be passed to this function; the result of that call
101 will be stored instead as the value in ``self.items``.
102 defaultItemValue: `Any`, optional
103 If provided, combine this value with any string values encountered
104 (including any returned by ``coerceUnrecognized``) to form a
105 `tuple` and add it to `items`, guaranteeing that `strings` will be
106 empty. Patterns are never added to `items`.
108 Returns
109 -------
110 categorized : `CategorizedWildcard` or ``...``.
111 The struct describing the wildcard. ``...`` is passed through
112 unchanged.
114 Raises
115 ------
116 TypeError
117 Raised if an unsupported type is found in the expression.
118 """
119 assert expression is not None
120 # See if we were given ...; just return that if we were.
121 if expression is ...:
122 if not allowAny:
123 raise TypeError("This expression may not be unconstrained.")
124 return ...
125 if isinstance(expression, cls):
126 # This is already a CategorizedWildcard. Make sure it meets the
127 # reqs. implied by the kwargs we got.
128 if not allowPatterns and expression.patterns:
129 raise TypeError(
130 f"Regular expression(s) {expression.patterns} are not allowed in this context."
131 )
132 if defaultItemValue is not None and expression.strings:
133 if expression.items:
134 raise TypeError(
135 "Incompatible preprocessed expression: an ordered sequence of str is "
136 "needed, but the original order was lost in the preprocessing."
137 )
138 return cls(
139 strings=[],
140 patterns=expression.patterns,
141 items=[(k, defaultItemValue) for k in expression.strings],
142 )
143 elif defaultItemValue is None and expression.items:
144 if expression.strings:
145 raise TypeError(
146 "Incompatible preprocessed expression: an ordered sequence of items is "
147 "needed, but the original order was lost in the preprocessing."
148 )
149 return cls(strings=[k for k, _ in expression.items], patterns=expression.patterns, items=[])
150 else:
151 # Original expression was created with keyword arguments that
152 # were at least as restrictive as what we just got; pass it
153 # through.
154 return expression
156 # If we get here, we know we'll be creating a new instance.
157 # Initialize an empty one now.
158 self = cls(strings=[], patterns=[], items=[])
160 # If mappings are allowed, see if we were given a single mapping by
161 # trying to get items.
162 if coerceItemValue is not None:
163 rawItems = None
164 with contextlib.suppress(AttributeError):
165 rawItems = expression.items()
167 if rawItems is not None:
168 for k, v in rawItems:
169 try:
170 self.items.append((k, coerceItemValue(v)))
171 except Exception as err:
172 raise TypeError(f"Could not coerce mapping value '{v}' for key '{k}'.") from err
173 return self
175 # Not ..., a CategorizedWildcard instance, or a mapping. Just
176 # process scalars or an iterable. We put the body of the loop inside
177 # a local function so we can recurse after coercion.
179 def process(element: Any, alreadyCoerced: bool = False) -> EllipsisType | None:
180 if isinstance(element, str):
181 if defaultItemValue is not None:
182 self.items.append((element, defaultItemValue))
183 return None
184 else:
185 # This returns a list but we know we only passed in
186 # single value.
187 converted = globToRegex(element)
188 if converted is ...:
189 return ...
190 element = converted[0]
191 # Let regex and ... go through to the next check
192 if isinstance(element, str):
193 self.strings.append(element)
194 return None
195 if allowPatterns and isinstance(element, re.Pattern):
196 self.patterns.append(element)
197 return None
198 if alreadyCoerced:
199 try:
200 k, v = element
201 except TypeError:
202 raise TypeError(
203 f"Object '{element!r}' returned by coercion function must be `str` or `tuple`."
204 ) from None
205 else:
206 self.items.append((k, v))
207 return None
208 if coerceItemValue is not None:
209 try:
210 k, v = element
211 except TypeError:
212 pass
213 else:
214 if not isinstance(k, str):
215 raise TypeError(f"Item key '{k}' is not a string.")
216 try:
217 v = coerceItemValue(v)
218 except Exception as err:
219 raise TypeError(f"Could not coerce tuple item value '{v}' for key '{k}'.") from err
220 self.items.append((k, v))
221 return None
222 if coerceUnrecognized is not None:
223 try:
224 # This should be safe but flake8 cant tell that the
225 # function will be re-declared next function call
226 process(coerceUnrecognized(element), alreadyCoerced=True) # noqa: F821
227 except Exception as err:
228 raise TypeError(f"Could not coerce expression element '{element!r}'.") from err
229 else:
230 extra = "."
231 if isinstance(element, re.Pattern):
232 extra = " and patterns are not allowed."
233 raise TypeError(f"Unsupported object in wildcard expression: '{element!r}'{extra}")
234 return None
236 for element in ensure_iterable(expression):
237 retval = process(element)
238 if retval is ...:
239 # One of the globs matched everything
240 if not allowAny:
241 raise TypeError("This expression may not be unconstrained.")
242 return ...
243 del process
244 return self
246 strings: list[str]
247 """Explicit string values found in the wildcard (`list` [ `str` ]).
248 """
250 patterns: list[re.Pattern]
251 """Regular expression patterns found in the wildcard
252 (`list` [ `re.Pattern` ]).
253 """
255 items: list[tuple[str, Any]]
256 """Two-item tuples that relate string values to other objects
257 (`list` [ `tuple` [ `str`, `Any` ] ]).
258 """
# Define the private pydantic base class for `CollectionSearch`.  Which
# pydantic base is used depends on the ``PYDANTIC_V2`` compatibility flag; both
# variants expose the stored tuple of collection names as ``root``.
if PYDANTIC_V2:
    from pydantic import RootModel  # type: ignore

    class _CollectionSearch(RootModel):
        # Tuple of collection names held by the model.
        root: tuple[str, ...]

else:
    from pydantic import BaseModel

    class _CollectionSearch(BaseModel, Sequence[str]):  # type: ignore
        # Tuple of collection names held by the model.
        # NOTE(review): presumably this is a pydantic v1 "custom root type";
        # confirm against the pydantic version in use.
        __root__: tuple[str, ...]

        @property
        def root(self) -> tuple[str, ...]:
            # Mirror the ``root`` attribute of the other branch so subclasses
            # can read ``self.root`` regardless of which branch was taken.
            return self.__root__
@deprecated(
    reason="Tuples of string collection names are now preferred. Will be removed after v26.",
    version="v25.0",
    category=FutureWarning,
)
class CollectionSearch(_CollectionSearch):
    """An ordered search path of collections.

    The `fromExpression` method should almost always be used to construct
    instances, as the regular constructor performs no checking of inputs (and
    that can lead to confusing error messages downstream).

    Parameters
    ----------
    collections : `tuple` [ `str` ]
        Tuple of collection names, ordered from the first searched to the last
        searched.

    Notes
    -----
    A `CollectionSearch` is used to find a single dataset (or set of datasets
    with different dataset types or data IDs) according to its dataset type and
    data ID, giving preference to collections in the order in which they are
    specified.  A `CollectionWildcard` can be constructed from a broader range
    of expressions but does not order the collections to be searched.

    `CollectionSearch` is an immutable sequence of `str` collection names.

    A `CollectionSearch` instance constructed properly (e.g. via
    `fromExpression`) is a unique representation of a particular search path;
    it is exactly the same internally and compares as equal to any
    `CollectionSearch` constructed from an equivalent expression, regardless of
    how different the original expressions appear.
    """

    @classmethod
    def fromExpression(cls, expression: Any) -> CollectionSearch:
        """Process a general expression to construct a `CollectionSearch`
        instance.

        Parameters
        ----------
        expression
            May be:

            - a `str` collection name;
            - an iterable of `str` collection names;
            - another `CollectionSearch` instance (passed through
              unchanged).

            Duplicate entries will be removed (preserving the first appearance
            of each collection name).

        Returns
        -------
        collections : `CollectionSearch`
            A `CollectionSearch` instance.

        Raises
        ------
        CollectionExpressionError
            Raised if `CategorizedWildcard.fromExpression` rejects the
            expression with `TypeError` (patterns and ``...`` are not
            allowed here).
        """
        # First see if this is already a CollectionSearch; just pass that
        # through unchanged.  This lets us standardize expressions (and turn
        # single-pass iterators into multi-pass iterables) in advance and pass
        # them down to other routines that accept arbitrary expressions.
        if isinstance(expression, cls):
            return expression
        try:
            wildcard = CategorizedWildcard.fromExpression(
                expression,
                allowAny=False,
                allowPatterns=False,
            )
        except TypeError as err:
            raise CollectionExpressionError(str(err)) from None
        # Guaranteed by the keyword arguments passed above.
        assert wildcard is not ...
        assert not wildcard.patterns
        assert not wildcard.items
        # dict preserves insertion order, so this drops later duplicates while
        # keeping the first appearance of each name, in linear time.
        deduplicated = tuple(dict.fromkeys(wildcard.strings))
        if PYDANTIC_V2:
            model = cls(deduplicated)  # type: ignore
        else:
            model = cls(__root__=deduplicated)  # type: ignore
        return model

    def explicitNames(self) -> Iterator[str]:
        """Iterate over collection names that were specified explicitly."""
        yield from self.root

    def __iter__(self) -> Iterator[str]:  # type: ignore
        yield from self.root

    def __len__(self) -> int:
        return len(self.root)

    def __getitem__(self, index: Any) -> str:
        return self.root[index]

    def __eq__(self, other: Any) -> bool:
        # Only another CollectionSearch with the same names in the same order
        # compares equal; any other type compares unequal.
        if isinstance(other, CollectionSearch):
            return self.root == other.root
        return False

    def __str__(self) -> str:
        return "[{}]".format(", ".join(self))

    def __repr__(self) -> str:
        return f"CollectionSearch({self.root!r})"
387@dataclasses.dataclass(frozen=True)
388class CollectionWildcard:
389 """A validated wildcard for collection names.
391 The `from_expression` method should almost always be used to construct
392 instances, as the regular constructor performs no checking of inputs (and
393 that can lead to confusing error messages downstream).
395 Notes
396 -----
397 `CollectionWildcard` is expected to be rarely used outside of `Registry`
398 (which uses it to back several of its "query" methods that take general
399 expressions for collections), but it may occasionally be useful outside
400 `Registry` as a way to preprocess expressions that contain single-pass
401 iterators into a form that can be used to call those `Registry` methods
402 multiple times.
403 """
405 strings: tuple[str, ...] = ()
406 """An an ordered list of explicitly-named collections. (`tuple` [ `str` ]).
407 """
409 patterns: tuple[re.Pattern, ...] | EllipsisType = ...
410 """Regular expression patterns to match against collection names, or the
411 special value ``...`` indicating all collections.
413 `...` must be accompanied by ``strings=()``.
414 """
416 def __post_init__(self) -> None:
417 if self.patterns is ... and self.strings:
418 raise ValueError(
419 f"Collection wildcard matches any string, but still has explicit strings {self.strings}."
420 )
422 @classmethod
423 def from_expression(cls, expression: Any, require_ordered: bool = False) -> CollectionWildcard:
424 """Process a general expression to construct a `CollectionWildcard`
425 instance.
427 Parameters
428 ----------
429 expression
430 May be:
431 - a `str` collection name;
432 - an `re.Pattern` instance to match (with `re.Pattern.fullmatch`)
433 against collection names;
434 - any iterable containing any of the above;
435 - another `CollectionWildcard` instance (passed through
436 unchanged).
438 Duplicate collection names will be removed (preserving the first
439 appearance of each collection name).
440 require_ordered : `bool`, optional
441 If `True` (`False` is default) require the expression to be
442 ordered, and raise `CollectionExpressionError` if it is not.
444 Returns
445 -------
446 wildcard : `CollectionWildcard`
447 A `CollectionWildcard` instance.
449 Raises
450 ------
451 CollectionExpressionError
452 Raised if the patterns has regular expression, glob patterns, or
453 the ``...`` wildcard, and ``require_ordered=True``.
454 """
455 if isinstance(expression, cls):
456 return expression
457 if expression is ...:
458 return cls()
459 wildcard = CategorizedWildcard.fromExpression(
460 expression,
461 allowAny=True,
462 allowPatterns=True,
463 )
464 if wildcard is ...:
465 return cls()
466 result = cls(
467 strings=tuple(wildcard.strings),
468 patterns=tuple(wildcard.patterns),
469 )
470 if require_ordered:
471 result.require_ordered()
472 return result
474 @classmethod
475 def from_names(cls, names: Iterable[str]) -> CollectionWildcard:
476 """Construct from an iterable of explicit collection names.
478 Parameters
479 ----------
480 names : `~collections.abc.Iterable` [ `str` ]
481 Iterable of collection names.
483 Returns
484 -------
485 wildcard : ~CollectionWildcard`
486 A `CollectionWildcard` instance. `require_ordered` is guaranteed
487 to succeed and return the given names in order.
488 """
489 return cls(strings=tuple(names), patterns=())
491 def require_ordered(self) -> tuple[str, ...]:
492 """Require that this wildcard contains no patterns, and return the
493 ordered tuple of names that it does hold.
495 Returns
496 -------
497 names : `tuple` [ `str` ]
498 Ordered tuple of collection names.
500 Raises
501 ------
502 CollectionExpressionError
503 Raised if the patterns has regular expression, glob patterns, or
504 the ``...`` wildcard.
505 """
506 if self.patterns:
507 raise CollectionExpressionError(
508 f"An ordered collection expression is required; got patterns {self.patterns}."
509 )
510 return self.strings
512 def empty(self) -> bool:
513 """Return true if both ``strings`` and ``patterns`` are empty."""
514 # bool(Ellipsis) is True
515 return not self.strings and not self.patterns
517 def __str__(self) -> str:
518 if self.patterns is ...:
519 return "..."
520 else:
521 terms = list(self.strings)
522 terms.extend(str(p) for p in self.patterns)
523 return "[{}]".format(", ".join(terms))
526@dataclasses.dataclass
527class DatasetTypeWildcard:
528 """A validated expression that resolves to one or more dataset types.
530 The `from_expression` method should almost always be used to construct
531 instances, as the regular constructor performs no checking of inputs (and
532 that can lead to confusing error messages downstream).
533 """
535 values: Mapping[str, DatasetType | None] = dataclasses.field(default_factory=dict)
536 """A mapping with `str` dataset type name keys and optional `DatasetType`
537 instances.
538 """
540 patterns: tuple[re.Pattern, ...] | EllipsisType = ...
541 """Regular expressions to be matched against dataset type names, or the
542 special value ``...`` indicating all dataset types.
544 Any pattern matching a dataset type is considered an overall match for
545 the expression.
546 """
548 @classmethod
549 def from_expression(cls, expression: Any) -> DatasetTypeWildcard:
550 """Construct an instance by analyzing the given expression.
552 Parameters
553 ----------
554 expression
555 Expression to analyze. May be any of the following:
557 - a `str` dataset type name;
558 - a `DatasetType` instance;
559 - a `re.Pattern` to match against dataset type names;
560 - an iterable whose elements may be any of the above (any dataset
561 type matching any element in the list is an overall match);
562 - an existing `DatasetTypeWildcard` instance;
563 - the special ``...`` ellipsis object, which matches any dataset
564 type.
566 Returns
567 -------
568 query : `DatasetTypeWildcard`
569 An instance of this class (new unless an existing instance was
570 passed in).
572 Raises
573 ------
574 DatasetTypeExpressionError
575 Raised if the given expression does not have one of the allowed
576 types.
577 """
578 if isinstance(expression, cls):
579 return expression
580 try:
581 wildcard = CategorizedWildcard.fromExpression(
582 expression, coerceUnrecognized=lambda d: (d.name, d)
583 )
584 except TypeError as err:
585 raise DatasetTypeExpressionError(f"Invalid dataset type expression: {expression!r}.") from err
586 if wildcard is ...:
587 return cls()
588 values: dict[str, DatasetType | None] = {}
589 for name in wildcard.strings:
590 values[name] = None
591 for name, item in wildcard.items:
592 if not isinstance(item, DatasetType):
593 raise DatasetTypeExpressionError(
594 f"Invalid value '{item}' of type {type(item)} in dataset type expression; "
595 "expected str, re.Pattern, DatasetType objects, iterables thereof, or '...'."
596 )
597 values[name] = item
598 return cls(values, patterns=tuple(wildcard.patterns))
600 def __str__(self) -> str:
601 if self.patterns is ...:
602 return "..."
603 else:
604 terms = list(self.values.keys())
605 terms.extend(str(p) for p in self.patterns)
606 return "[{}]".format(", ".join(terms))