Coverage for python/lsst/daf/butler/registry/wildcards.py: 22%

171 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-18 09:55 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27from __future__ import annotations 

28 

29__all__ = ( 

30 "CategorizedWildcard", 

31 "CollectionWildcard", 

32 "DatasetTypeWildcard", 

33) 

34 

35import contextlib 

36import dataclasses 

37import re 

38from collections.abc import Callable, Iterable, Mapping 

39from types import EllipsisType 

40from typing import Any 

41 

42from lsst.utils.iteration import ensure_iterable 

43 

44from .._dataset_type import DatasetType 

45from ..utils import globToRegex 

46from ._exceptions import CollectionExpressionError, DatasetTypeExpressionError 

47 

48 

49@dataclasses.dataclass 

50class CategorizedWildcard: 

51 """The results of preprocessing a wildcard expression to separate match 

52 patterns from strings. 

53 

54 The `fromExpression` method should almost always be used to construct 

55 instances, as the regular constructor performs no checking of inputs (and 

56 that can lead to confusing error messages downstream). 

57 """ 

58 

59 @classmethod 

60 def fromExpression( 

61 cls, 

62 expression: Any, 

63 *, 

64 allowAny: bool = True, 

65 allowPatterns: bool = True, 

66 coerceUnrecognized: Callable[[Any], tuple[str, Any] | str] | None = None, 

67 coerceItemValue: Callable[[Any], Any] | None = None, 

68 defaultItemValue: Any | None = None, 

69 ) -> CategorizedWildcard | EllipsisType: 

70 """Categorize a wildcard expression. 

71 

72 Parameters 

73 ---------- 

74 expression : `~typing.Any` 

75 The expression to categorize. May be any of: 

76 

77 - `str` (including glob patterns if ``allowPatterns`` is `True`); 

78 - `re.Pattern` (only if ``allowPatterns`` is `True`); 

79 - objects recognized by ``coerceUnrecognized`` (if provided); 

80 - two-element tuples of (`str`, value) where value is recognized 

81 by ``coerceItemValue`` (if provided); 

82 - a non-`str`, non-mapping iterable containing any of the above; 

83 - the special value `...` (only if ``allowAny`` is `True`), which 

84 matches anything; 

85 - a mapping from `str` to a value are recognized by 

86 ``coerceItemValue`` (if provided); 

87 - a `CategorizedWildcard` instance (passed through unchanged if 

88 it meets the requirements specified by keyword arguments). 

89 allowAny : `bool`, optional 

90 If `False` (`True` is default) raise `TypeError` if `...` is 

91 encountered. 

92 allowPatterns : `bool`, optional 

93 If `False` (`True` is default) raise `TypeError` if a `re.Pattern` 

94 is encountered, or if ``expression`` is a `CategorizedWildcard` 

95 with `patterns` not empty. 

96 coerceUnrecognized : `~collections.abc.Callable`, optional 

97 A callback that takes a single argument of arbitrary type and 

98 returns either a `str` - appended to `strings` - or a `tuple` of 

99 (`str`, `Any`) to be appended to `items`. This will be called on 

100 objects of unrecognized type. Exceptions will be reraised as 

101 `TypeError` (and chained). 

102 coerceItemValue : `~collections.abc.Callable`, optional 

103 If provided, ``expression`` may be a mapping from `str` to any 

104 type that can be passed to this function; the result of that call 

105 will be stored instead as the value in ``self.items``. 

106 defaultItemValue : `Any`, optional 

107 If provided, combine this value with any string values encountered 

108 (including any returned by ``coerceUnrecognized``) to form a 

109 `tuple` and add it to `items`, guaranteeing that `strings` will be 

110 empty. Patterns are never added to `items`. 

111 

112 Returns 

113 ------- 

114 categorized : `CategorizedWildcard` or ``...``. 

115 The struct describing the wildcard. ``...`` is passed through 

116 unchanged. 

117 

118 Raises 

119 ------ 

120 TypeError 

121 Raised if an unsupported type is found in the expression. 

122 """ 

123 assert expression is not None 

124 # See if we were given ...; just return that if we were. 

125 if expression is ...: 

126 if not allowAny: 

127 raise TypeError("This expression may not be unconstrained.") 

128 return ... 

129 if isinstance(expression, cls): 

130 # This is already a CategorizedWildcard. Make sure it meets the 

131 # reqs. implied by the kwargs we got. 

132 if not allowPatterns and expression.patterns: 

133 raise TypeError( 

134 f"Regular expression(s) {expression.patterns} are not allowed in this context." 

135 ) 

136 if defaultItemValue is not None and expression.strings: 

137 if expression.items: 

138 raise TypeError( 

139 "Incompatible preprocessed expression: an ordered sequence of str is " 

140 "needed, but the original order was lost in the preprocessing." 

141 ) 

142 return cls( 

143 strings=[], 

144 patterns=expression.patterns, 

145 items=[(k, defaultItemValue) for k in expression.strings], 

146 ) 

147 elif defaultItemValue is None and expression.items: 

148 if expression.strings: 

149 raise TypeError( 

150 "Incompatible preprocessed expression: an ordered sequence of items is " 

151 "needed, but the original order was lost in the preprocessing." 

152 ) 

153 return cls(strings=[k for k, _ in expression.items], patterns=expression.patterns, items=[]) 

154 else: 

155 # Original expression was created with keyword arguments that 

156 # were at least as restrictive as what we just got; pass it 

157 # through. 

158 return expression 

159 

160 # If we get here, we know we'll be creating a new instance. 

161 # Initialize an empty one now. 

162 self = cls(strings=[], patterns=[], items=[]) 

163 

164 # If mappings are allowed, see if we were given a single mapping by 

165 # trying to get items. 

166 if coerceItemValue is not None: 

167 rawItems = None 

168 with contextlib.suppress(AttributeError): 

169 rawItems = expression.items() 

170 

171 if rawItems is not None: 

172 for k, v in rawItems: 

173 try: 

174 self.items.append((k, coerceItemValue(v))) 

175 except Exception as err: 

176 raise TypeError(f"Could not coerce mapping value '{v}' for key '{k}'.") from err 

177 return self 

178 

179 # Not ..., a CategorizedWildcard instance, or a mapping. Just 

180 # process scalars or an iterable. We put the body of the loop inside 

181 # a local function so we can recurse after coercion. 

182 

183 def process(element: Any, alreadyCoerced: bool = False) -> EllipsisType | None: 

184 if isinstance(element, str): 

185 if defaultItemValue is not None: 

186 self.items.append((element, defaultItemValue)) 

187 return None 

188 else: 

189 # This returns a list but we know we only passed in 

190 # single value. 

191 converted = globToRegex(element) 

192 if converted is ...: 

193 return ... 

194 element = converted[0] 

195 # Let regex and ... go through to the next check 

196 if isinstance(element, str): 

197 self.strings.append(element) 

198 return None 

199 if allowPatterns and isinstance(element, re.Pattern): 

200 self.patterns.append(element) 

201 return None 

202 if alreadyCoerced: 

203 try: 

204 k, v = element 

205 except TypeError: 

206 raise TypeError( 

207 f"Object '{element!r}' returned by coercion function must be `str` or `tuple`." 

208 ) from None 

209 else: 

210 self.items.append((k, v)) 

211 return None 

212 if coerceItemValue is not None: 

213 try: 

214 k, v = element 

215 except TypeError: 

216 pass 

217 else: 

218 if not isinstance(k, str): 

219 raise TypeError(f"Item key '{k}' is not a string.") 

220 try: 

221 v = coerceItemValue(v) 

222 except Exception as err: 

223 raise TypeError(f"Could not coerce tuple item value '{v}' for key '{k}'.") from err 

224 self.items.append((k, v)) 

225 return None 

226 if coerceUnrecognized is not None: 

227 try: 

228 # This should be safe but flake8 cant tell that the 

229 # function will be re-declared next function call 

230 process(coerceUnrecognized(element), alreadyCoerced=True) # noqa: F821 

231 except Exception as err: 

232 raise TypeError(f"Could not coerce expression element '{element!r}'.") from err 

233 else: 

234 extra = "." 

235 if isinstance(element, re.Pattern): 

236 extra = " and patterns are not allowed." 

237 raise TypeError(f"Unsupported object in wildcard expression: '{element!r}'{extra}") 

238 return None 

239 

240 for element in ensure_iterable(expression): 

241 retval = process(element) 

242 if retval is ...: 

243 # One of the globs matched everything 

244 if not allowAny: 

245 raise TypeError("This expression may not be unconstrained.") 

246 return ... 

247 del process 

248 return self 

249 

250 strings: list[str] 

251 """Explicit string values found in the wildcard (`list` [ `str` ]). 

252 """ 

253 

254 patterns: list[re.Pattern] 

255 """Regular expression patterns found in the wildcard 

256 (`list` [ `re.Pattern` ]). 

257 """ 

258 

259 items: list[tuple[str, Any]] 

260 """Two-item tuples that relate string values to other objects 

261 (`list` [ `tuple` [ `str`, `Any` ] ]). 

262 """ 

263 

264 

265@dataclasses.dataclass(frozen=True) 

266class CollectionWildcard: 

267 """A validated wildcard for collection names. 

268 

269 The `from_expression` method should almost always be used to construct 

270 instances, as the regular constructor performs no checking of inputs (and 

271 that can lead to confusing error messages downstream). 

272 

273 Notes 

274 ----- 

275 `CollectionWildcard` is expected to be rarely used outside of `Registry` 

276 (which uses it to back several of its "query" methods that take general 

277 expressions for collections), but it may occasionally be useful outside 

278 `Registry` as a way to preprocess expressions that contain single-pass 

279 iterators into a form that can be used to call those `Registry` methods 

280 multiple times. 

281 """ 

282 

283 strings: tuple[str, ...] = () 

284 """An an ordered list of explicitly-named collections. (`tuple` [ `str` ]). 

285 """ 

286 

287 patterns: tuple[re.Pattern, ...] | EllipsisType = ... 

288 """Regular expression patterns to match against collection names, or the 

289 special value ``...`` indicating all collections. 

290 

291 `...` must be accompanied by ``strings=()``. 

292 """ 

293 

294 def __post_init__(self) -> None: 

295 if self.patterns is ... and self.strings: 

296 raise ValueError( 

297 f"Collection wildcard matches any string, but still has explicit strings {self.strings}." 

298 ) 

299 

300 @classmethod 

301 def from_expression(cls, expression: Any, require_ordered: bool = False) -> CollectionWildcard: 

302 """Process a general expression to construct a `CollectionWildcard` 

303 instance. 

304 

305 Parameters 

306 ---------- 

307 expression : `~typing.Any` 

308 May be: 

309 

310 - a `str` collection name; 

311 - an `re.Pattern` instance to match (with `re.Pattern.fullmatch`) 

312 against collection names; 

313 - any iterable containing any of the above; 

314 - another `CollectionWildcard` instance (passed through unchanged). 

315 

316 Duplicate collection names will be removed (preserving the first 

317 appearance of each collection name). 

318 require_ordered : `bool`, optional 

319 If `True` (`False` is default) require the expression to be 

320 ordered, and raise `CollectionExpressionError` if it is not. 

321 

322 Returns 

323 ------- 

324 wildcard : `CollectionWildcard` 

325 A `CollectionWildcard` instance. 

326 

327 Raises 

328 ------ 

329 CollectionExpressionError 

330 Raised if the patterns has regular expression, glob patterns, or 

331 the ``...`` wildcard, and ``require_ordered=True``. 

332 """ 

333 if isinstance(expression, cls): 

334 return expression 

335 if expression is ...: 

336 return cls() 

337 wildcard = CategorizedWildcard.fromExpression( 

338 expression, 

339 allowAny=True, 

340 allowPatterns=True, 

341 ) 

342 if wildcard is ...: 

343 return cls() 

344 result = cls( 

345 strings=tuple(wildcard.strings), 

346 patterns=tuple(wildcard.patterns), 

347 ) 

348 if require_ordered: 

349 result.require_ordered() 

350 return result 

351 

352 @classmethod 

353 def from_names(cls, names: Iterable[str]) -> CollectionWildcard: 

354 """Construct from an iterable of explicit collection names. 

355 

356 Parameters 

357 ---------- 

358 names : `~collections.abc.Iterable` [ `str` ] 

359 Iterable of collection names. 

360 

361 Returns 

362 ------- 

363 wildcard : ~CollectionWildcard` 

364 A `CollectionWildcard` instance. `require_ordered` is guaranteed 

365 to succeed and return the given names in order. 

366 """ 

367 return cls(strings=tuple(names), patterns=()) 

368 

369 def require_ordered(self) -> tuple[str, ...]: 

370 """Require that this wildcard contains no patterns, and return the 

371 ordered tuple of names that it does hold. 

372 

373 Returns 

374 ------- 

375 names : `tuple` [ `str` ] 

376 Ordered tuple of collection names. 

377 

378 Raises 

379 ------ 

380 CollectionExpressionError 

381 Raised if the patterns has regular expression, glob patterns, or 

382 the ``...`` wildcard. 

383 """ 

384 if self.patterns: 

385 raise CollectionExpressionError( 

386 f"An ordered collection expression is required; got patterns {self.patterns}." 

387 ) 

388 return self.strings 

389 

390 def empty(self) -> bool: 

391 """Return true if both ``strings`` and ``patterns`` are empty.""" 

392 # bool(Ellipsis) is True 

393 return not self.strings and not self.patterns 

394 

395 def __str__(self) -> str: 

396 if self.patterns is ...: 

397 return "..." 

398 else: 

399 terms = list(self.strings) 

400 terms.extend(str(p) for p in self.patterns) 

401 return "[{}]".format(", ".join(terms)) 

402 

403 

404@dataclasses.dataclass 

405class DatasetTypeWildcard: 

406 """A validated expression that resolves to one or more dataset types. 

407 

408 The `from_expression` method should almost always be used to construct 

409 instances, as the regular constructor performs no checking of inputs (and 

410 that can lead to confusing error messages downstream). 

411 """ 

412 

413 values: Mapping[str, DatasetType | None] = dataclasses.field(default_factory=dict) 

414 """A mapping with `str` dataset type name keys and optional `DatasetType` 

415 instances. 

416 """ 

417 

418 patterns: tuple[re.Pattern, ...] | EllipsisType = ... 

419 """Regular expressions to be matched against dataset type names, or the 

420 special value ``...`` indicating all dataset types. 

421 

422 Any pattern matching a dataset type is considered an overall match for 

423 the expression. 

424 """ 

425 

426 @classmethod 

427 def from_expression(cls, expression: Any) -> DatasetTypeWildcard: 

428 """Construct an instance by analyzing the given expression. 

429 

430 Parameters 

431 ---------- 

432 expression : `~typing.Any` 

433 Expression to analyze. May be any of the following: 

434 

435 - a `str` dataset type name; 

436 - a `DatasetType` instance; 

437 - a `re.Pattern` to match against dataset type names; 

438 - an iterable whose elements may be any of the above (any dataset 

439 type matching any element in the list is an overall match); 

440 - an existing `DatasetTypeWildcard` instance; 

441 - the special ``...`` ellipsis object, which matches any dataset 

442 type. 

443 

444 Returns 

445 ------- 

446 query : `DatasetTypeWildcard` 

447 An instance of this class (new unless an existing instance was 

448 passed in). 

449 

450 Raises 

451 ------ 

452 DatasetTypeExpressionError 

453 Raised if the given expression does not have one of the allowed 

454 types. 

455 """ 

456 if isinstance(expression, cls): 

457 return expression 

458 try: 

459 wildcard = CategorizedWildcard.fromExpression( 

460 expression, coerceUnrecognized=lambda d: (d.name, d) 

461 ) 

462 except TypeError as err: 

463 raise DatasetTypeExpressionError(f"Invalid dataset type expression: {expression!r}.") from err 

464 if wildcard is ...: 

465 return cls() 

466 values: dict[str, DatasetType | None] = {} 

467 for name in wildcard.strings: 

468 values[name] = None 

469 for name, item in wildcard.items: 

470 if not isinstance(item, DatasetType): 

471 raise DatasetTypeExpressionError( 

472 f"Invalid value '{item}' of type {type(item)} in dataset type expression; " 

473 "expected str, re.Pattern, DatasetType objects, iterables thereof, or '...'." 

474 ) 

475 values[name] = item 

476 return cls(values, patterns=tuple(wildcard.patterns)) 

477 

478 def __str__(self) -> str: 

479 if self.patterns is ...: 

480 return "..." 

481 else: 

482 terms = list(self.values.keys()) 

483 terms.extend(str(p) for p in self.patterns) 

484 return "[{}]".format(", ".join(terms))