Coverage for python/lsst/obs/base/gen2to3/repoWalker/walker.py : 34%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
# This file is part of obs_base. # # Developed for the LSST Data Management System. # This product includes software developed by the LSST Project # (http://www.lsst.org). # See the COPYRIGHT file at the top-level directory of this distribution # for details of code ownership. # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. by this package. """
Callable, ClassVar, Dict, Iterable, List, Mapping, Optional, Union, )
DataCoordinate, DatasetType, FileDataset, )
"""An object that recursively walks a Gen2 data repository tree, extracting Gen3 `FileDataset` objects and warning about unrecognized or unconvertible Gen2 datasets.
Parameters ---------- inputs : `~collections.abc.Iterable` of `Target` or `Skip` Structs that indicate dataset types to be extracted (`Target`) or explicitly skipped (`Skip`). Skips may include a warning message to log when matching entries are encountered. fileIgnoreRegEx : `re.Pattern`, optional A regular expression pattern that identifies non-dataset files that can be ignored, to be applied at all levels of the directory tree. dirIgnoreRegEx : `re.Pattern`, optional A regular expression pattern that identifies non-dataset subdirectories that can be ignored, to be applied at all levels of the directory tree. """ fileIgnoreRegEx: Optional[re.Pattern] = None, dirIgnoreRegEx: Optional[re.Pattern] = None): super().__init__() tree = BuilderTree() allKeys: Dict[str, type] = {} for leaf in inputs: tree.insert(0, leaf) for key, dtype in leaf.keys.items(): if allKeys.setdefault(key, dtype) != dtype: raise ValueError(f"Multiple types for key '{key}': {dtype} " f"(from {leaf.template}) vs. {allKeys[key]}.") tree, messages, pruned = tree.prune() if pruned: raise RuntimeError(f"Nothing to search for after pruning skipped datasets:" f" {'; '.join(messages)}.") self._scanner = DirectoryScanner() tree.fill(self._scanner, allKeys, {}, fileIgnoreRegEx=fileIgnoreRegEx, dirIgnoreRegEx=dirIgnoreRegEx)
"""An input struct type whose instances represent a dataset type to be extracted (`type`). """
"""An input struct type whose instances represent a dataset type to be explicitly skipped. """
) -> Mapping[DatasetType, List[FileDataset]]: """Walk a Gen2 repository root to extract Gen3 `FileDataset` instances from it.
Parameters ---------- root : `str` Absolute path to the repository root. log : `Log` Logger for warnings and diagnostic information. predicate : `~collections.abc.Callable`, optional If not `None`, a callable that returns `True` if a `DataCoordinate` is consistent with what we want to extract. If ``predicate`` returns `False`, the file or directory that data ID was extracted from will not be processed, even if it includes target dataset types.
Returns ------- datasets : `defaultdict` [`DatasetType`, `list`[`FileDataset`]] Extracted datasets, grouped by Gen3 `DatasetType`. """ if predicate is None: def predicate(dataId: DataCoordinate) -> bool: return True datasets = defaultdict(list) self._scanner.scan(root, datasets, log=log, predicate=predicate) return datasets |