Coverage for python/lsst/daf/butler/script/removeRuns.py: 39%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
23from collections import defaultdict
24from dataclasses import dataclass
25from functools import partial
26from typing import Callable, Dict, List, Mapping, Sequence, Tuple
28from .._butler import Butler
29from ..registry import CollectionType, MissingCollectionError
30from ..registry.queries import DatasetQueryResults
@dataclass
class RemoveRunsResult:
    """Summary of a pending run removal, handed back to the cli command.

    Attributes
    ----------
    onConfirmation : `Callable` [[], `None`]
        Zero-argument callback that performs the removal; intended to be
        called once the user has confirmed.
    runs : `Sequence` [`str`]
        Names of the run collections that will be removed.
    datasets : `Mapping` [`str`, `int`]
        Dataset type name mapped to the number of datasets of that type
        that will be removed.
    """

    onConfirmation: Callable[[], None]
    runs: Sequence[str]
    datasets: Mapping[str, int]
def _getCollectionInfo(
    repo: str,
    collection: str,
) -> Tuple[List[str], Mapping[str, int]]:
    """Find the RUN collections matching an expression and tally their
    datasets by dataset type.

    Parameters
    ----------
    repo : `str`
        The URI to the repository.
    collection : `str`
        The collection string to search for. Same as the `expression`
        argument to `registry.queryCollections`.

    Returns
    -------
    runs : `list` of `str`
        The runs that will be removed.
    datasets : `dict` [`str`, `int`]
        The dataset types and how many of each will be removed. Counts are
        obtained with ``count(exact=False)``.
    """
    butler = Butler(repo)

    # A collection expression that matches nothing known to the registry is
    # treated as "no runs found" rather than as an error.
    try:
        matched = list(
            butler.registry.queryCollections(
                expression=collection,
                collectionTypes=frozenset({CollectionType.RUN}),
                includeChains=False,
            )
        )
    except MissingCollectionError:
        matched = []

    runs = list(matched)
    datasets: Dict[str, int] = defaultdict(int)
    for runName in matched:
        # Sanity check: the query above was restricted to RUN collections.
        assert butler.registry.getCollectionType(runName).name == "RUN"
        refs = butler.registry.queryDatasets(..., collections=runName)
        assert isinstance(refs, DatasetQueryResults)
        # Accumulate the per-dataset-type counts across all matched runs.
        for perType in refs.byParentDatasetType():
            datasets[perType.parentDatasetType.name] += perType.count(exact=False)
    return runs, datasets
def removeRuns(
    repo: str,
    collection: str,
) -> RemoveRunsResult:
    """Prepare the removal of run collections.

    Nothing is removed by this call itself; the removal happens only when
    the returned ``onConfirmation`` callback is invoked.

    Parameters
    ----------
    repo : `str`
        Same as the ``config`` argument to ``Butler.__init__``
    collection : `str`
        Collection expression selecting the RUN collections to remove.

    Returns
    -------
    result : `RemoveRunsResult`
        Contains information describing what will be removed, plus the
        callback that performs the removal.
    """

    def doRemove(runs: Sequence[str]) -> None:
        """Open a writeable butler and remove the given runs."""
        Butler(repo, writeable=True).removeRuns(runs, unstore=True)

    runs, datasets = _getCollectionInfo(repo, collection)
    return RemoveRunsResult(
        onConfirmation=partial(doRemove, runs),
        runs=runs,
        datasets=datasets,
    )