Coverage for python/lsst/daf/butler/script/removeRuns.py: 33%
47 statements
« prev ^ index » next coverage.py v6.4.4, created at 2022-09-30 02:19 -0700
« prev ^ index » next coverage.py v6.4.4, created at 2022-09-30 02:19 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
23from collections import defaultdict
24from dataclasses import dataclass
25from functools import partial
26from typing import Callable, Dict, List, Mapping, Sequence, Tuple
28from .._butler import Butler
29from ..registry import CollectionType, MissingCollectionError
30from ..registry.queries import DatasetQueryResults
@dataclass
class RemoveRun:
    """Description of one RUN collection that is a candidate for removal.

    Attributes
    ----------
    name : `str`
        Name of the RUN collection.
    parents : `list` [`str`]
        Names of the CHAINED collections that the RUN belongs to.
    """

    name: str
    parents: List[str]
@dataclass
class RemoveRunsResult:
    """Result bundle handed back to the cli command.

    Carries the runs that will be deleted, a per-dataset-type count of the
    datasets that will be deleted, and the callback that performs the removal
    once the user has confirmed.

    Attributes
    ----------
    onConfirmation : `Callable` [[], `None`]
        Zero-argument callback that does the removal.
    runs : `Sequence` [`RemoveRun`]
        The run collections that will be removed.
    datasets : `Mapping` [`str`, `int`]
        Dataset type name mapped to how many datasets of that type will be
        removed.
    """

    onConfirmation: Callable[[], None]
    runs: Sequence[RemoveRun]
    datasets: Mapping[str, int]
def _getCollectionInfo(
    repo: str,
    collection: str,
) -> Tuple[List[RemoveRun], Mapping[str, int]]:
    """Find the RUN collections matching an expression and count their
    datasets.

    Parameters
    ----------
    repo : `str`
        The URI to the repository.
    collection : `str`
        The collection string to search for. Same as the `expression`
        argument to `registry.queryCollections`.

    Returns
    -------
    runs : `list` of `RemoveRun`
        Describes the runs that will be removed.
    datasets : `dict` [`str`, `int`]
        The dataset types and how many of each will be removed, with keys
        in sorted order.
    """
    butler = Butler(repo)

    # A missing collection is treated the same as "nothing matched".
    try:
        runNames = list(
            butler.registry.queryCollections(
                collectionTypes=frozenset((CollectionType.RUN,)),
                expression=collection,
                includeChains=False,
            )
        )
    except MissingCollectionError:
        runNames = []

    runs = []
    counts: Dict[str, int] = defaultdict(int)
    for runName in runNames:
        # Sanity check: the query above was restricted to RUN collections.
        assert butler.registry.getCollectionType(runName).name == "RUN"
        parentChains = butler.registry.getCollectionParentChains(runName)
        runs.append(RemoveRun(runName, list(parentChains)))

        results = butler.registry.queryDatasets(..., collections=runName)
        assert isinstance(results, DatasetQueryResults)
        for perType in results.byParentDatasetType():
            # Skip dataset types for which the inexact check reports no rows.
            if not perType.any(exact=False, execute=False):
                continue
            counts[perType.parentDatasetType.name] += perType.count(exact=False)

    # Return a plain dict ordered by dataset type name.
    return runs, {typeName: counts[typeName] for typeName in sorted(counts)}
def removeRuns(
    repo: str,
    collection: str,
) -> RemoveRunsResult:
    """Prepare the removal of RUN collections.

    Nothing is removed by this call itself; the removal is performed by the
    ``onConfirmation`` callback of the returned object.

    Parameters
    ----------
    repo : `str`
        Same as the ``config`` argument to ``Butler.__init__``
    collection : `str`
        Collection search string; forwarded to `_getCollectionInfo` to find
        the RUN collections to remove.

    Returns
    -------
    collections : `RemoveRunsResult`
        Contains information describing what will be removed.
    """
    matchedRuns, datasetCounts = _getCollectionInfo(repo, collection)

    def _removeConfirmed(targets: Sequence[RemoveRun]) -> None:
        """Detach each run from its parent chains, then delete the runs."""
        butler = Butler(repo, writeable=True)
        with butler.transaction():
            for target in targets:
                for chainName in target.parents:
                    # Rewrite the parent chain without this run.
                    remaining = list(butler.registry.getCollectionChain(chainName))
                    remaining.remove(target.name)
                    butler.registry.setCollectionChain(chainName, remaining, flatten=False)
            butler.removeRuns([target.name for target in targets], unstore=True)

    return RemoveRunsResult(
        onConfirmation=partial(_removeConfirmed, matchedRuns),
        runs=matchedRuns,
        datasets=datasetCounts,
    )