Coverage for python/lsst/daf/butler/script/removeRuns.py: 34%
48 statements
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-07 02:46 -0700
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-07 02:46 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
27from __future__ import annotations
29from collections import defaultdict
30from collections.abc import Callable, Mapping, Sequence
31from dataclasses import dataclass
32from functools import partial
34from .._butler import Butler
35from ..registry import CollectionType, MissingCollectionError
36from ..registry.queries import DatasetQueryResults
@dataclass
class RemoveRun:
    """Describe a single RUN collection that is slated for removal.

    Parameters
    ----------
    name : `str`
        The name of the run.
    parents : `list` [`str`]
        Names of the parent CHAINED collections the run belongs to.
    """

    # Name of the RUN collection.
    name: str
    # CHAINED collections that currently include this RUN.
    parents: list[str]
@dataclass
class RemoveRunsResult:
    """Bundle handed back to the cli command before anything is deleted.

    It lists the runs that will be deleted, maps each dataset type name to
    the number of datasets of that type that will be deleted, and carries
    the callback that executes the removal once the user has confirmed.

    Parameters
    ----------
    onConfirmation : `~collections.abc.Callable` [[], `None`]
        The callback function that does the removal.
    runs : `~collections.abc.Sequence` [`RemoveRun`]
        The run collections that will be removed.
    datasets : `~collections.abc.Mapping` [`str`, `int`]
        Dataset type name mapped to how many datasets will be removed.
    """

    # Zero-argument callable that performs the removal.
    onConfirmation: Callable[[], None]
    # Run collections that will be removed.
    runs: Sequence[RemoveRun]
    # Dataset type name -> number of datasets that will be removed.
    datasets: Mapping[str, int]
def _getCollectionInfo(
    repo: str,
    collection: str,
) -> tuple[list[RemoveRun], Mapping[str, int]]:
    """Find the RUN collections matching an expression and tally their
    datasets.

    Parameters
    ----------
    repo : `str`
        The URI to the repository.
    collection : `str`
        The collection string to search for. Passed as the ``expression``
        argument to ``registry.queryCollections``.

    Returns
    -------
    runs : `list` of `RemoveRun`
        One entry per matching RUN collection, each carrying the names of
        the parent chains reported by the registry.
    datasets : `dict` [`str`, `int`]
        Dataset type names, in sorted order, mapped to how many datasets
        will be removed. Counts are requested with ``exact=False``.
    """
    butler = Butler.from_config(repo)
    registry = butler.registry

    # A missing collection is treated the same as "nothing matched".
    try:
        matchedNames = list(
            registry.queryCollections(
                expression=collection,
                collectionTypes=frozenset({CollectionType.RUN}),
                includeChains=False,
            )
        )
    except MissingCollectionError:
        matchedNames = []

    runs: list[RemoveRun] = []
    counts: dict[str, int] = defaultdict(int)
    for runName in matchedNames:
        # Sanity check: the query above was restricted to RUN collections.
        assert registry.getCollectionType(runName).name == "RUN"
        chains = registry.getCollectionParentChains(runName)
        runs.append(RemoveRun(name=runName, parents=list(chains)))

        results = registry.queryDatasets(..., collections=runName)
        assert isinstance(results, DatasetQueryResults)
        for perType in results.byParentDatasetType():
            # Skip dataset types for which the non-executing check
            # reports no results.
            if not perType.any(exact=False, execute=False):
                continue
            counts[perType.parentDatasetType.name] += perType.count(exact=False)

    # Return a plain dict ordered by dataset type name.
    return runs, dict(sorted(counts.items()))
def removeRuns(
    repo: str,
    collection: str,
) -> RemoveRunsResult:
    """Prepare the removal of RUN collections.

    Nothing is deleted by this call itself; the deletion happens when the
    ``onConfirmation`` callback of the returned object is invoked.

    Parameters
    ----------
    repo : `str`
        The repository to operate on; passed to ``Butler.from_config``.
    collection : `str`
        The collection string selecting the runs to remove; passed to
        ``_getCollectionInfo``.

    Returns
    -------
    collections : `RemoveRunsResult`
        Contains information describing what will be removed, plus the
        callback that performs the removal.
    """
    runs, datasets = _getCollectionInfo(repo, collection)

    def _doRemove(toRemove: Sequence[RemoveRun]) -> None:
        """Unlink each run from its parent chains, then delete the runs."""
        butler = Butler.from_config(repo, writeable=True)
        registry = butler.registry
        # NOTE(review): the run deletion is assumed to sit inside the
        # transaction block alongside the chain updates -- confirm against
        # the original indentation.
        with butler.transaction():
            for run in toRemove:
                for parentName in run.parents:
                    # Rewrite the parent chain without this run.
                    members = list(registry.getCollectionChain(parentName))
                    members.remove(run.name)
                    registry.setCollectionChain(parentName, members, flatten=False)
            runNames = [run.name for run in toRemove]
            butler.removeRuns(runNames, unstore=True)

    return RemoveRunsResult(
        onConfirmation=partial(_doRemove, runs),
        runs=runs,
        datasets=datasets,
    )