Coverage for python/lsst/daf/butler/script/removeRuns.py: 34%

48 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-03-30 02:51 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27from __future__ import annotations 

28 

29from collections import defaultdict 

30from collections.abc import Callable, Mapping, Sequence 

31from dataclasses import dataclass 

32from functools import partial 

33 

34from .._butler import Butler 

35from ..registry import CollectionType, MissingCollectionError 

36from ..registry.queries import DatasetQueryResults 

37 

38 

@dataclass
class RemoveRun:
    """Represents a RUN collection to remove."""

    # Name of the RUN collection.
    name: str
    # Names of the parent CHAINED collections the RUN belongs to.
    parents: list[str]

47 

48 

@dataclass
class RemoveRunsResult:
    """Container to return to the cli command.

    Contains the runs that will be deleted, and a map of dataset type name
    to how many datasets of that type will be deleted. Also contains the
    callback function that executes the removal upon user confirmation.
    """

    # Zero-argument callback that performs the removal.
    onConfirmation: Callable[[], None]
    # The RUN collections that will be removed.
    runs: Sequence[RemoveRun]
    # Mapping of dataset type name to how many datasets will be removed.
    datasets: Mapping[str, int]

64 

65 

def _getCollectionInfo(
    repo: str,
    collection: str,
) -> tuple[list[RemoveRun], Mapping[str, int]]:
    """Find the RUN collections matching an expression and count their
    datasets.

    Parameters
    ----------
    repo : `str`
        The URI to the repository.
    collection : `str`
        The collection string to search for. Same as the `expression`
        argument to `registry.queryCollections`.

    Returns
    -------
    runs : `list` of `RemoveRun`
        Describes the runs that will be removed.
    datasets : `dict` [`str`, `int`]
        The dataset type names, in sorted order, and how many datasets of
        each type will be removed.
    """
    butler = Butler.from_config(repo)
    try:
        collectionNames = list(
            butler.registry.queryCollections(
                collectionTypes=frozenset((CollectionType.RUN,)),
                expression=collection,
                includeChains=False,
            )
        )
    except MissingCollectionError:
        # A missing collection is reported as "nothing to remove" rather
        # than as an error.
        collectionNames = []
    runs = []
    datasets: dict[str, int] = defaultdict(int)
    for collectionName in collectionNames:
        # The query above was restricted to RUN collections; double check.
        assert butler.registry.getCollectionType(collectionName).name == "RUN"
        parents = butler.registry.getCollectionParentChains(collectionName)
        runs.append(RemoveRun(collectionName, list(parents)))
        all_results = butler.registry.queryDatasets(..., collections=collectionName)
        assert isinstance(all_results, DatasetQueryResults)
        for r in all_results.byParentDatasetType():
            # Only count dataset types that may have results, accumulating
            # the (inexact) counts per type across all matched runs.
            if r.any(exact=False, execute=False):
                datasets[r.parentDatasetType.name] += r.count(exact=False)
    # Return a plain dict ordered by dataset type name.
    return runs, {k: datasets[k] for k in sorted(datasets)}

111 

112 

def removeRuns(
    repo: str,
    collection: str,
) -> RemoveRunsResult:
    """Describe the RUN collections to remove and prepare their removal.

    Nothing is removed by this call itself; the removal happens only when
    the ``onConfirmation`` callback of the returned object is invoked.

    Parameters
    ----------
    repo : `str`
        Repository location; passed to ``Butler.from_config``.
    collection : `str`
        Collection expression selecting the RUN collections to remove;
        passed to ``_getCollectionInfo``.

    Returns
    -------
    result : `RemoveRunsResult`
        Contains information describing what will be removed, and the
        callback that performs the removal.
    """
    runs, datasets = _getCollectionInfo(repo, collection)

    def _doRemove(runs: Sequence[RemoveRun]) -> None:
        """Perform the remove step."""
        butler = Butler.from_config(repo, writeable=True)
        # NOTE(review): block nesting was reconstructed from a flattened
        # listing; confirm that ``butler.removeRuns`` belongs inside the
        # transaction.
        with butler.transaction():
            for run in runs:
                # Take the run out of every parent chain before removing it.
                for parent in run.parents:
                    children = list(butler.registry.getCollectionChain(parent))
                    children.remove(run.name)
                    butler.registry.setCollectionChain(parent, children, flatten=False)
            butler.removeRuns([r.name for r in runs], unstore=True)

    result = RemoveRunsResult(
        # Bind the runs found now so the callback takes no arguments.
        onConfirmation=partial(_doRemove, runs),
        runs=runs,
        datasets=datasets,
    )
    return result