Coverage for python/lsst/daf/butler/script/removeRuns.py: 34%

48 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-14 19:21 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23from collections import defaultdict 

24from collections.abc import Callable, Mapping, Sequence 

25from dataclasses import dataclass 

26from functools import partial 

27 

28from .._butler import Butler 

29from ..registry import CollectionType, MissingCollectionError 

30from ..registry.queries import DatasetQueryResults 

31 

32 

@dataclass
class RemoveRun:
    """Describe a single RUN collection that is slated for removal.

    Attributes
    ----------
    name : `str`
        The name of the run.
    parents : `list` [`str`]
        Names of the parent CHAINED collections the RUN belongs to.
    """

    name: str
    parents: list[str]

41 

42 

@dataclass
class RemoveRunsResult:
    """Bundle of information handed back to the cli command.

    Holds the runs that will be deleted, a per-dataset-type tally of how
    many datasets will be deleted, and the callback that carries out the
    removal once the user has confirmed it.

    Attributes
    ----------
    onConfirmation : `~collections.abc.Callable` [[], `None`]
        The callback function that does the removal.
    runs : `~collections.abc.Sequence` [`RemoveRun`]
        The run collections that will be removed.
    datasets : `~collections.abc.Mapping` [`str`, `int`]
        Mapping of dataset type name to how many will be removed.
    """

    onConfirmation: Callable[[], None]
    runs: Sequence[RemoveRun]
    datasets: Mapping[str, int]

58 

59 

def _getCollectionInfo(
    repo: str,
    collection: str,
) -> tuple[list[RemoveRun], Mapping[str, int]]:
    """Find the RUN collections matching an expression and tally the
    datasets they hold.

    Parameters
    ----------
    repo : `str`
        The URI to the repository.
    collection : `str`
        The collection string to search for. Same as the `expression`
        argument to `registry.queryCollections`.

    Returns
    -------
    runs : `list` of `RemoveRun`
        Describes the runs that will be removed.
    datasets : `dict` [`str`, `int`]
        The dataset types and how many will be removed, with keys in
        sorted order.
    """
    butler = Butler(repo)
    registry = butler.registry

    # Only RUN collections are of interest; an expression that matches no
    # collection at all is treated as "nothing to remove" rather than an
    # error.
    try:
        matches = registry.queryCollections(
            expression=collection,
            collectionTypes=frozenset((CollectionType.RUN,)),
            includeChains=False,
        )
        runNames = list(matches)
    except MissingCollectionError:
        runNames = []

    runs: list[RemoveRun] = []
    counts: dict[str, int] = defaultdict(int)
    for runName in runNames:
        assert registry.getCollectionType(runName).name == "RUN"
        parentChains = list(registry.getCollectionParentChains(runName))
        runs.append(RemoveRun(runName, parentChains))

        found = registry.queryDatasets(..., collections=runName)
        assert isinstance(found, DatasetQueryResults)
        for perType in found.byParentDatasetType():
            # Skip dataset types for which the (inexact, non-executing)
            # check reports no results.
            if not perType.any(exact=False, execute=False):
                continue
            counts[perType.parentDatasetType.name] += perType.count(exact=False)

    # Hand back a plain dict ordered by dataset type name.
    return runs, dict(sorted(counts.items()))

105 

106 

def removeRuns(
    repo: str,
    collection: str,
) -> RemoveRunsResult:
    """Prepare the removal of RUN collections.

    Nothing is removed by this call itself; the removal happens when the
    ``onConfirmation`` callback of the returned object is invoked.

    Parameters
    ----------
    repo : `str`
        Same as the ``config`` argument to ``Butler.__init__``
    collection : `str`
        Same as the ``name`` argument to ``Butler.removeRuns``.

    Returns
    -------
    collections : `RemoveRunsResult`
        Contains information describing what will be removed.
    """
    runs, datasets = _getCollectionInfo(repo, collection)

    def performRemoval(targets: Sequence[RemoveRun]) -> None:
        """Detach each run from its parent chains, then remove the runs."""
        butler = Butler(repo, writeable=True)
        with butler.transaction():
            for target in targets:
                for chain in target.parents:
                    # Rewrite the parent chain without this run.
                    remaining = list(butler.registry.getCollectionChain(chain))
                    remaining.remove(target.name)
                    butler.registry.setCollectionChain(chain, remaining, flatten=False)
            # NOTE(review): kept inside the transaction block -- confirm
            # this matches the intended nesting.
            butler.removeRuns([target.name for target in targets], unstore=True)

    return RemoveRunsResult(
        onConfirmation=partial(performRemoval, runs),
        runs=runs,
        datasets=datasets,
    )