Coverage for python/lsst/daf/butler/script/pruneCollection.py: 14%

52 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-02-08 10:28 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24from collections.abc import Callable 

25from dataclasses import dataclass 

26 

27from astropy.table import Table 

28 

29from .._butler import ( 

30 Butler, 

31 PurgeUnsupportedPruneCollectionsError, 

32 PurgeWithoutUnstorePruneCollectionsError, 

33 RunWithoutPurgePruneCollectionsError, 

34) 

35from ..registry import CollectionType 

36from .queryDatasets import QueryDatasets 

37 

38 

class PruneCollectionResult:
    """Hold the outcome of preparing a prune-collection operation.

    Parameters
    ----------
    confirm : `bool`
        `True` if the user should be shown what will be removed before the
        collection is pruned.

    Attributes
    ----------
    confirm : `bool`
        The value passed at construction.
    onConfirmation : `~collections.abc.Callable` or `None`
        Callback that does the actual work; `None` until one is assigned.
    removeTable : `astropy.table.Table` or `None`
        When ``confirm`` is `True`, the table describing the data that will
        be removed; `None` until one is assigned.
    """

    def __init__(self, confirm: bool) -> None:
        self.confirm: bool = confirm
        self.onConfirmation: Callable[[], None] | None = None
        self.removeTable: Table | None = None

49 

50 

def pruneCollection(
    repo: str, collection: str, purge: bool, unstore: bool, unlink: list[str], confirm: bool
) -> PruneCollectionResult:
    """Remove a collection and possibly prune datasets within it.

    Nothing is removed by this function itself: it returns a result object
    whose ``onConfirmation`` callback performs the removal when called.

    Parameters
    ----------
    repo : `str`
        Same as the ``config`` argument to ``Butler.__init__``.
    collection : `str`
        Same as the ``name`` argument to ``Butler.pruneCollection``.
    purge : `bool`
        Same as the ``purge`` argument to ``Butler.pruneCollection``.
    unstore : `bool`
        Same as the ``unstore`` argument to ``Butler.pruneCollection``.
    unlink : `list` [`str`]
        Same as the ``unlink`` argument to ``Butler.pruneCollection``.
    confirm : `bool`
        If `True`, search the repository and build a table of the collections
        that will be removed, for display to the user.

    Returns
    -------
    result : `PruneCollectionResult`
        Object whose ``onConfirmation`` attribute is the callback that does
        the pruning. If ``confirm`` is `True`, its ``removeTable`` attribute
        is an `astropy.table.Table` listing the collections that will be
        removed, their type, and the number of datasets in the collection if
        applicable; otherwise ``removeTable`` is `None`.

    Raises
    ------
    RuntimeError
        Raised while building the table if a dataset's run refers to a
        collection that is not of type RUN.

    Notes
    -----
    The ``onConfirmation`` callback raises `TypeError` if the combination of
    ``purge`` and ``unstore`` is rejected by ``Butler.pruneCollection``.
    """

    @dataclass
    class CollectionInfo:
        """Lightweight container to hold the type of collection and the number
        of datasets in the collection if applicable.
        """

        # Dataset count; `None` for collection types that are not RUN.
        count: int | None
        # Name of the collection type, e.g. "RUN" or "CHAINED".
        type: str

    result = PruneCollectionResult(confirm)
    if confirm:
        print("Searching collections...")
        butler = Butler(repo)
        collectionNames = list(
            butler.registry.queryCollections(
                collectionTypes=frozenset(
                    (
                        CollectionType.RUN,
                        CollectionType.TAGGED,
                        CollectionType.CHAINED,
                        CollectionType.CALIBRATION,
                    )
                ),
                expression=(collection,),
                includeChains=True,
            )
        )

        collections: dict[str, CollectionInfo] = {}

        def addCollection(name: str) -> None:
            """Add a collection to the collections, recursive if the collection
            being added can contain collections.
            """
            collectionType = butler.registry.getCollectionType(name).name
            # Only RUN collections get a dataset counter; others show no count.
            collections[name] = CollectionInfo(0 if collectionType == "RUN" else None, collectionType)
            if collectionType == "CHAINED":
                for c in butler.registry.getCollectionChain(name):
                    addCollection(c)

        for name in collectionNames:
            addCollection(name)

        # Present the collections in alphabetical order.
        collections = {name: collections[name] for name in sorted(collections)}

        queryDatasets = QueryDatasets(
            repo=repo,
            glob=[],
            collections=[collection],
            where="",
            find_first=True,
            show_uri=False,
        )
        for datasetRef in queryDatasets.getDatasets():
            assert datasetRef.run is not None, "This must be a resolved dataset ref"
            # NOTE(review): this lookup assumes every dataset's run is one of
            # the collections gathered above; a run outside that set raises
            # KeyError here. Confirm that cannot happen (e.g. for TAGGED
            # collections).
            collectionInfo = collections[datasetRef.run]
            if collectionInfo.count is None:
                raise RuntimeError(f"Unexpected dataset in collection of type {collectionInfo.type}")
            collectionInfo.count += 1

        result.removeTable = Table(
            [
                list(collections.keys()),
                [v.type for v in collections.values()],
                [v.count if v.count is not None else "-" for v in collections.values()],
            ],
            names=("Collection", "Collection Type", "Number of Datasets"),
        )

    def doRemove() -> None:
        """Perform the prune collection step."""
        butler = Butler(repo, writeable=True)
        try:
            butler.pruneCollection(collection, purge, unstore, unlink)
        except PurgeWithoutUnstorePruneCollectionsError as e:
            raise TypeError("Cannot pass --purge without --unstore.") from e
        except RunWithoutPurgePruneCollectionsError as e:
            # Report the requested collection by name; the exception's
            # ``collectionType.name`` is the name of the type, not of the
            # collection.
            raise TypeError(f"Cannot prune RUN collection {collection} without --purge.") from e
        except PurgeUnsupportedPruneCollectionsError as e:
            raise TypeError(
                f"Cannot prune {e.collectionType.name} collection {collection} with --purge."
            ) from e

    result.onConfirmation = doRemove
    return result