Coverage for python/lsst/daf/butler/script/pruneCollection.py: 15%

50 statements  

« prev     ^ index     » next       coverage.py v6.4.1, created at 2022-06-23 09:44 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22 

23from dataclasses import dataclass 

24from typing import Callable, Dict, List, Optional, Union 

25 

26from astropy.table import Table 

27 

28from .._butler import ( 

29 Butler, 

30 PurgeUnsupportedPruneCollectionsError, 

31 PurgeWithoutUnstorePruneCollectionsError, 

32 RunWithoutPurgePruneCollectionsError, 

33) 

34from ..registry import CollectionType 

35from .queryDatasets import QueryDatasets 

36 

37 

class PruneCollectionResult:
    """Container for the outcome of preparing a prune-collection operation.

    Parameters
    ----------
    confirm : `bool`
        `True` if the user should be shown what will be removed before the
        collection is pruned.
    """

    def __init__(self, confirm: bool) -> None:
        # True if the user should be shown what will be removed before
        # pruning the collection.
        self.confirm: bool = confirm
        # The callback function that does the work; `None` until a callback
        # has been assigned.
        self.onConfirmation: Optional[Callable[[], None]] = None
        # If ``confirm`` is `True`, will contain the astropy table describing
        # the data that will be removed; `None` until it has been assigned.
        self.removeTable: Optional[Table] = None

48 

49 

def pruneCollection(
    repo: str, collection: str, purge: bool, unstore: bool, unlink: List[str], confirm: bool
) -> PruneCollectionResult:
    """Prepare to remove a collection and possibly prune datasets within it.

    Nothing is removed by this function itself: the removal is packaged as a
    callback on the returned object and only happens when that callback is
    called.

    Parameters
    ----------
    repo : `str`
        Same as the ``config`` argument to ``Butler.__init__``
    collection : `str`
        Same as the ``name`` argument to ``Butler.pruneCollection``.
    purge : `bool`
        Same as the ``purge`` argument to ``Butler.pruneCollection``.
    unstore : `bool`
        Same as the ``unstore`` argument to ``Butler.pruneCollection``.
    unlink : `list` [`str`]
        Same as the ``unlink`` argument to ``Butler.pruneCollection``.
    confirm : `bool`
        If `True` will produce a table of collections that will be removed for
        display to the user.

    Returns
    -------
    result : `PruneCollectionResult`
        Object whose ``onConfirmation`` attribute is the callback that
        performs the prune. If ``confirm`` is `True` its ``removeTable``
        attribute is an `astropy.table.Table` listing the collections that
        will be removed, their type, and the number of datasets in the
        collection if applicable; otherwise ``removeTable`` is `None`.

    Notes
    -----
    The ``onConfirmation`` callback raises `TypeError` when
    ``Butler.pruneCollection`` rejects the combination of options or the
    collection type.
    """

    @dataclass
    class CollectionInfo:
        """Lightweight container to hold the type of collection and the number
        of datasets in the collection if applicable."""

        count: Optional[int]
        type: str

    result = PruneCollectionResult(confirm)
    if confirm:
        print("Searching collections...")
        butler = Butler(repo)
        collectionNames = list(
            butler.registry.queryCollections(
                collectionTypes=frozenset(
                    (
                        CollectionType.RUN,
                        CollectionType.TAGGED,
                        CollectionType.CHAINED,
                        CollectionType.CALIBRATION,
                    )
                ),
                expression=(collection,),
                includeChains=True,
            )
        )

        collections: Dict[str, CollectionInfo] = {}

        def addCollection(name: str) -> None:
            """Add a collection to the collections, recursive if the collection
            being added can contain collections."""
            if name in collections:
                # Already recorded (e.g. reachable through more than one
                # chain); visiting it again would store the same information.
                return
            collectionType = butler.registry.getCollectionType(name).name
            # Only RUN collections get a dataset counter; None marks "not
            # applicable" for every other collection type.
            collections[name] = CollectionInfo(0 if collectionType == "RUN" else None, collectionType)
            if collectionType == "CHAINED":
                for c in butler.registry.getCollectionChain(name):
                    addCollection(c)

        for name in collectionNames:
            addCollection(name)

        # Present the collections in alphabetical order.
        collections = dict(sorted(collections.items()))

        queryDatasets = QueryDatasets(
            repo=repo,
            glob=None,
            collections=[collection],
            where=None,
            find_first=True,
            show_uri=False,
        )
        for datasetRef in queryDatasets.getDatasets():
            # NOTE(review): this lookup raises KeyError if a dataset's run is
            # not one of the collected names -- confirm whether that can
            # happen (e.g. for TAGGED collections).
            collectionInfo = collections[datasetRef.run]
            if collectionInfo.count is None:
                raise RuntimeError(f"Unexpected dataset in collection of type {collectionInfo.type}")
            collectionInfo.count += 1

        result.removeTable = Table(
            [
                list(collections.keys()),
                [v.type for v in collections.values()],
                # Collections without a dataset counter are shown as "-".
                [v.count if v.count is not None else "-" for v in collections.values()],
            ],
            names=("Collection", "Collection Type", "Number of Datasets"),
        )

    def doRemove() -> None:
        """Perform the prune collection step."""
        # A separate, writeable butler is created for the removal itself.
        butler = Butler(repo, writeable=True)
        try:
            butler.pruneCollection(collection, purge, unstore, unlink)
        except PurgeWithoutUnstorePruneCollectionsError as e:
            raise TypeError("Cannot pass --purge without --unstore.") from e
        except RunWithoutPurgePruneCollectionsError as e:
            # Report the collection's name, not its type a second time.
            raise TypeError(f"Cannot prune RUN collection {collection} without --purge.") from e
        except PurgeUnsupportedPruneCollectionsError as e:
            raise TypeError(
                f"Cannot prune {e.collectionType.name} collection {collection} with --purge."
            ) from e

    result.onConfirmation = doRemove
    return result