Coverage for python/lsst/daf/butler/script/removeRuns.py: 35%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

36 statements  

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22 

23from collections import defaultdict 

24from dataclasses import dataclass 

25from functools import partial 

26from typing import Callable, Dict, List, Mapping, Sequence, Tuple 

27 

28from .._butler import Butler 

29from ..registry import CollectionType, MissingCollectionError 

30from ..registry.queries import DatasetQueryResults 

31 

32 

@dataclass
class RemoveRunsResult:
    """Container to return to the cli command.

    Holds the names of the runs that will be deleted, a map of dataset type
    name to how many datasets of that type will be deleted, and the callback
    that executes the removal once the user has confirmed.
    """

    # Zero-argument callback that performs the removal.
    onConfirmation: Callable[[], None]
    # Names of the run collections that will be removed.
    runs: Sequence[str]
    # Mapping of dataset type name to how many datasets will be removed.
    datasets: Mapping[str, int]

48 

49 

def _getCollectionInfo(
    repo: str,
    collection: str,
) -> Tuple[List[str], Mapping[str, int]]:
    """Find the RUN collections that match the collection string and count
    the datasets they contain, grouped by dataset type.

    Parameters
    ----------
    repo : `str`
        The URI to the repository.
    collection : `str`
        The collection string to search for. Same as the `expression`
        argument to `registry.queryCollections`.

    Returns
    -------
    runs : `list` of `str`
        The runs that will be removed.
    datasets : `dict` [`str`, `int`]
        The dataset types and how many of each will be removed, with keys
        in sorted order. Counts are requested with ``exact=False``.
    """
    butler = Butler(repo)
    try:
        collectionNames = list(
            butler.registry.queryCollections(
                collectionTypes=frozenset((CollectionType.RUN,)),
                expression=collection,
                includeChains=False,
            )
        )
    except MissingCollectionError:
        # A missing collection is treated the same as "nothing matched".
        collectionNames = []

    runs: List[str] = []
    datasets: Dict[str, int] = defaultdict(int)
    for collectionName in collectionNames:
        # Sanity check: the query above was restricted to RUN collections.
        assert butler.registry.getCollectionType(collectionName).name == "RUN"
        runs.append(collectionName)
        all_results = butler.registry.queryDatasets(..., collections=collectionName)
        # Narrow the type so that byParentDatasetType() is known to exist.
        assert isinstance(all_results, DatasetQueryResults)
        for r in all_results.byParentDatasetType():
            # Only dataset types reporting at least one match are counted,
            # so no zero-count entries are created.
            if r.any(exact=False, execute=False):
                datasets[r.parentDatasetType.name] += r.count(exact=False)

    # Return a plain dict ordered by dataset type name.
    return runs, dict(sorted(datasets.items()))

94 

95 

def removeRuns(
    repo: str,
    collection: str,
) -> RemoveRunsResult:
    """Prepare the removal of RUN collections.

    Nothing is removed by this call itself. The matching runs and their
    dataset counts are gathered, and the removal is packaged as the
    ``onConfirmation`` callback of the returned result.

    Parameters
    ----------
    repo : `str`
        Same as the ``config`` argument to ``Butler.__init__``
    collection : `str`
        The collection string to search for. Passed unchanged to
        `_getCollectionInfo`, which uses it as the ``expression`` argument
        to ``registry.queryCollections``.

    Returns
    -------
    collections : `RemoveRunsResult`
        Contains information describing what will be removed, and the
        callback that performs the removal.
    """
    runs, datasets = _getCollectionInfo(repo, collection)

    def doRemove(runs: Sequence[str]) -> None:
        """Perform the remove step."""
        # A writeable butler is created only when removal is executed.
        butler = Butler(repo, writeable=True)
        butler.removeRuns(runs, unstore=True)

    return RemoveRunsResult(
        onConfirmation=partial(doRemove, runs),
        runs=runs,
        datasets=datasets,
    )