Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24"""POSIX datastore.""" 

25 

26__all__ = ("PosixDatastore", ) 

27 

28import logging 

29import os 

30from typing import ( 

31 TYPE_CHECKING, 

32 Any, 

33 ClassVar, 

34 Optional, 

35 Union 

36) 

37 

38from .fileLikeDatastore import FileLikeDatastore 

39from lsst.daf.butler.core.utils import safeMakeDir 

40from lsst.daf.butler import StoredFileInfo, DatasetRef 

41 

42if TYPE_CHECKING: 42 ↛ 43line 42 didn't jump to line 43, because the condition on line 42 was never true

43 from .fileLikeDatastore import DatastoreFileGetInformation 

44 from lsst.daf.butler import DatastoreConfig 

45 from lsst.daf.butler.registry.interfaces import DatastoreRegistryBridgeManager 

46 

47log = logging.getLogger(__name__) 

48 

49 

class PosixDatastore(FileLikeDatastore):
    """Basic POSIX filesystem backed Datastore.

    Parameters
    ----------
    config : `DatastoreConfig` or `str`
        Configuration. A string should refer to the name of the config file.
    bridgeManager : `DatastoreRegistryBridgeManager`
        Object that manages the interface between `Registry` and datastores.
    butlerRoot : `str`, optional
        New datastore root to use to override the configuration value.

    Raises
    ------
    ValueError
        If the root location has a URI scheme other than ``file``, or if the
        root location does not exist and ``create`` is missing from, or
        false in, the configuration.

    Notes
    -----
    PosixDatastore supports all transfer modes for file-based ingest:
    `"move"`, `"copy"`, `"symlink"`, `"hardlink"`, `"relsymlink"`
    and `None` (no transfer).
    """

    defaultConfigFile: ClassVar[Optional[str]] = "datastores/posixDatastore.yaml"
    """Path to configuration defaults. Accessed within the ``config`` resource
    or relative to a search path. Can be None if no defaults specified.
    """

    def __init__(self, config: Union[DatastoreConfig, str],
                 bridgeManager: DatastoreRegistryBridgeManager, butlerRoot: Optional[str] = None):
        super().__init__(config, bridgeManager, butlerRoot)

        # Check that root is a valid URI for this datastore. An empty scheme
        # is accepted as well as an explicit "file" scheme.
        if self.root.scheme and self.root.scheme != "file":
            raise ValueError(f"Root location must only be a file URI not {self.root}")

        # Create the root on demand, but only when the configuration
        # explicitly enables it.
        if not self.root.exists():
            if "create" not in self.config or not self.config["create"]:
                raise ValueError(f"No valid root and not allowed to create one at: {self.root}")
            self.root.mkdir()

    def _read_artifact_into_memory(self, getInfo: DatastoreFileGetInformation,
                                   ref: DatasetRef, isComponent: bool = False) -> Any:
        """Read a stored file into memory using its formatter.

        Parameters
        ----------
        getInfo : `DatastoreFileGetInformation`
            Supplies the file location, the recorded file information, the
            formatter to read with, the component name, the read storage
            class and the assembler parameters.
        ref : `DatasetRef`
            The dataset being read. Only used here to build error messages.
        isComponent : `bool`, optional
            If `True`, ask the formatter for ``getInfo.component`` rather
            than for the full dataset.

        Returns
        -------
        inMemoryDataset : `object`
            Whatever ``_post_process_get`` returns for the formatter result.

        Raises
        ------
        FileNotFoundError
            If nothing exists at the expected location.
        RuntimeError
            If the size of the file differs from the recorded size.
        ValueError
            If the formatter raises any exception while reading; the
            original exception is chained as the cause.
        """
        location = getInfo.location

        # Too expensive to recalculate the checksum on fetch
        # but we can check size and existence
        if not os.path.exists(location.path):
            raise FileNotFoundError(f"Dataset with Id {ref.id} does not seem to exist at"
                                    f" expected location of {location.path}")
        size = location.uri.size()
        storedFileInfo = getInfo.info
        if size != storedFileInfo.file_size:
            raise RuntimeError(
                f"Integrity failure in Datastore. Size of file {location.path} ({size}) does not"
                f" match recorded size of {storedFileInfo.file_size}"
            )

        formatter = getInfo.formatter
        try:
            log.debug("Reading %s from location %s with formatter %s",
                      f"component {getInfo.component}" if isComponent else "",
                      location.uri, type(formatter).__name__)
            result = formatter.read(component=getInfo.component if isComponent else None)
        except Exception as e:
            # Report every formatter failure uniformly, with enough context
            # to identify the dataset and file involved.
            raise ValueError(f"Failure from formatter '{formatter.name()}' for dataset {ref.id}"
                             f" ({ref.datasetType.name} from {location.path}): {e}") from e

        return self._post_process_get(result, getInfo.readStorageClass, getInfo.assemblerParams,
                                      isComponent=isComponent)

    def _write_in_memory_to_artifact(self, inMemoryDataset: Any, ref: DatasetRef) -> StoredFileInfo:
        # Inherit docstring

        location, formatter = self._prepare_for_put(inMemoryDataset, ref)

        storageDir = os.path.dirname(location.path)
        if not os.path.isdir(storageDir):
            # Never try to remove this after creating it since there might
            # be a butler ingest process running concurrently that will
            # already think this directory exists.
            safeMakeDir(storageDir)

        # Write the file
        predictedFullPath = os.path.join(self.root.ospath, formatter.predictPath())

        if os.path.exists(predictedFullPath):
            # Assume that by this point if registry thinks the file should
            # not exist then the file should not exist and therefore we can
            # overwrite it. This can happen if a put was interrupted by
            # an external interrupt. The only time this could be problematic is
            # if the file template is incomplete and multiple dataset refs
            # result in identical filenames.
            log.warning("Object %s exists in datastore for ref %s", location.uri, ref)

        def _removeFileExists(path: str) -> None:
            """Remove a file and do not complain if it is not there.

            This is important since a formatter might fail before the file
            is written and we should not confuse people by writing spurious
            error messages to the log.
            """
            try:
                os.remove(path)
            except FileNotFoundError:
                pass

        if self._transaction is None:
            raise RuntimeError("Attempting to write dataset without transaction enabled")

        # The formatter failure is captured inside the ``undoWith`` block and
        # re-raised only after the block has exited normally.
        # NOTE(review): presumably this is so the file-removal undo action is
        # still recorded when the formatter fails part-way through a write --
        # confirm against the ``undoWith`` implementation.
        formatter_exception = None
        with self._transaction.undoWith("write", _removeFileExists, predictedFullPath):
            try:
                path = formatter.write(inMemoryDataset)
                log.debug("Wrote file to %s", path)
            except Exception as e:
                formatter_exception = e

        # Compare against None rather than relying on truthiness: an
        # exception object may define ``__bool__``/``__len__`` and be falsy,
        # which would silently drop the failure.
        if formatter_exception is not None:
            raise formatter_exception

        # Internal consistency check: the formatter must have written to the
        # path it predicted, since that is the path registered for undo.
        assert predictedFullPath == os.path.join(self.root.ospath, path), \
            f"Formatter wrote to {path!r} but {predictedFullPath!r} was predicted"

        return self._extractIngestInfo(path, ref, formatter=formatter)