# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

"""POSIX datastore."""

__all__ = ("PosixDatastore", )

import logging
import os
from typing import (
    TYPE_CHECKING,
    Any,
    ClassVar,
    Optional,
    Union
)

from .fileLikeDatastore import FileLikeDatastore
from lsst.daf.butler.core.utils import safeMakeDir
from lsst.daf.butler import StoredFileInfo, DatasetRef

if TYPE_CHECKING:
    from .fileLikeDatastore import DatastoreFileGetInformation
    from lsst.daf.butler import DatastoreConfig
    from lsst.daf.butler.registry.interfaces import DatastoreRegistryBridgeManager

log = logging.getLogger(__name__)


class PosixDatastore(FileLikeDatastore):
    """Basic POSIX filesystem backed Datastore.

    Parameters
    ----------
    config : `DatastoreConfig` or `str`
        Configuration. A string should refer to the name of the config file.
    bridgeManager : `DatastoreRegistryBridgeManager`
        Object that manages the interface between `Registry` and datastores.
    butlerRoot : `str`, optional
        New datastore root to use to override the configuration value.

    Raises
    ------
    ValueError
        If root location does not exist and ``create`` is `False` in the
        configuration.

    Notes
    -----
    PosixDatastore supports all transfer modes for file-based ingest:
    `"move"`, `"copy"`, `"symlink"`, `"hardlink"`, `"relsymlink"`
    and `None` (no transfer).

    For PosixDatastore, the `"auto"` transfer mode will operate in-place (like
    ``transfer=None``) if the file is already within the datastore root, and
    fall back to `"link"` otherwise.

    See `Datastore.ingest` for more information on transfer modes.
    """

    defaultConfigFile: ClassVar[Optional[str]] = "datastores/posixDatastore.yaml"
    """Path to configuration defaults. Accessed within the ``configs`` resource
    or relative to a search path. Can be None if no defaults specified.
    """

    def __init__(self, config: Union[DatastoreConfig, str],
                 bridgeManager: DatastoreRegistryBridgeManager, butlerRoot: Optional[str] = None):
        super().__init__(config, bridgeManager, butlerRoot)

        # Check that root is a valid URI for this datastore
        if self.root.scheme and self.root.scheme != "file":
            raise ValueError(f"Root location must only be a file URI not {self.root}")

        if not self.root.exists():
            if "create" not in self.config or not self.config["create"]:
                raise ValueError(f"No valid root and not allowed to create one at: {self.root}")
            self.root.mkdir()

    def _read_artifact_into_memory(self, getInfo: DatastoreFileGetInformation,
                                   ref: DatasetRef, isComponent: bool = False) -> Any:
        location = getInfo.location

        # Too expensive to recalculate the checksum on fetch
        # but we can check size and existence
        if not os.path.exists(location.path):
            raise FileNotFoundError("Dataset with Id {} does not seem to exist at"
                                    " expected location of {}".format(ref.id, location.path))
        size = location.uri.size()
        storedFileInfo = getInfo.info
        if size != storedFileInfo.file_size:
            raise RuntimeError("Integrity failure in Datastore. Size of file {} ({}) does not"
                               " match recorded size of {}".format(location.path, size,
                                                                   storedFileInfo.file_size))

        formatter = getInfo.formatter
        try:
            log.debug("Reading %s from location %s with formatter %s",
                      f"component {getInfo.component}" if isComponent else "",
                      location.uri, type(formatter).__name__)
            result = formatter.read(component=getInfo.component if isComponent else None)
        except Exception as e:
            raise ValueError(f"Failure from formatter '{formatter.name()}' for dataset {ref.id}"
                             f" ({ref.datasetType.name} from {location.path}): {e}") from e

        return self._post_process_get(result, getInfo.readStorageClass, getInfo.assemblerParams,
                                      isComponent=isComponent)

    def _write_in_memory_to_artifact(self, inMemoryDataset: Any, ref: DatasetRef) -> StoredFileInfo:
        # Inherit docstring

        location, formatter = self._prepare_for_put(inMemoryDataset, ref)

        storageDir = os.path.dirname(location.path)
        if not os.path.isdir(storageDir):
            # Never try to remove this after creating it since there might
            # be a butler ingest process running concurrently that will
            # already think this directory exists.
            safeMakeDir(storageDir)

        # Write the file
        predictedFullPath = os.path.join(self.root.ospath, formatter.predictPath())

        if os.path.exists(predictedFullPath):
            # Assume that by this point if registry thinks the file should
            # not exist then the file should not exist and therefore we can
            # overwrite it. This can happen if a put was interrupted
            # externally. The only time this could be problematic is
            # if the file template is incomplete and multiple dataset refs
            # result in identical filenames.
            log.warning("Object %s exists in datastore for ref %s", location.uri, ref)

        def _removeFileExists(path: str) -> None:
            """Remove a file and do not complain if it is not there.

            This is important since a formatter might fail before the file
            is written and we should not confuse people by writing spurious
            error messages to the log.
            """
            try:
                os.remove(path)
            except FileNotFoundError:
                pass

        if self._transaction is None:
            raise RuntimeError("Attempting to write dataset without transaction enabled")

        # Perform the write inside the datastore transaction, registering
        # _removeFileExists as the undo action so any (possibly partial) file
        # is deleted if the write has to be rolled back. A formatter error is
        # captured and re-raised only after the context manager has exited.
        formatter_exception = None
        with self._transaction.undoWith("write", _removeFileExists, predictedFullPath):
            try:
                path = formatter.write(inMemoryDataset)
                log.debug("Wrote file to %s", path)
            except Exception as e:
                formatter_exception = e

        if formatter_exception:
            raise formatter_exception

        # Sanity check that the formatter wrote to the location predicted
        # above, since that is the path registered for cleanup.
        assert predictedFullPath == os.path.join(self.root.ospath, path)

        return self._extractIngestInfo(path, ref, formatter=formatter)