Coverage for python/lsst/daf/butler/datastores/remoteFileDatastore.py : 73%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

import logging
import os.path
import tempfile
from typing import (
    TYPE_CHECKING,
    Any,
    Optional,
    Union,
)

from lsst.daf.butler import (
    DatasetRef,
    Location,
    StoredFileInfo,
)

from .fileLikeDatastore import FileLikeDatastore

if TYPE_CHECKING:
    from lsst.daf.butler import DatastoreConfig
    from lsst.daf.butler.registry.interfaces import DatastoreRegistryBridgeManager

    from .fileLikeDatastore import DatastoreFileGetInformation
# Module-level logger following the standard one-logger-per-module convention.
log = logging.getLogger(__name__)
class RemoteFileDatastore(FileLikeDatastore):
    """A datastore designed for files at remote locations.

    Parameters
    ----------
    config : `DatastoreConfig` or `str`
        Configuration. A string should refer to the name of the config file.
    bridgeManager : `DatastoreRegistryBridgeManager`
        Object that manages the interface between `Registry` and datastores.
    butlerRoot : `str`, optional
        New datastore root to use to override the configuration value.

    Raises
    ------
    ValueError
        If root location does not exist and ``create`` is `False` in the
        configuration.

    Notes
    -----
    Datastore supports non-link transfer modes for file-based ingest:
    `"move"`, `"copy"`, and `None` (no transfer).
    """

    def __init__(self, config: Union[DatastoreConfig, str],
                 bridgeManager: DatastoreRegistryBridgeManager,
                 butlerRoot: Optional[str] = None):
        super().__init__(config, bridgeManager, butlerRoot)
        if not self.root.exists():
            # Only create the root if the configuration explicitly allows it.
            if "create" not in self.config or not self.config["create"]:
                raise ValueError(f"No valid root and not allowed to create one at: {self.root}")
            try:
                self.root.mkdir()
            except ValueError as e:
                raise ValueError(f"Can not create datastore root '{self.root}', check permissions.") from e

    def _read_artifact_into_memory(self, getInfo: DatastoreFileGetInformation,
                                   ref: DatasetRef, isComponent: bool = False) -> Any:
        """Download an artifact and materialize it as a Python object.

        Parameters
        ----------
        getInfo : `DatastoreFileGetInformation`
            Location, formatter, and stored-file metadata for the artifact.
        ref : `DatasetRef`
            Reference to the dataset being read (used for error reporting).
        isComponent : `bool`, optional
            If `True`, read only the component named in ``getInfo``.

        Returns
        -------
        result : `Any`
            The dataset, post-processed for the requested storage class.

        Raises
        ------
        RuntimeError
            If the downloaded size does not match the recorded file size.
        ValueError
            If the formatter fails to deserialize the downloaded bytes.
        """
        location = getInfo.location

        log.debug("Downloading data from %s", location.uri)
        serializedDataset = location.uri.read()

        # Guard against truncated or corrupted downloads by comparing
        # against the size recorded at put time.
        storedFileInfo = getInfo.info
        if len(serializedDataset) != storedFileInfo.file_size:
            raise RuntimeError("Integrity failure in Datastore. "
                               f"Size of file {location.path} ({len(serializedDataset)}) "
                               f"does not match recorded size of {storedFileInfo.file_size}")

        # format the downloaded bytes into appropriate object directly, or via
        # tempfile (when formatter does not support to/from/Bytes). This is
        # equivalent of PosixDatastore formatter.read try-except block.
        formatter = getInfo.formatter
        try:
            result = formatter.fromBytes(serializedDataset,
                                         component=getInfo.component if isComponent else None)
        except NotImplementedError:
            # formatter might not always have an extension so mypy complains
            # We can either ignore the complaint or use a temporary location
            tmpLoc = Location(".", "temp")
            tmpLoc = formatter.makeUpdatedLocation(tmpLoc)
            with tempfile.NamedTemporaryFile(suffix=tmpLoc.getExtension()) as tmpFile:
                tmpFile.write(serializedDataset)
                # Flush the write. Do not close the file because that
                # will delete it.
                tmpFile.flush()
                formatter._fileDescriptor.location = Location(*os.path.split(tmpFile.name))
                result = formatter.read(component=getInfo.component if isComponent else None)
        except Exception as e:
            raise ValueError(f"Failure from formatter '{formatter.name()}' for dataset {ref.id}"
                             f" ({ref.datasetType.name} from {location.uri}): {e}") from e

        return self._post_process_get(result, getInfo.readStorageClass, getInfo.assemblerParams,
                                      isComponent=isComponent)

    def _write_in_memory_to_artifact(self, inMemoryDataset: Any, ref: DatasetRef) -> StoredFileInfo:
        """Serialize a dataset and upload it to the remote location.

        Parameters
        ----------
        inMemoryDataset : `Any`
            The dataset to store.
        ref : `DatasetRef`
            Reference identifying the dataset being stored.

        Returns
        -------
        info : `StoredFileInfo`
            Metadata describing the stored artifact.

        Raises
        ------
        RuntimeError
            If no transaction is active when the write is attempted.
        """
        location, formatter = self._prepare_for_put(inMemoryDataset, ref)

        if location.uri.exists():
            # Assume that by this point if registry thinks the file should
            # not exist then the file should not exist and therefore we can
            # overwrite it. This can happen if a put was interrupted by
            # an external interrupt. The only time this could be problematic is
            # if the file template is incomplete and multiple dataset refs
            # result in identical filenames.
            # Eventually we should remove the check completely (it takes
            # non-zero time for network).
            log.warning("Object %s exists in datastore for ref %s", location.uri, ref)

        if not location.uri.dirname().exists():
            log.debug("Folder %s does not exist yet.", location.uri.dirname())
            location.uri.dirname().mkdir()

        # A transaction is required so the upload can be undone if the
        # subsequent registry ingest fails.
        if self._transaction is None:
            raise RuntimeError("Attempting to write artifact without transaction enabled")

        # upload the file directly from bytes or by using a temporary file if
        # _toBytes is not implemented
        try:
            serializedDataset = formatter.toBytes(inMemoryDataset)
            log.debug("Writing bytes directly to %s", location.uri)
            location.uri.write(serializedDataset, overwrite=True)
            log.debug("Successfully wrote bytes directly to %s", location.uri)
        except NotImplementedError:
            with tempfile.NamedTemporaryFile(suffix=location.getExtension()) as tmpFile:
                tmpLocation = Location(*os.path.split(tmpFile.name))
                formatter._fileDescriptor.location = tmpLocation
                # BUGFIX: the original call omitted the %s placeholder, so
                # the URI argument was never interpolated into the message.
                log.debug("Writing dataset to temporary directory at %s", tmpLocation.uri)
                formatter.write(inMemoryDataset)
                location.uri.transfer_from(tmpLocation.uri, transfer="copy", overwrite=True)
            log.debug("Successfully wrote dataset to %s via a temporary file.", location.uri)

        # Register a callback to try to delete the uploaded data if
        # the ingest fails below
        self._transaction.registerUndo("remoteWrite", location.uri.remove)

        # URI is needed to resolve what ingest case are we dealing with
        return self._extractIngestInfo(location.uri, ref, formatter=formatter)