Coverage for python/lsst/daf/butler/remote_butler/_remote_butler.py: 3%
140 statements
coverage.py v7.3.2, created at 2023-11-04 09:45 +0000
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__all__ = ("RemoteButler",)

from collections.abc import Collection, Iterable, Sequence
from contextlib import AbstractContextManager
from typing import Any, TextIO

import httpx
from lsst.daf.butler import __version__
from lsst.daf.butler.repo_relocation import replaceRoot
from lsst.resources import ResourcePath, ResourcePathExpression
from lsst.utils.introspection import get_full_type_name

from .._butler import Butler
from .._butler_config import ButlerConfig
from .._config import Config
from .._dataset_existence import DatasetExistence
from .._dataset_ref import DatasetId, DatasetIdGenEnum, DatasetRef, SerializedDatasetRef
from .._dataset_type import DatasetType, SerializedDatasetType
from .._deferredDatasetHandle import DeferredDatasetHandle
from .._file_dataset import FileDataset
from .._limited_butler import LimitedButler
from .._storage_class import StorageClass
from .._timespan import Timespan
from ..datastore import DatasetRefURIs
from ..dimensions import DataCoordinate, DataId, DimensionConfig, DimensionUniverse, SerializedDataCoordinate
from ..registry import MissingDatasetTypeError, NoDefaultCollectionError, Registry, RegistryDefaults
from ..registry.wildcards import CollectionWildcard
from ..transfers import RepoExportContext
from ._authentication import get_authentication_headers, get_authentication_token_from_environment
from ._config import RemoteButlerConfigModel
from .server import FindDatasetModel


class RemoteButler(Butler):
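    """Butler implementation that proxies operations to a remote Butler
    server over HTTP.

    This is a partial, currently read-only implementation: ``isWriteable``
    returns `False` and many inherited methods raise `NotImplementedError`.
    """
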
    def __init__(
        self,
        # These parameters are inherited from the Butler() constructor
        config: Config | ResourcePathExpression | None = None,
        *,
        collections: Any = None,
        run: str | None = None,
        searchPaths: Sequence[ResourcePathExpression] | None = None,
        writeable: bool | None = None,
        inferDefaults: bool = True,
        # Parameters unique to RemoteButler
        http_client: httpx.Client | None = None,
        access_token: str | None = None,
        **kwargs: Any,
    ):
        butler_config = ButlerConfig(config, searchPaths, without_datastore=True)
        # There is a convention in Butler config files where <butlerRoot> in a
        # configuration option refers to the directory containing the
        # configuration file. We allow this for the remote butler's URL so
        # that the server doesn't have to know which hostname it is being
        # accessed from.
        server_url_key = ("remote_butler", "url")
        if server_url_key in butler_config:
            butler_config[server_url_key] = replaceRoot(
                butler_config[server_url_key], butler_config.configDir
            )
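        # For illustration, a hypothetical butler.yaml served by the
        # repository could contain
        #
        #     remote_butler:
        #       url: "<butlerRoot>"
        #
        # in which case the URL resolves to the directory the configuration
        # file was read from.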
        self._config = RemoteButlerConfigModel.model_validate(butler_config)

        self._dimensions: DimensionUniverse | None = None
        # TODO: RegistryDefaults should have finish() called on it, but this
        # requires getCollectionSummary() which is not yet implemented.
        self._registry_defaults = RegistryDefaults(collections, run, inferDefaults, **kwargs)

        if http_client is not None:
            # We have injected a client explicitly into the class.
            # This is generally done for testing.
            self._client = http_client
        else:
            server_url = str(self._config.remote_butler.url)
            auth_headers = {}
            if access_token is None:
                access_token = get_authentication_token_from_environment(server_url)
            if access_token is not None:
                auth_headers = get_authentication_headers(access_token)

            headers = {"user-agent": f"{get_full_type_name(self)}/{__version__}"}
            headers.update(auth_headers)
            self._client = httpx.Client(headers=headers, base_url=server_url)
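
    # A minimal construction sketch (the URL, token value, and collection
    # name below are hypothetical):
    #
    #     butler = RemoteButler(
    #         "https://example.org/api/butler/repo/main/butler.yaml",
    #         collections=["HSC/defaults"],
    #         access_token="<token>",
    #     )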

    def isWriteable(self) -> bool:
        # Docstring inherited.
        return False

    @property
    def dimensions(self) -> DimensionUniverse:
        # Docstring inherited.
        if self._dimensions is not None:
            return self._dimensions

        response = self._client.get(self._get_url("universe"))
        response.raise_for_status()

        config = DimensionConfig.fromString(response.text, format="json")
        self._dimensions = DimensionUniverse(config)
        return self._dimensions

    def _simplify_dataId(
        self, dataId: DataId | None, **kwargs: dict[str, int | str]
    ) -> SerializedDataCoordinate | None:
        """Take a generic Data ID and convert it to a serializable form.

        Parameters
        ----------
        dataId : `dict`, `None`, `DataCoordinate`
            The data ID to serialize.
        **kwargs : `dict`
            Additional values that should be included if this is not
            a `DataCoordinate`.

        Returns
        -------
        data_id : `SerializedDataCoordinate` or `None`
            A serializable form.
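
        Examples
        --------
        For illustration (assuming ``instrument`` and ``detector`` are valid
        dimension names in this repository), the following calls produce
        equivalent serialized data IDs::

            self._simplify_dataId({"instrument": "X", "detector": 1})
            self._simplify_dataId(None, instrument="X", detector=1)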
        """
        if dataId is None and not kwargs:
            return None
        if isinstance(dataId, DataCoordinate):
            return dataId.to_simple()

        if dataId is None:
            data_id = kwargs
        elif kwargs:
            # Change variable because DataId is immutable and mypy complains.
            data_id = dict(dataId)
            data_id.update(kwargs)
        else:
            # Plain mapping with no extra keyword values; copy it so data_id
            # is always bound below.
            data_id = dict(dataId)

        # Assume we can treat it as a dict.
        return SerializedDataCoordinate(dataId=data_id)

    def transaction(self) -> AbstractContextManager[None]:
        """Will always raise NotImplementedError.

        Transactions are not supported by RemoteButler.
        """
        raise NotImplementedError()

    def put(
        self,
        obj: Any,
        datasetRefOrType: DatasetRef | DatasetType | str,
        /,
        dataId: DataId | None = None,
        *,
        run: str | None = None,
        **kwargs: Any,
    ) -> DatasetRef:
        # Docstring inherited.
        raise NotImplementedError()

    def getDeferred(
        self,
        datasetRefOrType: DatasetRef | DatasetType | str,
        /,
        dataId: DataId | None = None,
        *,
        parameters: dict | None = None,
        collections: Any = None,
        storageClass: str | StorageClass | None = None,
        **kwargs: Any,
    ) -> DeferredDatasetHandle:
        # Docstring inherited.
        raise NotImplementedError()

    def get(
        self,
        datasetRefOrType: DatasetRef | DatasetType | str,
        /,
        dataId: DataId | None = None,
        *,
        parameters: dict[str, Any] | None = None,
        collections: Any = None,
        storageClass: StorageClass | str | None = None,
        **kwargs: Any,
    ) -> Any:
        # Docstring inherited.
        raise NotImplementedError()

    def getURIs(
        self,
        datasetRefOrType: DatasetRef | DatasetType | str,
        /,
        dataId: DataId | None = None,
        *,
        predict: bool = False,
        collections: Any = None,
        run: str | None = None,
        **kwargs: Any,
    ) -> DatasetRefURIs:
        # Docstring inherited.
        raise NotImplementedError()

    def getURI(
        self,
        datasetRefOrType: DatasetRef | DatasetType | str,
        /,
        dataId: DataId | None = None,
        *,
        predict: bool = False,
        collections: Any = None,
        run: str | None = None,
        **kwargs: Any,
    ) -> ResourcePath:
        # Docstring inherited.
        raise NotImplementedError()

    def get_dataset_type(self, name: str) -> DatasetType:
        # In future implementation this should directly access the cache
        # and only go to the server if the dataset type is not known.
        path = f"dataset_type/{name}"
        response = self._client.get(self._get_url(path))
        if response.status_code != httpx.codes.OK:
            content = response.json()
            if content["exception"] == "MissingDatasetTypeError":
                raise MissingDatasetTypeError(content["detail"])
            response.raise_for_status()
        return DatasetType.from_simple(SerializedDatasetType(**response.json()), universe=self.dimensions)
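
    # The error handling above assumes the server reports known failures as a
    # JSON body carrying ``exception`` and ``detail`` fields, for example
    # (illustrative):
    #
    #     {"exception": "MissingDatasetTypeError", "detail": "..."}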

    def get_dataset(
        self,
        id: DatasetId,
        storage_class: str | StorageClass | None = None,
        dimension_records: bool = False,
        datastore_records: bool = False,
    ) -> DatasetRef | None:
        path = f"dataset/{id}"
        if isinstance(storage_class, StorageClass):
            storage_class_name = storage_class.name
        elif storage_class:
            storage_class_name = storage_class
        params: dict[str, str | bool] = {
            "dimension_records": dimension_records,
            "datastore_records": datastore_records,
        }
        if datastore_records:
            raise ValueError("Datastore records can not yet be returned in client/server butler.")
        if storage_class:
            params["storage_class"] = storage_class_name
        response = self._client.get(self._get_url(path), params=params)
        response.raise_for_status()
        if response.json() is None:
            return None
        return DatasetRef.from_simple(SerializedDatasetRef(**response.json()), universe=self.dimensions)
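
    # Usage sketch (the dataset ID value is a hypothetical UUID):
    #
    #     ref = butler.get_dataset(
    #         UUID("00000000-0000-0000-0000-000000000000"),
    #         dimension_records=True,
    #     )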

    def find_dataset(
        self,
        dataset_type: DatasetType | str,
        data_id: DataId | None = None,
        *,
        collections: str | Sequence[str] | None = None,
        timespan: Timespan | None = None,
        storage_class: str | StorageClass | None = None,
        dimension_records: bool = False,
        datastore_records: bool = False,
        **kwargs: Any,
    ) -> DatasetRef | None:
        if collections is None:
            if not self.collections:
                raise NoDefaultCollectionError(
                    "No collections provided to find_dataset, and no defaults from butler construction."
                )
            collections = self.collections
        # Temporary hack. Assume strings for collections. In the future we
        # want to construct a CollectionWildcard and filter it through the
        # collection cache to generate a list of collection names.
        wildcards = CollectionWildcard.from_expression(collections)

        if datastore_records:
            raise ValueError("Datastore records can not yet be returned in client/server butler.")
        if timespan:
            raise ValueError("Timespan can not yet be used in butler client/server.")

        if isinstance(dataset_type, DatasetType):
            dataset_type = dataset_type.name

        if isinstance(storage_class, StorageClass):
            storage_class = storage_class.name

        query = FindDatasetModel(
            data_id=self._simplify_dataId(data_id, **kwargs),
            collections=wildcards.strings,
            storage_class=storage_class,
            dimension_records=dimension_records,
            datastore_records=datastore_records,
        )

        path = f"find_dataset/{dataset_type}"
        response = self._client.post(
            self._get_url(path), json=query.model_dump(mode="json", exclude_unset=True, exclude_defaults=True)
        )
        response.raise_for_status()

        return DatasetRef.from_simple(SerializedDatasetRef(**response.json()), universe=self.dimensions)
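
    # Usage sketch (the dataset type name, data ID values, and collection
    # below are hypothetical):
    #
    #     ref = butler.find_dataset(
    #         "calexp",
    #         instrument="HSC",
    #         detector=10,
    #         visit=903342,
    #         collections="HSC/runs/example",
    #     )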

    def retrieveArtifacts(
        self,
        refs: Iterable[DatasetRef],
        destination: ResourcePathExpression,
        transfer: str = "auto",
        preserve_path: bool = True,
        overwrite: bool = False,
    ) -> list[ResourcePath]:
        # Docstring inherited.
        raise NotImplementedError()

    def exists(
        self,
        dataset_ref_or_type: DatasetRef | DatasetType | str,
        /,
        data_id: DataId | None = None,
        *,
        full_check: bool = True,
        collections: Any = None,
        **kwargs: Any,
    ) -> DatasetExistence:
        # Docstring inherited.
        raise NotImplementedError()

    def _exists_many(
        self,
        refs: Iterable[DatasetRef],
        /,
        *,
        full_check: bool = True,
    ) -> dict[DatasetRef, DatasetExistence]:
        # Docstring inherited.
        raise NotImplementedError()

    def removeRuns(self, names: Iterable[str], unstore: bool = True) -> None:
        # Docstring inherited.
        raise NotImplementedError()

    def ingest(
        self,
        *datasets: FileDataset,
        transfer: str | None = "auto",
        run: str | None = None,
        idGenerationMode: DatasetIdGenEnum | None = None,
        record_validation_info: bool = True,
    ) -> None:
        # Docstring inherited.
        raise NotImplementedError()

    def export(
        self,
        *,
        directory: str | None = None,
        filename: str | None = None,
        format: str | None = None,
        transfer: str | None = None,
    ) -> AbstractContextManager[RepoExportContext]:
        # Docstring inherited.
        raise NotImplementedError()

    def import_(
        self,
        *,
        directory: ResourcePathExpression | None = None,
        filename: ResourcePathExpression | TextIO | None = None,
        format: str | None = None,
        transfer: str | None = None,
        skip_dimensions: set | None = None,
    ) -> None:
        # Docstring inherited.
        raise NotImplementedError()

    def transfer_from(
        self,
        source_butler: LimitedButler,
        source_refs: Iterable[DatasetRef],
        transfer: str = "auto",
        skip_missing: bool = True,
        register_dataset_types: bool = False,
        transfer_dimensions: bool = False,
    ) -> Collection[DatasetRef]:
        # Docstring inherited.
        raise NotImplementedError()

    def validateConfiguration(
        self,
        logFailures: bool = False,
        datasetTypeNames: Iterable[str] | None = None,
        ignore: Iterable[str] | None = None,
    ) -> None:
        # Docstring inherited.
        raise NotImplementedError()

    @property
    def collections(self) -> Sequence[str]:
        # Docstring inherited.
        return self._registry_defaults.collections

    @property
    def run(self) -> str | None:
        # Docstring inherited.
        return self._registry_defaults.run

    @property
    def registry(self) -> Registry:
        # Docstring inherited.
        raise NotImplementedError()

    def pruneDatasets(
        self,
        refs: Iterable[DatasetRef],
        *,
        disassociate: bool = True,
        unstore: bool = False,
        tags: Iterable[str] = (),
        purge: bool = False,
    ) -> None:
        # Docstring inherited.
        raise NotImplementedError()

    def _get_url(self, path: str, version: str = "v1") -> str:
        """Form the complete path to an endpoint on the server.

        Parameters
        ----------
        path : `str`
            The relative path to the server endpoint.
        version : `str`, optional
            Version string to prepend to path. Defaults to "v1".

        Returns
        -------
        path : `str`
            The full path to the endpoint.
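
        Examples
        --------
        ``self._get_url("universe")`` returns ``"v1/universe"``.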
        """
        return f"{version}/{path}"