Coverage for python/lsst/daf/butler/remote_butler/_remote_butler.py: 3% of 139 statements (coverage.py v7.3.2, created at 2023-12-08 10:55 +0000)
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("RemoteButler",)

from collections.abc import Collection, Iterable, Mapping, Sequence
from contextlib import AbstractContextManager
from typing import TYPE_CHECKING, Any, TextIO

import httpx
from lsst.daf.butler import __version__
from lsst.daf.butler.repo_relocation import replaceRoot
from lsst.resources import ResourcePath, ResourcePathExpression
from lsst.utils.introspection import get_full_type_name

from .._butler import Butler
from .._butler_config import ButlerConfig
from .._dataset_ref import DatasetRef, SerializedDatasetRef
from .._dataset_type import DatasetType, SerializedDatasetType
from .._storage_class import StorageClass
from ..dimensions import DataCoordinate, DimensionConfig, DimensionUniverse, SerializedDataCoordinate
from ..registry import MissingDatasetTypeError, NoDefaultCollectionError, RegistryDefaults
from ..registry.wildcards import CollectionWildcard
from ._authentication import get_authentication_headers, get_authentication_token_from_environment
from ._config import RemoteButlerConfigModel
from .server_models import FindDatasetModel

if TYPE_CHECKING:
    from .._config import Config
    from .._dataset_existence import DatasetExistence
    from .._dataset_ref import DatasetId, DatasetIdGenEnum
    from .._deferredDatasetHandle import DeferredDatasetHandle
    from .._file_dataset import FileDataset
    from .._limited_butler import LimitedButler
    from .._query import Query
    from .._timespan import Timespan
    from ..datastore import DatasetRefURIs
    from ..dimensions import DataId, DimensionGroup, DimensionRecord
    from ..registry import CollectionArgType, Registry
    from ..transfers import RepoExportContext


class RemoteButler(Butler):
    def __init__(
        self,
        # These parameters are inherited from the Butler() constructor
        config: Config | ResourcePathExpression | None = None,
        *,
        collections: Any = None,
        run: str | None = None,
        searchPaths: Sequence[ResourcePathExpression] | None = None,
        writeable: bool | None = None,
        inferDefaults: bool = True,
        # Parameters unique to RemoteButler
        http_client: httpx.Client | None = None,
        access_token: str | None = None,
        **kwargs: Any,
    ):
        butler_config = ButlerConfig(config, searchPaths, without_datastore=True)
        # There is a convention in Butler config files where <butlerRoot> in a
        # configuration option refers to the directory containing the
        # configuration file. We allow this for the remote butler's URL so
        # that the server doesn't have to know which hostname it is being
        # accessed from.
        server_url_key = ("remote_butler", "url")
        if server_url_key in butler_config:
            butler_config[server_url_key] = replaceRoot(
                butler_config[server_url_key], butler_config.configDir
            )
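            # For example, if the config file was fetched from
            # https://example.org/repo/butler.yaml (hypothetical URL) and
            # contained:
            #
            #     remote_butler:
            #       url: <butlerRoot>
            #
            # the URL would resolve to https://example.org/repo.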
        self._config = RemoteButlerConfigModel.model_validate(butler_config)

        self._dimensions: DimensionUniverse | None = None
        # TODO: RegistryDefaults should have finish() called on it, but this
        # requires getCollectionSummary(), which is not yet implemented.
        self._registry_defaults = RegistryDefaults(collections, run, inferDefaults, **kwargs)

        if http_client is not None:
            # We have injected a client explicitly into the class.
            # This is generally done for testing.
            self._client = http_client
        else:
            server_url = str(self._config.remote_butler.url)
            auth_headers = {}
            if access_token is None:
                access_token = get_authentication_token_from_environment(server_url)
            if access_token is not None:
                auth_headers = get_authentication_headers(access_token)

            headers = {"user-agent": f"{get_full_type_name(self)}/{__version__}"}
            headers.update(auth_headers)
            self._client = httpx.Client(headers=headers, base_url=server_url)
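            # Requests made through this client are resolved against the
            # server URL and carry the user-agent header plus any
            # authentication headers gathered above.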

    def isWriteable(self) -> bool:
        # Docstring inherited.
        return False

    @property
    def dimensions(self) -> DimensionUniverse:
        # Docstring inherited.
        if self._dimensions is not None:
            return self._dimensions

        response = self._client.get(self._get_url("universe"))
        response.raise_for_status()

        config = DimensionConfig.fromString(response.text, format="json")
        self._dimensions = DimensionUniverse(config)
        return self._dimensions

    def _simplify_dataId(
        self, dataId: DataId | None, **kwargs: dict[str, int | str]
    ) -> SerializedDataCoordinate | None:
        """Take a generic Data ID and convert it to a serializable form.

        Parameters
        ----------
        dataId : `dict`, `None`, `DataCoordinate`
            The data ID to serialize.
        **kwargs : `dict`
            Additional values that should be included if this is not
            a `DataCoordinate`.

        Returns
        -------
        data_id : `SerializedDataCoordinate` or `None`
            A serializable form.
        """
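        # For example (values illustrative): passing None with no kwargs
        # yields None, while passing {"instrument": "HSC"} with detector=50
        # yields a SerializedDataCoordinate wrapping
        # {"instrument": "HSC", "detector": 50}.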
        if dataId is None and not kwargs:
            return None
        if isinstance(dataId, DataCoordinate):
            return dataId.to_simple()

        if dataId is None:
            data_id = kwargs
        elif kwargs:
            # Change variable because DataId is immutable and mypy complains.
            data_id = dict(dataId)
            data_id.update(kwargs)
        else:
            # Plain dict-like data ID with no extra values.
            data_id = dict(dataId)

        # Assume we can treat it as a dict.
        return SerializedDataCoordinate(dataId=data_id)

    def _caching_context(self) -> AbstractContextManager[None]:
        # Docstring inherited.
        # Not implemented for now, will have to think whether this needs to
        # do something on client side and/or remote side.
        raise NotImplementedError()

    def transaction(self) -> AbstractContextManager[None]:
        """Will always raise NotImplementedError.

        Transactions are not supported by RemoteButler.
        """
        raise NotImplementedError()

    def put(
        self,
        obj: Any,
        datasetRefOrType: DatasetRef | DatasetType | str,
        /,
        dataId: DataId | None = None,
        *,
        run: str | None = None,
        **kwargs: Any,
    ) -> DatasetRef:
        # Docstring inherited.
        raise NotImplementedError()

    def getDeferred(
        self,
        datasetRefOrType: DatasetRef | DatasetType | str,
        /,
        dataId: DataId | None = None,
        *,
        parameters: dict | None = None,
        collections: Any = None,
        storageClass: str | StorageClass | None = None,
        **kwargs: Any,
    ) -> DeferredDatasetHandle:
        # Docstring inherited.
        raise NotImplementedError()

    def get(
        self,
        datasetRefOrType: DatasetRef | DatasetType | str,
        /,
        dataId: DataId | None = None,
        *,
        parameters: dict[str, Any] | None = None,
        collections: Any = None,
        storageClass: StorageClass | str | None = None,
        **kwargs: Any,
    ) -> Any:
        # Docstring inherited.
        raise NotImplementedError()

    def getURIs(
        self,
        datasetRefOrType: DatasetRef | DatasetType | str,
        /,
        dataId: DataId | None = None,
        *,
        predict: bool = False,
        collections: Any = None,
        run: str | None = None,
        **kwargs: Any,
    ) -> DatasetRefURIs:
        # Docstring inherited.
        raise NotImplementedError()

    def getURI(
        self,
        datasetRefOrType: DatasetRef | DatasetType | str,
        /,
        dataId: DataId | None = None,
        *,
        predict: bool = False,
        collections: Any = None,
        run: str | None = None,
        **kwargs: Any,
    ) -> ResourcePath:
        # Docstring inherited.
        raise NotImplementedError()

    def get_dataset_type(self, name: str) -> DatasetType:
        # In future implementation this should directly access the cache
        # and only go to the server if the dataset type is not known.
        path = f"dataset_type/{name}"
        response = self._client.get(self._get_url(path))
        if response.status_code != httpx.codes.OK:
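            # The server encodes recognized butler exceptions as a JSON body
            # of the form {"exception": <class name>, "detail": <message>};
            # translate the ones we know back into the matching exception.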
            content = response.json()
            if content["exception"] == "MissingDatasetTypeError":
                raise MissingDatasetTypeError(content["detail"])
            response.raise_for_status()
        return DatasetType.from_simple(SerializedDatasetType(**response.json()), universe=self.dimensions)

    def get_dataset(
        self,
        id: DatasetId,
        storage_class: str | StorageClass | None = None,
        dimension_records: bool = False,
        datastore_records: bool = False,
    ) -> DatasetRef | None:
        path = f"dataset/{id}"
        if isinstance(storage_class, StorageClass):
            storage_class_name = storage_class.name
        elif storage_class:
            storage_class_name = storage_class
        params: dict[str, str | bool] = {
            "dimension_records": dimension_records,
            "datastore_records": datastore_records,
        }
        if datastore_records:
            raise ValueError("Datastore records can not yet be returned in client/server butler.")
        if storage_class:
            params["storage_class"] = storage_class_name
        response = self._client.get(self._get_url(path), params=params)
        response.raise_for_status()
        if response.json() is None:
            return None
        return DatasetRef.from_simple(SerializedDatasetRef(**response.json()), universe=self.dimensions)

    def find_dataset(
        self,
        dataset_type: DatasetType | str,
        data_id: DataId | None = None,
        *,
        collections: str | Sequence[str] | None = None,
        timespan: Timespan | None = None,
        storage_class: str | StorageClass | None = None,
        dimension_records: bool = False,
        datastore_records: bool = False,
        **kwargs: Any,
    ) -> DatasetRef | None:
        if collections is None:
            if not self.collections:
                raise NoDefaultCollectionError(
                    "No collections provided to find_dataset, and no defaults from butler construction."
                )
            collections = self.collections
        # Temporary hack. Assume strings for collections. In the future we
        # want to construct a CollectionWildcard and filter it through the
        # collection cache to generate the list of collection names.
        wildcards = CollectionWildcard.from_expression(collections)
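        # For a plain collection name this is effectively a no-op, e.g.
        # (name illustrative)
        # CollectionWildcard.from_expression("defaults").strings
        # == ("defaults",).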

        if datastore_records:
            raise ValueError("Datastore records can not yet be returned in client/server butler.")
        if timespan:
            raise ValueError("Timespan can not yet be used in butler client/server.")

        if isinstance(dataset_type, DatasetType):
            dataset_type = dataset_type.name

        if isinstance(storage_class, StorageClass):
            storage_class = storage_class.name

        query = FindDatasetModel(
            data_id=self._simplify_dataId(data_id, **kwargs),
            collections=wildcards.strings,
            storage_class=storage_class,
            dimension_records=dimension_records,
            datastore_records=datastore_records,
        )

        path = f"find_dataset/{dataset_type}"
        response = self._client.post(
            self._get_url(path), json=query.model_dump(mode="json", exclude_unset=True, exclude_defaults=True)
        )
        response.raise_for_status()

        # Assume, as in get_dataset above, that the server returns JSON null
        # when no matching dataset exists.
        if response.json() is None:
            return None
        return DatasetRef.from_simple(SerializedDatasetRef(**response.json()), universe=self.dimensions)

    def retrieveArtifacts(
        self,
        refs: Iterable[DatasetRef],
        destination: ResourcePathExpression,
        transfer: str = "auto",
        preserve_path: bool = True,
        overwrite: bool = False,
    ) -> list[ResourcePath]:
        # Docstring inherited.
        raise NotImplementedError()

    def exists(
        self,
        dataset_ref_or_type: DatasetRef | DatasetType | str,
        /,
        data_id: DataId | None = None,
        *,
        full_check: bool = True,
        collections: Any = None,
        **kwargs: Any,
    ) -> DatasetExistence:
        # Docstring inherited.
        raise NotImplementedError()

    def _exists_many(
        self,
        refs: Iterable[DatasetRef],
        /,
        *,
        full_check: bool = True,
    ) -> dict[DatasetRef, DatasetExistence]:
        # Docstring inherited.
        raise NotImplementedError()

    def removeRuns(self, names: Iterable[str], unstore: bool = True) -> None:
        # Docstring inherited.
        raise NotImplementedError()

    def ingest(
        self,
        *datasets: FileDataset,
        transfer: str | None = "auto",
        run: str | None = None,
        idGenerationMode: DatasetIdGenEnum | None = None,
        record_validation_info: bool = True,
    ) -> None:
        # Docstring inherited.
        raise NotImplementedError()

    def export(
        self,
        *,
        directory: str | None = None,
        filename: str | None = None,
        format: str | None = None,
        transfer: str | None = None,
    ) -> AbstractContextManager[RepoExportContext]:
        # Docstring inherited.
        raise NotImplementedError()

    def import_(
        self,
        *,
        directory: ResourcePathExpression | None = None,
        filename: ResourcePathExpression | TextIO | None = None,
        format: str | None = None,
        transfer: str | None = None,
        skip_dimensions: set | None = None,
    ) -> None:
        # Docstring inherited.
        raise NotImplementedError()

    def transfer_dimension_records_from(
        self, source_butler: LimitedButler | Butler, source_refs: Iterable[DatasetRef]
    ) -> None:
        # Docstring inherited.
        raise NotImplementedError()

    def transfer_from(
        self,
        source_butler: LimitedButler,
        source_refs: Iterable[DatasetRef],
        transfer: str = "auto",
        skip_missing: bool = True,
        register_dataset_types: bool = False,
        transfer_dimensions: bool = False,
    ) -> Collection[DatasetRef]:
        # Docstring inherited.
        raise NotImplementedError()

    def validateConfiguration(
        self,
        logFailures: bool = False,
        datasetTypeNames: Iterable[str] | None = None,
        ignore: Iterable[str] | None = None,
    ) -> None:
        # Docstring inherited.
        raise NotImplementedError()

    @property
    def collections(self) -> Sequence[str]:
        # Docstring inherited.
        return self._registry_defaults.collections

    @property
    def run(self) -> str | None:
        # Docstring inherited.
        return self._registry_defaults.run

    @property
    def registry(self) -> Registry:
        # Docstring inherited.
        raise NotImplementedError()

    def _query(self) -> AbstractContextManager[Query]:
        # Docstring inherited.
        raise NotImplementedError()

    def _query_data_ids(
        self,
        dimensions: DimensionGroup | Iterable[str] | str,
        *,
        data_id: DataId | None = None,
        where: str = "",
        bind: Mapping[str, Any] | None = None,
        expanded: bool = False,
        order_by: Iterable[str] | str | None = None,
        limit: int | None = None,
        offset: int | None = None,
        explain: bool = True,
        **kwargs: Any,
    ) -> list[DataCoordinate]:
        # Docstring inherited.
        raise NotImplementedError()

    def _query_datasets(
        self,
        dataset_type: Any,
        collections: CollectionArgType | None = None,
        *,
        find_first: bool = True,
        data_id: DataId | None = None,
        where: str = "",
        bind: Mapping[str, Any] | None = None,
        expanded: bool = False,
        explain: bool = True,
        **kwargs: Any,
    ) -> list[DatasetRef]:
        # Docstring inherited.
        raise NotImplementedError()

    def _query_dimension_records(
        self,
        element: str,
        *,
        data_id: DataId | None = None,
        where: str = "",
        bind: Mapping[str, Any] | None = None,
        order_by: Iterable[str] | str | None = None,
        limit: int | None = None,
        offset: int | None = None,
        explain: bool = True,
        **kwargs: Any,
    ) -> list[DimensionRecord]:
        # Docstring inherited.
        raise NotImplementedError()

    def pruneDatasets(
        self,
        refs: Iterable[DatasetRef],
        *,
        disassociate: bool = True,
        unstore: bool = False,
        tags: Iterable[str] = (),
        purge: bool = False,
    ) -> None:
        # Docstring inherited.
        raise NotImplementedError()

    def _get_url(self, path: str, version: str = "v1") -> str:
        """Form the complete path to an endpoint on the server.

        Parameters
        ----------
        path : `str`
            The relative path to the server endpoint.
        version : `str`, optional
            Version string to prepend to path. Defaults to "v1".

        Returns
        -------
        path : `str`
            The full path to the endpoint.
        """
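        # For example, self._get_url("universe") returns "v1/universe"; httpx
        # resolves this relative path against the client's base_url.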
        return f"{version}/{path}"