Coverage for python/lsst/daf/butler/remote_butler/_remote_butler.py: 3%
141 statements
coverage.py v7.3.2, created at 2023-12-01 10:59 +0000
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__all__ = ("RemoteButler",)

from collections.abc import Collection, Iterable, Sequence
from contextlib import AbstractContextManager
from typing import Any, TextIO

import httpx
from lsst.daf.butler import __version__
from lsst.daf.butler.repo_relocation import replaceRoot
from lsst.resources import ResourcePath, ResourcePathExpression
from lsst.utils.introspection import get_full_type_name

from .._butler import Butler
from .._butler_config import ButlerConfig
from .._config import Config
from .._dataset_existence import DatasetExistence
from .._dataset_ref import DatasetId, DatasetIdGenEnum, DatasetRef, SerializedDatasetRef
from .._dataset_type import DatasetType, SerializedDatasetType
from .._deferredDatasetHandle import DeferredDatasetHandle
from .._file_dataset import FileDataset
from .._limited_butler import LimitedButler
from .._storage_class import StorageClass
from .._timespan import Timespan
from ..datastore import DatasetRefURIs
from ..dimensions import DataCoordinate, DataId, DimensionConfig, DimensionUniverse, SerializedDataCoordinate
from ..registry import MissingDatasetTypeError, NoDefaultCollectionError, Registry, RegistryDefaults
from ..registry.wildcards import CollectionWildcard
from ..transfers import RepoExportContext
from ._authentication import get_authentication_headers, get_authentication_token_from_environment
from ._config import RemoteButlerConfigModel
from .server_models import FindDatasetModel


class RemoteButler(Butler):
    def __init__(
        self,
        # These parameters are inherited from the Butler() constructor
        config: Config | ResourcePathExpression | None = None,
        *,
        collections: Any = None,
        run: str | None = None,
        searchPaths: Sequence[ResourcePathExpression] | None = None,
        writeable: bool | None = None,
        inferDefaults: bool = True,
        # Parameters unique to RemoteButler
        http_client: httpx.Client | None = None,
        access_token: str | None = None,
        **kwargs: Any,
    ):
        butler_config = ButlerConfig(config, searchPaths, without_datastore=True)
        # There is a convention in Butler config files where <butlerRoot> in a
        # configuration option refers to the directory containing the
        # configuration file. We allow this for the remote butler's URL so
        # that the server doesn't have to know which hostname it is being
        # accessed from.
        server_url_key = ("remote_butler", "url")
        if server_url_key in butler_config:
            butler_config[server_url_key] = replaceRoot(
                butler_config[server_url_key], butler_config.configDir
            )
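        # For example (hypothetical deployment, not from the source): if this
        # configuration was read from
        # https://example.org/butler/repo/main/butler.json and it sets the URL
        # to "<butlerRoot>", the client ends up talking to
        # https://example.org/butler/repo/main, whatever hostname the server
        # happens to be deployed behind.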
        self._config = RemoteButlerConfigModel.model_validate(butler_config)

        self._dimensions: DimensionUniverse | None = None
        # TODO: RegistryDefaults should have finish() called on it, but this
        # requires getCollectionSummary() which is not yet implemented
        self._registry_defaults = RegistryDefaults(collections, run, inferDefaults, **kwargs)

        if http_client is not None:
            # We have injected a client explicitly into the class.
            # This is generally done for testing.
            self._client = http_client
        else:
            server_url = str(self._config.remote_butler.url)
            auth_headers = {}
            if access_token is None:
                access_token = get_authentication_token_from_environment(server_url)
            if access_token is not None:
                auth_headers = get_authentication_headers(access_token)

            headers = {"user-agent": f"{get_full_type_name(self)}/{__version__}"}
            headers.update(auth_headers)
            self._client = httpx.Client(headers=headers, base_url=server_url)
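
    # A minimal construction sketch (hypothetical URL, not from the source):
    #
    #     butler = RemoteButler("https://example.org/butler/repo/main/butler.json")
    #
    # where the referenced configuration file is expected to provide the
    # "remote_butler.url" key validated by RemoteButlerConfigModel above.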

    def isWriteable(self) -> bool:
        # Docstring inherited.
        return False

    @property
    def dimensions(self) -> DimensionUniverse:
        # Docstring inherited.
        if self._dimensions is not None:
            return self._dimensions

        response = self._client.get(self._get_url("universe"))
        response.raise_for_status()

        config = DimensionConfig.fromString(response.text, format="json")
        self._dimensions = DimensionUniverse(config)
        return self._dimensions

    def _simplify_dataId(
        self, dataId: DataId | None, **kwargs: dict[str, int | str]
    ) -> SerializedDataCoordinate | None:
        """Take a generic Data ID and convert it to a serializable form.

        Parameters
        ----------
        dataId : `dict`, `None`, `DataCoordinate`
            The data ID to serialize.
        **kwargs : `dict`
            Additional values that should be included if this is not
            a `DataCoordinate`.

        Returns
        -------
        data_id : `SerializedDataCoordinate` or `None`
            A serializable form.
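
        Examples
        --------
        Illustrative only; the data ID values are hypothetical::

            butler._simplify_dataId({"instrument": "LSSTCam"}, exposure=42)

        returns a `SerializedDataCoordinate` wrapping the merged mapping
        ``{"instrument": "LSSTCam", "exposure": 42}``.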
145 """
146 if dataId is None and not kwargs:
147 return None
148 if isinstance(dataId, DataCoordinate):
149 return dataId.to_simple()
151 if dataId is None:
152 data_id = kwargs
153 elif kwargs:
154 # Change variable because DataId is immutable and mypy complains.
155 data_id = dict(dataId)
156 data_id.update(kwargs)
158 # Assume we can treat it as a dict.
159 return SerializedDataCoordinate(dataId=data_id)

    def _caching_context(self) -> AbstractContextManager[None]:
        # Docstring inherited.
        # Not implemented for now; we will have to think about whether this
        # needs to do something on the client side and/or the remote side.
        raise NotImplementedError()

    def transaction(self) -> AbstractContextManager[None]:
        """Will always raise NotImplementedError.

        Transactions are not supported by RemoteButler.
        """
        raise NotImplementedError()

    def put(
        self,
        obj: Any,
        datasetRefOrType: DatasetRef | DatasetType | str,
        /,
        dataId: DataId | None = None,
        *,
        run: str | None = None,
        **kwargs: Any,
    ) -> DatasetRef:
        # Docstring inherited.
        raise NotImplementedError()

    def getDeferred(
        self,
        datasetRefOrType: DatasetRef | DatasetType | str,
        /,
        dataId: DataId | None = None,
        *,
        parameters: dict | None = None,
        collections: Any = None,
        storageClass: str | StorageClass | None = None,
        **kwargs: Any,
    ) -> DeferredDatasetHandle:
        # Docstring inherited.
        raise NotImplementedError()

    def get(
        self,
        datasetRefOrType: DatasetRef | DatasetType | str,
        /,
        dataId: DataId | None = None,
        *,
        parameters: dict[str, Any] | None = None,
        collections: Any = None,
        storageClass: StorageClass | str | None = None,
        **kwargs: Any,
    ) -> Any:
        # Docstring inherited.
        raise NotImplementedError()

    def getURIs(
        self,
        datasetRefOrType: DatasetRef | DatasetType | str,
        /,
        dataId: DataId | None = None,
        *,
        predict: bool = False,
        collections: Any = None,
        run: str | None = None,
        **kwargs: Any,
    ) -> DatasetRefURIs:
        # Docstring inherited.
        raise NotImplementedError()

    def getURI(
        self,
        datasetRefOrType: DatasetRef | DatasetType | str,
        /,
        dataId: DataId | None = None,
        *,
        predict: bool = False,
        collections: Any = None,
        run: str | None = None,
        **kwargs: Any,
    ) -> ResourcePath:
        # Docstring inherited.
        raise NotImplementedError()

    def get_dataset_type(self, name: str) -> DatasetType:
        # In a future implementation this should directly access the cache
        # and only go to the server if the dataset type is not known.
        path = f"dataset_type/{name}"
        response = self._client.get(self._get_url(path))
        if response.status_code != httpx.codes.OK:
            content = response.json()
            if content["exception"] == "MissingDatasetTypeError":
                raise MissingDatasetTypeError(content["detail"])
        response.raise_for_status()
        return DatasetType.from_simple(SerializedDatasetType(**response.json()), universe=self.dimensions)

    def get_dataset(
        self,
        id: DatasetId,
        storage_class: str | StorageClass | None = None,
        dimension_records: bool = False,
        datastore_records: bool = False,
    ) -> DatasetRef | None:
        path = f"dataset/{id}"
        if isinstance(storage_class, StorageClass):
            storage_class_name = storage_class.name
        elif storage_class:
            storage_class_name = storage_class
        params: dict[str, str | bool] = {
            "dimension_records": dimension_records,
            "datastore_records": datastore_records,
        }
        if datastore_records:
            raise ValueError("Datastore records can not yet be returned in client/server butler.")
        if storage_class:
            params["storage_class"] = storage_class_name
        response = self._client.get(self._get_url(path), params=params)
        response.raise_for_status()
        if response.json() is None:
            return None
        return DatasetRef.from_simple(SerializedDatasetRef(**response.json()), universe=self.dimensions)

    def find_dataset(
        self,
        dataset_type: DatasetType | str,
        data_id: DataId | None = None,
        *,
        collections: str | Sequence[str] | None = None,
        timespan: Timespan | None = None,
        storage_class: str | StorageClass | None = None,
        dimension_records: bool = False,
        datastore_records: bool = False,
        **kwargs: Any,
    ) -> DatasetRef | None:
        if collections is None:
            if not self.collections:
                raise NoDefaultCollectionError(
                    "No collections provided to find_dataset, and no defaults from butler construction."
                )
            collections = self.collections
        # Temporary hack: assume strings for collections. In the future we
        # want to construct a CollectionWildcard and filter it through the
        # collection cache to generate the list of collection names.
        wildcards = CollectionWildcard.from_expression(collections)

        if datastore_records:
            raise ValueError("Datastore records can not yet be returned in client/server butler.")
        if timespan:
            raise ValueError("Timespan can not yet be used in butler client/server.")

        if isinstance(dataset_type, DatasetType):
            dataset_type = dataset_type.name

        if isinstance(storage_class, StorageClass):
            storage_class = storage_class.name

        query = FindDatasetModel(
            data_id=self._simplify_dataId(data_id, **kwargs),
            collections=wildcards.strings,
            storage_class=storage_class,
            dimension_records=dimension_records,
            datastore_records=datastore_records,
        )
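
        # Illustration (hypothetical values): for dataset_type "raw" and
        # collections ["LSSTCam/raw/all"], the body POSTed to
        # "v1/find_dataset/raw" below looks roughly like
        #     {"data_id": ..., "collections": ["LSSTCam/raw/all"], ...}
        # with fields still at their defaults omitted by the
        # exclude_unset/exclude_defaults arguments.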

        path = f"find_dataset/{dataset_type}"
        response = self._client.post(
            self._get_url(path), json=query.model_dump(mode="json", exclude_unset=True, exclude_defaults=True)
        )
        response.raise_for_status()

        # The return type allows `None`, so guard against a null response
        # body (mirroring get_dataset()) instead of crashing in the
        # SerializedDatasetRef constructor.
        if response.json() is None:
            return None
        return DatasetRef.from_simple(SerializedDatasetRef(**response.json()), universe=self.dimensions)

    def retrieveArtifacts(
        self,
        refs: Iterable[DatasetRef],
        destination: ResourcePathExpression,
        transfer: str = "auto",
        preserve_path: bool = True,
        overwrite: bool = False,
    ) -> list[ResourcePath]:
        # Docstring inherited.
        raise NotImplementedError()

    def exists(
        self,
        dataset_ref_or_type: DatasetRef | DatasetType | str,
        /,
        data_id: DataId | None = None,
        *,
        full_check: bool = True,
        collections: Any = None,
        **kwargs: Any,
    ) -> DatasetExistence:
        # Docstring inherited.
        raise NotImplementedError()

    def _exists_many(
        self,
        refs: Iterable[DatasetRef],
        /,
        *,
        full_check: bool = True,
    ) -> dict[DatasetRef, DatasetExistence]:
        # Docstring inherited.
        raise NotImplementedError()

    def removeRuns(self, names: Iterable[str], unstore: bool = True) -> None:
        # Docstring inherited.
        raise NotImplementedError()

    def ingest(
        self,
        *datasets: FileDataset,
        transfer: str | None = "auto",
        run: str | None = None,
        idGenerationMode: DatasetIdGenEnum | None = None,
        record_validation_info: bool = True,
    ) -> None:
        # Docstring inherited.
        raise NotImplementedError()

    def export(
        self,
        *,
        directory: str | None = None,
        filename: str | None = None,
        format: str | None = None,
        transfer: str | None = None,
    ) -> AbstractContextManager[RepoExportContext]:
        # Docstring inherited.
        raise NotImplementedError()

    def import_(
        self,
        *,
        directory: ResourcePathExpression | None = None,
        filename: ResourcePathExpression | TextIO | None = None,
        format: str | None = None,
        transfer: str | None = None,
        skip_dimensions: set | None = None,
    ) -> None:
        # Docstring inherited.
        raise NotImplementedError()

    def transfer_from(
        self,
        source_butler: LimitedButler,
        source_refs: Iterable[DatasetRef],
        transfer: str = "auto",
        skip_missing: bool = True,
        register_dataset_types: bool = False,
        transfer_dimensions: bool = False,
    ) -> Collection[DatasetRef]:
        # Docstring inherited.
        raise NotImplementedError()

    def validateConfiguration(
        self,
        logFailures: bool = False,
        datasetTypeNames: Iterable[str] | None = None,
        ignore: Iterable[str] | None = None,
    ) -> None:
        # Docstring inherited.
        raise NotImplementedError()

    @property
    def collections(self) -> Sequence[str]:
        # Docstring inherited.
        return self._registry_defaults.collections

    @property
    def run(self) -> str | None:
        # Docstring inherited.
        return self._registry_defaults.run

    @property
    def registry(self) -> Registry:
        # Docstring inherited.
        raise NotImplementedError()

    def pruneDatasets(
        self,
        refs: Iterable[DatasetRef],
        *,
        disassociate: bool = True,
        unstore: bool = False,
        tags: Iterable[str] = (),
        purge: bool = False,
    ) -> None:
        # Docstring inherited.
        raise NotImplementedError()

    def _get_url(self, path: str, version: str = "v1") -> str:
        """Form the complete path to an endpoint on the server.

        Parameters
        ----------
        path : `str`
            The relative path to the server endpoint.
        version : `str`, optional
            Version string to prepend to path. Defaults to "v1".

        Returns
        -------
        path : `str`
            The full path to the endpoint.
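
        Examples
        --------
        >>> butler._get_url("universe")
        'v1/universe'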
464 """
465 return f"{version}/{path}"