Coverage for python/lsst/daf/butler/remote_butler/_remote_butler.py: 3%
129 statements
coverage.py v7.3.2, created at 2023-11-03 16:25 +0000
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__all__ = ("RemoteButler",)

from collections.abc import Collection, Iterable, Sequence
from contextlib import AbstractContextManager
from typing import Any, TextIO

import httpx
from lsst.daf.butler import __version__
from lsst.resources import ResourcePath, ResourcePathExpression
from lsst.utils.introspection import get_full_type_name

from .._butler import Butler
from .._butler_config import ButlerConfig
from .._config import Config
from .._dataset_existence import DatasetExistence
from .._dataset_ref import DatasetId, DatasetIdGenEnum, DatasetRef, SerializedDatasetRef
from .._dataset_type import DatasetType, SerializedDatasetType
from .._deferredDatasetHandle import DeferredDatasetHandle
from .._file_dataset import FileDataset
from .._limited_butler import LimitedButler
from .._storage_class import StorageClass
from .._timespan import Timespan
from ..datastore import DatasetRefURIs
from ..dimensions import DataCoordinate, DataId, DimensionConfig, DimensionUniverse, SerializedDataCoordinate
from ..registry import MissingDatasetTypeError, NoDefaultCollectionError, Registry, RegistryDefaults
from ..registry.wildcards import CollectionWildcard
from ..transfers import RepoExportContext
from ._config import RemoteButlerConfigModel
from .server import FindDatasetModel


class RemoteButler(Butler):
    def __init__(
        self,
        # These parameters are inherited from the Butler() constructor.
        config: Config | ResourcePathExpression | None = None,
        *,
        collections: Any = None,
        run: str | None = None,
        searchPaths: Sequence[ResourcePathExpression] | None = None,
        writeable: bool | None = None,
        inferDefaults: bool = True,
        # Parameters unique to RemoteButler.
        http_client: httpx.Client | None = None,
        **kwargs: Any,
    ):
        butler_config = ButlerConfig(config, searchPaths, without_datastore=True)
        self._config = RemoteButlerConfigModel.model_validate(butler_config)
        self._dimensions: DimensionUniverse | None = None
        # TODO: RegistryDefaults should have finish() called on it, but this
        # requires getCollectionSummary() which is not yet implemented.
        self._registry_defaults = RegistryDefaults(collections, run, inferDefaults, **kwargs)

        if http_client is not None:
            # A client has been injected explicitly into the class.
            # This is generally done for testing.
            self._client = http_client
        else:
            headers = {"user-agent": f"{get_full_type_name(self)}/{__version__}"}
            self._client = httpx.Client(headers=headers, base_url=str(self._config.remote_butler.url))
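
    # A minimal construction sketch (the repository URI is hypothetical):
    #
    #     butler = RemoteButler("https://example.org/butler/repo/butler.yaml")
    #     print(butler.dimensions)
    #
    # Tests can instead inject an httpx.Client (e.g. one wrapping a test
    # server) via the ``http_client`` parameter so no real server is needed.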

    def isWriteable(self) -> bool:
        # Docstring inherited.
        return False

    @property
    def dimensions(self) -> DimensionUniverse:
        # Docstring inherited.
        if self._dimensions is not None:
            return self._dimensions
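
        # Not cached yet: fetch the dimension configuration from the server
        # and cache the parsed universe for subsequent accesses.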
        response = self._client.get(self._get_url("universe"))
        response.raise_for_status()

        config = DimensionConfig.fromString(response.text, format="json")
        self._dimensions = DimensionUniverse(config)
        return self._dimensions

    def _simplify_dataId(
        self, dataId: DataId | None, **kwargs: dict[str, int | str]
    ) -> SerializedDataCoordinate | None:
        """Take a generic Data ID and convert it to a serializable form.

        Parameters
        ----------
        dataId : `dict`, `None`, `DataCoordinate`
            The data ID to serialize.
        **kwargs : `dict`
            Additional values that should be included if this is not
            a `DataCoordinate`.

        Returns
        -------
        data_id : `SerializedDataCoordinate` or `None`
            A serializable form.
        """
        if dataId is None and not kwargs:
            return None
        if isinstance(dataId, DataCoordinate):
            return dataId.to_simple()

        if dataId is None:
            data_id = kwargs
        elif kwargs:
            # Change variable because DataId is immutable and mypy complains.
            data_id = dict(dataId)
            data_id.update(kwargs)
        else:
            # Copy so the code below always sees a plain mutable dict.
            data_id = dict(dataId)

        # Assume we can treat it as a dict.
        return SerializedDataCoordinate(dataId=data_id)
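
    # Illustrative inputs and results (data ID values are hypothetical):
    #
    #     _simplify_dataId(None)                       -> None
    #     _simplify_dataId({"instrument": "HSC"})      -> SerializedDataCoordinate
    #     _simplify_dataId(None, instrument="HSC")     -> SerializedDataCoordinate
    #     _simplify_dataId(data_coordinate)            -> data_coordinate.to_simple()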

    def transaction(self) -> AbstractContextManager[None]:
        """Will always raise NotImplementedError.

        Transactions are not supported by RemoteButler.
        """
        raise NotImplementedError()

    def put(
        self,
        obj: Any,
        datasetRefOrType: DatasetRef | DatasetType | str,
        /,
        dataId: DataId | None = None,
        *,
        run: str | None = None,
        **kwargs: Any,
    ) -> DatasetRef:
        # Docstring inherited.
        raise NotImplementedError()

    def getDeferred(
        self,
        datasetRefOrType: DatasetRef | DatasetType | str,
        /,
        dataId: DataId | None = None,
        *,
        parameters: dict | None = None,
        collections: Any = None,
        storageClass: str | StorageClass | None = None,
        **kwargs: Any,
    ) -> DeferredDatasetHandle:
        # Docstring inherited.
        raise NotImplementedError()

    def get(
        self,
        datasetRefOrType: DatasetRef | DatasetType | str,
        /,
        dataId: DataId | None = None,
        *,
        parameters: dict[str, Any] | None = None,
        collections: Any = None,
        storageClass: StorageClass | str | None = None,
        **kwargs: Any,
    ) -> Any:
        # Docstring inherited.
        raise NotImplementedError()

    def getURIs(
        self,
        datasetRefOrType: DatasetRef | DatasetType | str,
        /,
        dataId: DataId | None = None,
        *,
        predict: bool = False,
        collections: Any = None,
        run: str | None = None,
        **kwargs: Any,
    ) -> DatasetRefURIs:
        # Docstring inherited.
        raise NotImplementedError()

    def getURI(
        self,
        datasetRefOrType: DatasetRef | DatasetType | str,
        /,
        dataId: DataId | None = None,
        *,
        predict: bool = False,
        collections: Any = None,
        run: str | None = None,
        **kwargs: Any,
    ) -> ResourcePath:
        # Docstring inherited.
        raise NotImplementedError()

    def get_dataset_type(self, name: str) -> DatasetType:
        # In future implementation this should directly access the cache
        # and only go to the server if the dataset type is not known.
        path = f"dataset_type/{name}"
        response = self._client.get(self._get_url(path))
        if response.status_code != httpx.codes.OK:
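            # The server is assumed to encode recognized butler exceptions in
            # the JSON body; translate them back into local exception types.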
            content = response.json()
            if content.get("exception") == "MissingDatasetTypeError":
                raise MissingDatasetTypeError(content["detail"])
            response.raise_for_status()
        return DatasetType.from_simple(SerializedDatasetType(**response.json()), universe=self.dimensions)

    def get_dataset(
        self,
        id: DatasetId,
        storage_class: str | StorageClass | None = None,
        dimension_records: bool = False,
        datastore_records: bool = False,
    ) -> DatasetRef | None:
        if datastore_records:
            raise ValueError("Datastore records can not yet be returned in client/server butler.")

        path = f"dataset/{id}"
        params: dict[str, str | bool] = {
            "dimension_records": dimension_records,
            "datastore_records": datastore_records,
        }
        if isinstance(storage_class, StorageClass):
            params["storage_class"] = storage_class.name
        elif storage_class:
            params["storage_class"] = storage_class
        response = self._client.get(self._get_url(path), params=params)
        response.raise_for_status()
        if response.json() is None:
            return None
        return DatasetRef.from_simple(SerializedDatasetRef(**response.json()), universe=self.dimensions)
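
    # A lookup sketch (the UUID is hypothetical):
    #
    #     ref = butler.get_dataset(UUID("00000000-0000-0000-0000-000000000000"))
    #     # ``ref`` is None when the dataset ID is unknown to the registry.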

    def find_dataset(
        self,
        dataset_type: DatasetType | str,
        data_id: DataId | None = None,
        *,
        collections: str | Sequence[str] | None = None,
        timespan: Timespan | None = None,
        storage_class: str | StorageClass | None = None,
        dimension_records: bool = False,
        datastore_records: bool = False,
        **kwargs: Any,
    ) -> DatasetRef | None:
        if collections is None:
            if not self.collections:
                raise NoDefaultCollectionError(
                    "No collections provided to find_dataset, and no defaults from butler construction."
                )
            collections = self.collections
        # Temporary hack. Assume strings for collections. In future we
        # want to construct a CollectionWildcard and filter it through the
        # collection cache to generate a list of collection names.
        wildcards = CollectionWildcard.from_expression(collections)

        if datastore_records:
            raise ValueError("Datastore records can not yet be returned in client/server butler.")
        if timespan:
            raise ValueError("Timespan can not yet be used in butler client/server.")

        if isinstance(dataset_type, DatasetType):
            dataset_type = dataset_type.name

        if isinstance(storage_class, StorageClass):
            storage_class = storage_class.name

        query = FindDatasetModel(
            data_id=self._simplify_dataId(data_id, **kwargs),
            collections=wildcards.strings,
            storage_class=storage_class,
            dimension_records=dimension_records,
            datastore_records=datastore_records,
        )

        path = f"find_dataset/{dataset_type}"
        response = self._client.post(
            self._get_url(path), json=query.model_dump(mode="json", exclude_unset=True, exclude_defaults=True)
        )
        response.raise_for_status()

        if response.json() is None:
            return None
        return DatasetRef.from_simple(SerializedDatasetRef(**response.json()), universe=self.dimensions)
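
    # A find_dataset sketch (the dataset type, data ID, and collection name
    # are hypothetical):
    #
    #     ref = butler.find_dataset(
    #         "calexp",
    #         collections="HSC/runs/RC2",
    #         instrument="HSC", visit=903334, detector=42,
    #     )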

    def retrieveArtifacts(
        self,
        refs: Iterable[DatasetRef],
        destination: ResourcePathExpression,
        transfer: str = "auto",
        preserve_path: bool = True,
        overwrite: bool = False,
    ) -> list[ResourcePath]:
        # Docstring inherited.
        raise NotImplementedError()

    def exists(
        self,
        dataset_ref_or_type: DatasetRef | DatasetType | str,
        /,
        data_id: DataId | None = None,
        *,
        full_check: bool = True,
        collections: Any = None,
        **kwargs: Any,
    ) -> DatasetExistence:
        # Docstring inherited.
        raise NotImplementedError()

    def _exists_many(
        self,
        refs: Iterable[DatasetRef],
        /,
        *,
        full_check: bool = True,
    ) -> dict[DatasetRef, DatasetExistence]:
        # Docstring inherited.
        raise NotImplementedError()

    def removeRuns(self, names: Iterable[str], unstore: bool = True) -> None:
        # Docstring inherited.
        raise NotImplementedError()

    def ingest(
        self,
        *datasets: FileDataset,
        transfer: str | None = "auto",
        run: str | None = None,
        idGenerationMode: DatasetIdGenEnum | None = None,
        record_validation_info: bool = True,
    ) -> None:
        # Docstring inherited.
        raise NotImplementedError()

    def export(
        self,
        *,
        directory: str | None = None,
        filename: str | None = None,
        format: str | None = None,
        transfer: str | None = None,
    ) -> AbstractContextManager[RepoExportContext]:
        # Docstring inherited.
        raise NotImplementedError()

    def import_(
        self,
        *,
        directory: ResourcePathExpression | None = None,
        filename: ResourcePathExpression | TextIO | None = None,
        format: str | None = None,
        transfer: str | None = None,
        skip_dimensions: set | None = None,
    ) -> None:
        # Docstring inherited.
        raise NotImplementedError()

    def transfer_from(
        self,
        source_butler: LimitedButler,
        source_refs: Iterable[DatasetRef],
        transfer: str = "auto",
        skip_missing: bool = True,
        register_dataset_types: bool = False,
        transfer_dimensions: bool = False,
    ) -> Collection[DatasetRef]:
        # Docstring inherited.
        raise NotImplementedError()

    def validateConfiguration(
        self,
        logFailures: bool = False,
        datasetTypeNames: Iterable[str] | None = None,
        ignore: Iterable[str] | None = None,
    ) -> None:
        # Docstring inherited.
        raise NotImplementedError()

    @property
    def collections(self) -> Sequence[str]:
        # Docstring inherited.
        return self._registry_defaults.collections

    @property
    def run(self) -> str | None:
        # Docstring inherited.
        return self._registry_defaults.run

    @property
    def registry(self) -> Registry:
        # Docstring inherited.
        raise NotImplementedError()

    def pruneDatasets(
        self,
        refs: Iterable[DatasetRef],
        *,
        disassociate: bool = True,
        unstore: bool = False,
        tags: Iterable[str] = (),
        purge: bool = False,
    ) -> None:
        # Docstring inherited.
        raise NotImplementedError()

    def _get_url(self, path: str, version: str = "v1") -> str:
        """Form the complete path to an endpoint on the server.

        Parameters
        ----------
        path : `str`
            The relative path to the server endpoint. Should not include the
            "/butler" prefix.
        version : `str`, optional
            Version string to prepend to path. Defaults to "v1".

        Returns
        -------
        path : `str`
            The full path to the endpoint.
        """
        prefix = "butler"
        return f"{prefix}/{version}/{path}"
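
    # For example, _get_url("universe") returns "butler/v1/universe"; httpx
    # resolves this relative path against the client's configured base_url.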