Coverage for python/lsst/daf/butler/remote_butler/_remote_butler.py: 3%

139 statements

coverage.py v7.3.2, created at 2023-12-08 10:55 +0000

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("RemoteButler",)

from collections.abc import Collection, Iterable, Mapping, Sequence
from contextlib import AbstractContextManager
from typing import TYPE_CHECKING, Any, TextIO

import httpx
from lsst.daf.butler import __version__
from lsst.daf.butler.repo_relocation import replaceRoot
from lsst.resources import ResourcePath, ResourcePathExpression
from lsst.utils.introspection import get_full_type_name

from .._butler import Butler
from .._butler_config import ButlerConfig
from .._dataset_ref import DatasetRef, SerializedDatasetRef
from .._dataset_type import DatasetType, SerializedDatasetType
from .._storage_class import StorageClass
from ..dimensions import DataCoordinate, DimensionConfig, DimensionUniverse, SerializedDataCoordinate
from ..registry import MissingDatasetTypeError, NoDefaultCollectionError, RegistryDefaults
from ..registry.wildcards import CollectionWildcard
from ._authentication import get_authentication_headers, get_authentication_token_from_environment
from ._config import RemoteButlerConfigModel
from .server_models import FindDatasetModel

if TYPE_CHECKING:
    from .._config import Config
    from .._dataset_existence import DatasetExistence
    from .._dataset_ref import DatasetId, DatasetIdGenEnum
    from .._deferredDatasetHandle import DeferredDatasetHandle
    from .._file_dataset import FileDataset
    from .._limited_butler import LimitedButler
    from .._query import Query
    from .._timespan import Timespan
    from ..datastore import DatasetRefURIs
    from ..dimensions import DataId, DimensionGroup, DimensionRecord
    from ..registry import CollectionArgType, Registry
    from ..transfers import RepoExportContext


class RemoteButler(Butler):
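    """A `Butler` that communicates with a remote Butler server over HTTP
    rather than accessing a database and datastore directly.

    Most operations are not yet implemented and raise `NotImplementedError`.

    A minimal usage sketch; the repository config URL and dataset type name
    here are hypothetical::

        butler = RemoteButler("https://example.org/repo/butler.yaml")
        dataset_type = butler.get_dataset_type("raw")
    """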

    def __init__(
        self,
        # These parameters are inherited from the Butler() constructor.
        config: Config | ResourcePathExpression | None = None,
        *,
        collections: Any = None,
        run: str | None = None,
        searchPaths: Sequence[ResourcePathExpression] | None = None,
        writeable: bool | None = None,
        inferDefaults: bool = True,
        # Parameters unique to RemoteButler.
        http_client: httpx.Client | None = None,
        access_token: str | None = None,
        **kwargs: Any,
    ):
        butler_config = ButlerConfig(config, searchPaths, without_datastore=True)
        # There is a convention in Butler config files where <butlerRoot> in a
        # configuration option refers to the directory containing the
        # configuration file. We allow this for the remote Butler's URL so
        # that the server doesn't have to know which hostname it is being
        # accessed from.
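        # As a hypothetical illustration, a config file served from
        # https://example.org/repo/butler.yaml could contain
        #
        #     remote_butler:
        #       url: <butlerRoot>
        #
        # and the server URL would then resolve to https://example.org/repo.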

        server_url_key = ("remote_butler", "url")
        if server_url_key in butler_config:
            butler_config[server_url_key] = replaceRoot(
                butler_config[server_url_key], butler_config.configDir
            )
        self._config = RemoteButlerConfigModel.model_validate(butler_config)

        self._dimensions: DimensionUniverse | None = None
        # TODO: RegistryDefaults should have finish() called on it, but this
        # requires getCollectionSummary(), which is not yet implemented.
        self._registry_defaults = RegistryDefaults(collections, run, inferDefaults, **kwargs)

        if http_client is not None:
            # We have injected a client explicitly into the class.
            # This is generally done for testing.
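            # A sketch of that testing pattern; the handler and URL here are
            # hypothetical:
            #
            #     transport = httpx.MockTransport(handler)
            #     client = httpx.Client(transport=transport, base_url=url)
            #     butler = RemoteButler(config, http_client=client)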

            self._client = http_client
        else:
            server_url = str(self._config.remote_butler.url)
            auth_headers = {}
            if access_token is None:
                access_token = get_authentication_token_from_environment(server_url)
            if access_token is not None:
                auth_headers = get_authentication_headers(access_token)

            headers = {"user-agent": f"{get_full_type_name(self)}/{__version__}"}
            headers.update(auth_headers)
            self._client = httpx.Client(headers=headers, base_url=server_url)

    def isWriteable(self) -> bool:
        # Docstring inherited.
        return False

    @property
    def dimensions(self) -> DimensionUniverse:
        # Docstring inherited.
        if self._dimensions is not None:
            return self._dimensions

        response = self._client.get(self._get_url("universe"))
        response.raise_for_status()

        config = DimensionConfig.fromString(response.text, format="json")
        self._dimensions = DimensionUniverse(config)
        return self._dimensions

    def _simplify_dataId(
        self, dataId: DataId | None, **kwargs: dict[str, int | str]
    ) -> SerializedDataCoordinate | None:
        """Take a generic data ID and convert it to a serializable form.

        Parameters
        ----------
        dataId : `dict`, `None`, `DataCoordinate`
            The data ID to serialize.
        **kwargs : `dict`
            Additional values that should be included if ``dataId`` is not
            a `DataCoordinate`.

        Returns
        -------
        data_id : `SerializedDataCoordinate` or `None`
            A serializable form.
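
        Examples
        --------
        A sketch of the expected behavior; the dimension names are
        illustrative::

            butler._simplify_dataId(None)  # -> None
            butler._simplify_dataId({"instrument": "HSC"}, visit=42)
            # -> SerializedDataCoordinate for {"instrument": "HSC", "visit": 42}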

153 """ 

154 if dataId is None and not kwargs: 

155 return None 

156 if isinstance(dataId, DataCoordinate): 

157 return dataId.to_simple() 

158 

159 if dataId is None: 

160 data_id = kwargs 

161 elif kwargs: 

162 # Change variable because DataId is immutable and mypy complains. 

163 data_id = dict(dataId) 

164 data_id.update(kwargs) 

165 

166 # Assume we can treat it as a dict. 

167 return SerializedDataCoordinate(dataId=data_id) 

    def _caching_context(self) -> AbstractContextManager[None]:
        # Docstring inherited.
        # Not implemented for now; we will have to think about whether this
        # needs to do something on the client side and/or the remote side.
        raise NotImplementedError()

    def transaction(self) -> AbstractContextManager[None]:
        """Will always raise NotImplementedError.

        Transactions are not supported by RemoteButler.
        """
        raise NotImplementedError()

    def put(
        self,
        obj: Any,
        datasetRefOrType: DatasetRef | DatasetType | str,
        /,
        dataId: DataId | None = None,
        *,
        run: str | None = None,
        **kwargs: Any,
    ) -> DatasetRef:
        # Docstring inherited.
        raise NotImplementedError()

    def getDeferred(
        self,
        datasetRefOrType: DatasetRef | DatasetType | str,
        /,
        dataId: DataId | None = None,
        *,
        parameters: dict | None = None,
        collections: Any = None,
        storageClass: str | StorageClass | None = None,
        **kwargs: Any,
    ) -> DeferredDatasetHandle:
        # Docstring inherited.
        raise NotImplementedError()

    def get(
        self,
        datasetRefOrType: DatasetRef | DatasetType | str,
        /,
        dataId: DataId | None = None,
        *,
        parameters: dict[str, Any] | None = None,
        collections: Any = None,
        storageClass: StorageClass | str | None = None,
        **kwargs: Any,
    ) -> Any:
        # Docstring inherited.
        raise NotImplementedError()

    def getURIs(
        self,
        datasetRefOrType: DatasetRef | DatasetType | str,
        /,
        dataId: DataId | None = None,
        *,
        predict: bool = False,
        collections: Any = None,
        run: str | None = None,
        **kwargs: Any,
    ) -> DatasetRefURIs:
        # Docstring inherited.
        raise NotImplementedError()

    def getURI(
        self,
        datasetRefOrType: DatasetRef | DatasetType | str,
        /,
        dataId: DataId | None = None,
        *,
        predict: bool = False,
        collections: Any = None,
        run: str | None = None,
        **kwargs: Any,
    ) -> ResourcePath:
        # Docstring inherited.
        raise NotImplementedError()

    def get_dataset_type(self, name: str) -> DatasetType:
        # In a future implementation this should directly access the cache
        # and only go to the server if the dataset type is not known.
        path = f"dataset_type/{name}"
        response = self._client.get(self._get_url(path))
        if response.status_code != httpx.codes.OK:
            content = response.json()
            if content["exception"] == "MissingDatasetTypeError":
                raise MissingDatasetTypeError(content["detail"])
        response.raise_for_status()
        return DatasetType.from_simple(SerializedDatasetType(**response.json()), universe=self.dimensions)
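
    # A usage sketch for ``get_dataset_type``; the dataset type name is
    # illustrative:
    #
    #     try:
    #         dataset_type = butler.get_dataset_type("raw")
    #     except MissingDatasetTypeError:
    #         ...  # the dataset type is not registered on the server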

    def get_dataset(
        self,
        id: DatasetId,
        storage_class: str | StorageClass | None = None,
        dimension_records: bool = False,
        datastore_records: bool = False,
    ) -> DatasetRef | None:
        if datastore_records:
            raise ValueError("Datastore records can not yet be returned in client/server butler.")

        path = f"dataset/{id}"
        params: dict[str, str | bool] = {
            "dimension_records": dimension_records,
            "datastore_records": datastore_records,
        }
        if storage_class:
            if isinstance(storage_class, StorageClass):
                params["storage_class"] = storage_class.name
            else:
                params["storage_class"] = storage_class
        response = self._client.get(self._get_url(path), params=params)
        response.raise_for_status()
        if response.json() is None:
            return None
        return DatasetRef.from_simple(SerializedDatasetRef(**response.json()), universe=self.dimensions)

    def find_dataset(
        self,
        dataset_type: DatasetType | str,
        data_id: DataId | None = None,
        *,
        collections: str | Sequence[str] | None = None,
        timespan: Timespan | None = None,
        storage_class: str | StorageClass | None = None,
        dimension_records: bool = False,
        datastore_records: bool = False,
        **kwargs: Any,
    ) -> DatasetRef | None:
        if collections is None:
            if not self.collections:
                raise NoDefaultCollectionError(
                    "No collections provided to find_dataset, and no defaults from butler construction."
                )
            collections = self.collections
        # Temporary hack: assume strings for collections. In the future we
        # want to construct a CollectionWildcard and filter it through the
        # collection cache to generate a list of collection names.
        wildcards = CollectionWildcard.from_expression(collections)

        if datastore_records:
            raise ValueError("Datastore records can not yet be returned in client/server butler.")
        if timespan:
            raise ValueError("Timespan can not yet be used in butler client/server.")

        if isinstance(dataset_type, DatasetType):
            dataset_type = dataset_type.name

        if isinstance(storage_class, StorageClass):
            storage_class = storage_class.name

        query = FindDatasetModel(
            data_id=self._simplify_dataId(data_id, **kwargs),
            collections=wildcards.strings,
            storage_class=storage_class,
            dimension_records=dimension_records,
            datastore_records=datastore_records,
        )

        path = f"find_dataset/{dataset_type}"
        response = self._client.post(
            self._get_url(path), json=query.model_dump(mode="json", exclude_unset=True, exclude_defaults=True)
        )
        response.raise_for_status()

        # A null response body means no matching dataset was found; map that
        # to None to match the declared return type.
        if response.json() is None:
            return None
        return DatasetRef.from_simple(SerializedDatasetRef(**response.json()), universe=self.dimensions)
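
    # A usage sketch for ``find_dataset``; the collection, dataset type, and
    # dimension names are illustrative:
    #
    #     ref = butler.find_dataset(
    #         "calexp",
    #         collections="HSC/runs/RC2",
    #         instrument="HSC",
    #         visit=903342,
    #         detector=10,
    #     )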

    def retrieveArtifacts(
        self,
        refs: Iterable[DatasetRef],
        destination: ResourcePathExpression,
        transfer: str = "auto",
        preserve_path: bool = True,
        overwrite: bool = False,
    ) -> list[ResourcePath]:
        # Docstring inherited.
        raise NotImplementedError()

    def exists(
        self,
        dataset_ref_or_type: DatasetRef | DatasetType | str,
        /,
        data_id: DataId | None = None,
        *,
        full_check: bool = True,
        collections: Any = None,
        **kwargs: Any,
    ) -> DatasetExistence:
        # Docstring inherited.
        raise NotImplementedError()

    def _exists_many(
        self,
        refs: Iterable[DatasetRef],
        /,
        *,
        full_check: bool = True,
    ) -> dict[DatasetRef, DatasetExistence]:
        # Docstring inherited.
        raise NotImplementedError()

    def removeRuns(self, names: Iterable[str], unstore: bool = True) -> None:
        # Docstring inherited.
        raise NotImplementedError()

    def ingest(
        self,
        *datasets: FileDataset,
        transfer: str | None = "auto",
        run: str | None = None,
        idGenerationMode: DatasetIdGenEnum | None = None,
        record_validation_info: bool = True,
    ) -> None:
        # Docstring inherited.
        raise NotImplementedError()

    def export(
        self,
        *,
        directory: str | None = None,
        filename: str | None = None,
        format: str | None = None,
        transfer: str | None = None,
    ) -> AbstractContextManager[RepoExportContext]:
        # Docstring inherited.
        raise NotImplementedError()

    def import_(
        self,
        *,
        directory: ResourcePathExpression | None = None,
        filename: ResourcePathExpression | TextIO | None = None,
        format: str | None = None,
        transfer: str | None = None,
        skip_dimensions: set | None = None,
    ) -> None:
        # Docstring inherited.
        raise NotImplementedError()

    def transfer_dimension_records_from(
        self, source_butler: LimitedButler | Butler, source_refs: Iterable[DatasetRef]
    ) -> None:
        # Docstring inherited.
        raise NotImplementedError()

    def transfer_from(
        self,
        source_butler: LimitedButler,
        source_refs: Iterable[DatasetRef],
        transfer: str = "auto",
        skip_missing: bool = True,
        register_dataset_types: bool = False,
        transfer_dimensions: bool = False,
    ) -> Collection[DatasetRef]:
        # Docstring inherited.
        raise NotImplementedError()

    def validateConfiguration(
        self,
        logFailures: bool = False,
        datasetTypeNames: Iterable[str] | None = None,
        ignore: Iterable[str] | None = None,
    ) -> None:
        # Docstring inherited.
        raise NotImplementedError()

    @property
    def collections(self) -> Sequence[str]:
        # Docstring inherited.
        return self._registry_defaults.collections

    @property
    def run(self) -> str | None:
        # Docstring inherited.
        return self._registry_defaults.run

    @property
    def registry(self) -> Registry:
        # Docstring inherited.
        raise NotImplementedError()

    def _query(self) -> AbstractContextManager[Query]:
        # Docstring inherited.
        raise NotImplementedError()

    def _query_data_ids(
        self,
        dimensions: DimensionGroup | Iterable[str] | str,
        *,
        data_id: DataId | None = None,
        where: str = "",
        bind: Mapping[str, Any] | None = None,
        expanded: bool = False,
        order_by: Iterable[str] | str | None = None,
        limit: int | None = None,
        offset: int | None = None,
        explain: bool = True,
        **kwargs: Any,
    ) -> list[DataCoordinate]:
        # Docstring inherited.
        raise NotImplementedError()

    def _query_datasets(
        self,
        dataset_type: Any,
        collections: CollectionArgType | None = None,
        *,
        find_first: bool = True,
        data_id: DataId | None = None,
        where: str = "",
        bind: Mapping[str, Any] | None = None,
        expanded: bool = False,
        explain: bool = True,
        **kwargs: Any,
    ) -> list[DatasetRef]:
        # Docstring inherited.
        raise NotImplementedError()

    def _query_dimension_records(
        self,
        element: str,
        *,
        data_id: DataId | None = None,
        where: str = "",
        bind: Mapping[str, Any] | None = None,
        order_by: Iterable[str] | str | None = None,
        limit: int | None = None,
        offset: int | None = None,
        explain: bool = True,
        **kwargs: Any,
    ) -> list[DimensionRecord]:
        # Docstring inherited.
        raise NotImplementedError()

    def pruneDatasets(
        self,
        refs: Iterable[DatasetRef],
        *,
        disassociate: bool = True,
        unstore: bool = False,
        tags: Iterable[str] = (),
        purge: bool = False,
    ) -> None:
        # Docstring inherited.
        raise NotImplementedError()

    def _get_url(self, path: str, version: str = "v1") -> str:
        """Form the complete path to an endpoint on the server.

        Parameters
        ----------
        path : `str`
            The relative path to the server endpoint.
        version : `str`, optional
            Version string to prepend to the path. Defaults to "v1".

        Returns
        -------
        path : `str`
            The full path to the endpoint.
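
        Examples
        --------
        A sketch of the mapping; the second endpoint name is illustrative::

            butler._get_url("universe")          # -> "v1/universe"
            butler._get_url("dataset_type/raw")  # -> "v1/dataset_type/raw"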

        """
        return f"{version}/{path}"