Coverage for python/lsst/daf/butler/remote_butler/_remote_butler.py: 3% (141 statements)

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__all__ = ("RemoteButler",)

from collections.abc import Collection, Iterable, Sequence
from contextlib import AbstractContextManager
from typing import Any, TextIO

import httpx
from lsst.daf.butler import __version__
from lsst.daf.butler.repo_relocation import replaceRoot
from lsst.resources import ResourcePath, ResourcePathExpression
from lsst.utils.introspection import get_full_type_name

from .._butler import Butler
from .._butler_config import ButlerConfig
from .._config import Config
from .._dataset_existence import DatasetExistence
from .._dataset_ref import DatasetId, DatasetIdGenEnum, DatasetRef, SerializedDatasetRef
from .._dataset_type import DatasetType, SerializedDatasetType
from .._deferredDatasetHandle import DeferredDatasetHandle
from .._file_dataset import FileDataset
from .._limited_butler import LimitedButler
from .._storage_class import StorageClass
from .._timespan import Timespan
from ..datastore import DatasetRefURIs
from ..dimensions import DataCoordinate, DataId, DimensionConfig, DimensionUniverse, SerializedDataCoordinate
from ..registry import MissingDatasetTypeError, NoDefaultCollectionError, Registry, RegistryDefaults
from ..registry.wildcards import CollectionWildcard
from ..transfers import RepoExportContext
from ._authentication import get_authentication_headers, get_authentication_token_from_environment
from ._config import RemoteButlerConfigModel
from .server_models import FindDatasetModel


class RemoteButler(Butler):
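    """Implementation of the `Butler` interface that communicates with a
    remote Butler REST server instead of a direct database connection.

    Parameters mirror those of the `Butler` constructor, plus ``http_client``
    (an `httpx.Client` injected explicitly, generally for testing) and
    ``access_token`` (an explicit authentication token; if omitted, an
    attempt is made to read one from the environment).

    Most of the API is not yet implemented and raises `NotImplementedError`.
    """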

    def __init__(
        self,
        # These parameters are inherited from the Butler() constructor
        config: Config | ResourcePathExpression | None = None,
        *,
        collections: Any = None,
        run: str | None = None,
        searchPaths: Sequence[ResourcePathExpression] | None = None,
        writeable: bool | None = None,
        inferDefaults: bool = True,
        # Parameters unique to RemoteButler
        http_client: httpx.Client | None = None,
        access_token: str | None = None,
        **kwargs: Any,
    ):
        butler_config = ButlerConfig(config, searchPaths, without_datastore=True)
        # There is a convention in Butler config files where <butlerRoot> in a
        # configuration option refers to the directory containing the
        # configuration file. We allow this for the remote butler's URL so
        # that the server doesn't have to know which hostname it is being
        # accessed from.
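        # For example (illustrative values, not from this repository): if the
        # config file was fetched from "https://example.org/repo/butler.yaml"
        # and sets the URL option to "<butlerRoot>/api/butler", the URL
        # resolves to "https://example.org/repo/api/butler".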

        server_url_key = ("remote_butler", "url")
        if server_url_key in butler_config:
            butler_config[server_url_key] = replaceRoot(
                butler_config[server_url_key], butler_config.configDir
            )
        self._config = RemoteButlerConfigModel.model_validate(butler_config)

        self._dimensions: DimensionUniverse | None = None
        # TODO: RegistryDefaults should have finish() called on it, but this
        # requires getCollectionSummary() which is not yet implemented
        self._registry_defaults = RegistryDefaults(collections, run, inferDefaults, **kwargs)

        if http_client is not None:
            # We have injected a client explicitly into the class.
            # This is generally done for testing.
            self._client = http_client
        else:
            server_url = str(self._config.remote_butler.url)
            auth_headers = {}
            if access_token is None:
                access_token = get_authentication_token_from_environment(server_url)
            if access_token is not None:
                auth_headers = get_authentication_headers(access_token)

            headers = {"user-agent": f"{get_full_type_name(self)}/{__version__}"}
            headers.update(auth_headers)
            self._client = httpx.Client(headers=headers, base_url=server_url)

    def isWriteable(self) -> bool:
        # Docstring inherited.
        return False

    @property
    def dimensions(self) -> DimensionUniverse:
        # Docstring inherited.
        if self._dimensions is not None:
            return self._dimensions

        response = self._client.get(self._get_url("universe"))
        response.raise_for_status()

        config = DimensionConfig.fromString(response.text, format="json")
        self._dimensions = DimensionUniverse(config)
        return self._dimensions

    def _simplify_dataId(
        self, dataId: DataId | None, **kwargs: int | str
    ) -> SerializedDataCoordinate | None:
        """Take a generic Data ID and convert it to a serializable form.

        Parameters
        ----------
        dataId : `dict`, `None`, `DataCoordinate`
            The data ID to serialize.
        **kwargs : `int` or `str`
            Additional values that should be included if this is not
            a `DataCoordinate`.

        Returns
        -------
        data_id : `SerializedDataCoordinate` or `None`
            A serializable form.
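
        Examples
        --------
        Passing ``{"instrument": "HSC"}`` with ``detector=16`` yields a
        `SerializedDataCoordinate` wrapping ``{"instrument": "HSC",
        "detector": 16}`` (illustrative dimension names).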

145 """ 

146 if dataId is None and not kwargs: 

147 return None 

148 if isinstance(dataId, DataCoordinate): 

149 return dataId.to_simple() 

150 

151 if dataId is None: 

152 data_id = kwargs 

153 elif kwargs: 

154 # Change variable because DataId is immutable and mypy complains. 

155 data_id = dict(dataId) 

156 data_id.update(kwargs) 

157 

158 # Assume we can treat it as a dict. 

159 return SerializedDataCoordinate(dataId=data_id) 

160 

    def _caching_context(self) -> AbstractContextManager[None]:
        # Docstring inherited.
        # Not implemented for now; we will have to think about whether this
        # needs to do something on the client side and/or the remote side.
        raise NotImplementedError()

    def transaction(self) -> AbstractContextManager[None]:
        """Will always raise NotImplementedError.

        Transactions are not supported by RemoteButler.
        """
        raise NotImplementedError()

    def put(
        self,
        obj: Any,
        datasetRefOrType: DatasetRef | DatasetType | str,
        /,
        dataId: DataId | None = None,
        *,
        run: str | None = None,
        **kwargs: Any,
    ) -> DatasetRef:
        # Docstring inherited.
        raise NotImplementedError()

    def getDeferred(
        self,
        datasetRefOrType: DatasetRef | DatasetType | str,
        /,
        dataId: DataId | None = None,
        *,
        parameters: dict | None = None,
        collections: Any = None,
        storageClass: str | StorageClass | None = None,
        **kwargs: Any,
    ) -> DeferredDatasetHandle:
        # Docstring inherited.
        raise NotImplementedError()

    def get(
        self,
        datasetRefOrType: DatasetRef | DatasetType | str,
        /,
        dataId: DataId | None = None,
        *,
        parameters: dict[str, Any] | None = None,
        collections: Any = None,
        storageClass: StorageClass | str | None = None,
        **kwargs: Any,
    ) -> Any:
        # Docstring inherited.
        raise NotImplementedError()

    def getURIs(
        self,
        datasetRefOrType: DatasetRef | DatasetType | str,
        /,
        dataId: DataId | None = None,
        *,
        predict: bool = False,
        collections: Any = None,
        run: str | None = None,
        **kwargs: Any,
    ) -> DatasetRefURIs:
        # Docstring inherited.
        raise NotImplementedError()

    def getURI(
        self,
        datasetRefOrType: DatasetRef | DatasetType | str,
        /,
        dataId: DataId | None = None,
        *,
        predict: bool = False,
        collections: Any = None,
        run: str | None = None,
        **kwargs: Any,
    ) -> ResourcePath:
        # Docstring inherited.
        raise NotImplementedError()

    def get_dataset_type(self, name: str) -> DatasetType:
        # In a future implementation this should directly access the cache
        # and only go to the server if the dataset type is not known.
        path = f"dataset_type/{name}"
        response = self._client.get(self._get_url(path))
        if response.status_code != httpx.codes.OK:
            content = response.json()
            if content["exception"] == "MissingDatasetTypeError":
                raise MissingDatasetTypeError(content["detail"])
        response.raise_for_status()
        return DatasetType.from_simple(SerializedDatasetType(**response.json()), universe=self.dimensions)

    def get_dataset(
        self,
        id: DatasetId,
        storage_class: str | StorageClass | None = None,
        dimension_records: bool = False,
        datastore_records: bool = False,
    ) -> DatasetRef | None:
        path = f"dataset/{id}"
        if isinstance(storage_class, StorageClass):
            storage_class_name = storage_class.name
        elif storage_class:
            storage_class_name = storage_class
        params: dict[str, str | bool] = {
            "dimension_records": dimension_records,
            "datastore_records": datastore_records,
        }
        if datastore_records:
            raise ValueError("Datastore records can not yet be returned in client/server butler.")
        if storage_class:
            params["storage_class"] = storage_class_name
        response = self._client.get(self._get_url(path), params=params)
        response.raise_for_status()
        if response.json() is None:
            return None
        return DatasetRef.from_simple(SerializedDatasetRef(**response.json()), universe=self.dimensions)

    def find_dataset(
        self,
        dataset_type: DatasetType | str,
        data_id: DataId | None = None,
        *,
        collections: str | Sequence[str] | None = None,
        timespan: Timespan | None = None,
        storage_class: str | StorageClass | None = None,
        dimension_records: bool = False,
        datastore_records: bool = False,
        **kwargs: Any,
    ) -> DatasetRef | None:
        if collections is None:
            if not self.collections:
                raise NoDefaultCollectionError(
                    "No collections provided to find_dataset, and no defaults from butler construction."
                )
            collections = self.collections
        # Temporary hack. Assume strings for collections. In the future we
        # want to construct a CollectionWildcard and filter it through the
        # collection cache to generate the list of collection names.
        wildcards = CollectionWildcard.from_expression(collections)
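        # For a single collection name such as "HSC/runs/RC2" (an
        # illustrative value), wildcards.strings is just ["HSC/runs/RC2"].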

        if datastore_records:
            raise ValueError("Datastore records can not yet be returned in client/server butler.")
        if timespan:
            raise ValueError("Timespan can not yet be used in butler client/server.")

        if isinstance(dataset_type, DatasetType):
            dataset_type = dataset_type.name

        if isinstance(storage_class, StorageClass):
            storage_class = storage_class.name

        query = FindDatasetModel(
            data_id=self._simplify_dataId(data_id, **kwargs),
            collections=wildcards.strings,
            storage_class=storage_class,
            dimension_records=dimension_records,
            datastore_records=datastore_records,
        )

        path = f"find_dataset/{dataset_type}"
        response = self._client.post(
            self._get_url(path), json=query.model_dump(mode="json", exclude_unset=True, exclude_defaults=True)
        )
        response.raise_for_status()

        if response.json() is None:
            # No matching dataset was found; honor the DatasetRef | None
            # return type, as get_dataset does for a null response.
            return None
        return DatasetRef.from_simple(SerializedDatasetRef(**response.json()), universe=self.dimensions)

    def retrieveArtifacts(
        self,
        refs: Iterable[DatasetRef],
        destination: ResourcePathExpression,
        transfer: str = "auto",
        preserve_path: bool = True,
        overwrite: bool = False,
    ) -> list[ResourcePath]:
        # Docstring inherited.
        raise NotImplementedError()

    def exists(
        self,
        dataset_ref_or_type: DatasetRef | DatasetType | str,
        /,
        data_id: DataId | None = None,
        *,
        full_check: bool = True,
        collections: Any = None,
        **kwargs: Any,
    ) -> DatasetExistence:
        # Docstring inherited.
        raise NotImplementedError()

    def _exists_many(
        self,
        refs: Iterable[DatasetRef],
        /,
        *,
        full_check: bool = True,
    ) -> dict[DatasetRef, DatasetExistence]:
        # Docstring inherited.
        raise NotImplementedError()

    def removeRuns(self, names: Iterable[str], unstore: bool = True) -> None:
        # Docstring inherited.
        raise NotImplementedError()

    def ingest(
        self,
        *datasets: FileDataset,
        transfer: str | None = "auto",
        run: str | None = None,
        idGenerationMode: DatasetIdGenEnum | None = None,
        record_validation_info: bool = True,
    ) -> None:
        # Docstring inherited.
        raise NotImplementedError()

    def export(
        self,
        *,
        directory: str | None = None,
        filename: str | None = None,
        format: str | None = None,
        transfer: str | None = None,
    ) -> AbstractContextManager[RepoExportContext]:
        # Docstring inherited.
        raise NotImplementedError()

    def import_(
        self,
        *,
        directory: ResourcePathExpression | None = None,
        filename: ResourcePathExpression | TextIO | None = None,
        format: str | None = None,
        transfer: str | None = None,
        skip_dimensions: set | None = None,
    ) -> None:
        # Docstring inherited.
        raise NotImplementedError()

    def transfer_from(
        self,
        source_butler: LimitedButler,
        source_refs: Iterable[DatasetRef],
        transfer: str = "auto",
        skip_missing: bool = True,
        register_dataset_types: bool = False,
        transfer_dimensions: bool = False,
    ) -> Collection[DatasetRef]:
        # Docstring inherited.
        raise NotImplementedError()

    def validateConfiguration(
        self,
        logFailures: bool = False,
        datasetTypeNames: Iterable[str] | None = None,
        ignore: Iterable[str] | None = None,
    ) -> None:
        # Docstring inherited.
        raise NotImplementedError()

    @property
    def collections(self) -> Sequence[str]:
        # Docstring inherited.
        return self._registry_defaults.collections

    @property
    def run(self) -> str | None:
        # Docstring inherited.
        return self._registry_defaults.run

    @property
    def registry(self) -> Registry:
        # Docstring inherited.
        raise NotImplementedError()

    def pruneDatasets(
        self,
        refs: Iterable[DatasetRef],
        *,
        disassociate: bool = True,
        unstore: bool = False,
        tags: Iterable[str] = (),
        purge: bool = False,
    ) -> None:
        # Docstring inherited.
        raise NotImplementedError()

    def _get_url(self, path: str, version: str = "v1") -> str:
        """Form the complete path to an endpoint on the server.

        Parameters
        ----------
        path : `str`
            The relative path to the server endpoint.
        version : `str`, optional
            Version string to prepend to path. Defaults to "v1".

        Returns
        -------
        path : `str`
            The full path to the endpoint.
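
        Examples
        --------
        ``_get_url("universe")`` returns ``"v1/universe"``.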

464 """ 

465 return f"{version}/{path}"
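

# Illustrative usage sketch, not part of the module. The repository URL,
# collection, dataset type, and data ID values below are assumptions, and the
# repo config is assumed to set the ``remote_butler.url`` option to a running
# Butler server:
#
#     butler = RemoteButler(
#         "https://data.example.org/repo/butler.yaml",
#         collections="HSC/runs/RC2",
#     )
#     print(butler.get_dataset_type("calexp"))
#     ref = butler.find_dataset(
#         "calexp", instrument="HSC", visit=903334, detector=16
#     )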