Coverage for python/lsst/daf/butler/remote_butler/_remote_butler.py: 3% (140 statements)

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__all__ = ("RemoteButler",)

from collections.abc import Collection, Iterable, Sequence
from contextlib import AbstractContextManager
from typing import Any, TextIO

import httpx
from lsst.daf.butler import __version__
from lsst.daf.butler.repo_relocation import replaceRoot
from lsst.resources import ResourcePath, ResourcePathExpression
from lsst.utils.introspection import get_full_type_name

from .._butler import Butler
from .._butler_config import ButlerConfig
from .._config import Config
from .._dataset_existence import DatasetExistence
from .._dataset_ref import DatasetId, DatasetIdGenEnum, DatasetRef, SerializedDatasetRef
from .._dataset_type import DatasetType, SerializedDatasetType
from .._deferredDatasetHandle import DeferredDatasetHandle
from .._file_dataset import FileDataset
from .._limited_butler import LimitedButler
from .._storage_class import StorageClass
from .._timespan import Timespan
from ..datastore import DatasetRefURIs
from ..dimensions import DataCoordinate, DataId, DimensionConfig, DimensionUniverse, SerializedDataCoordinate
from ..registry import MissingDatasetTypeError, NoDefaultCollectionError, Registry, RegistryDefaults
from ..registry.wildcards import CollectionWildcard
from ..transfers import RepoExportContext
from ._authentication import get_authentication_headers, get_authentication_token_from_environment
from ._config import RemoteButlerConfigModel
from .server import FindDatasetModel


class RemoteButler(Butler):
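    """Butler implementation that talks to a remote server instead of
    connecting directly to a database and datastore.

    Examples
    --------
    A minimal sketch; the repository URI and dataset type name are
    hypothetical, and the URI must resolve to a configuration containing a
    ``remote_butler.url`` entry::

        butler = RemoteButler("https://data.example.org/repo/butler.yaml")
        dataset_type = butler.get_dataset_type("raw")
    """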

    def __init__(
        self,
        # These parameters are inherited from the Butler() constructor
        config: Config | ResourcePathExpression | None = None,
        *,
        collections: Any = None,
        run: str | None = None,
        searchPaths: Sequence[ResourcePathExpression] | None = None,
        writeable: bool | None = None,
        inferDefaults: bool = True,
        # Parameters unique to RemoteButler
        http_client: httpx.Client | None = None,
        access_token: str | None = None,
        **kwargs: Any,
    ):
        butler_config = ButlerConfig(config, searchPaths, without_datastore=True)
        # There is a convention in Butler config files where <butlerRoot> in a
        # configuration option refers to the directory containing the
        # configuration file. We allow this for the remote butler's URL so
        # that the server doesn't have to know which hostname it is being
        # accessed from.
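        # For example (illustrative values only): a configuration file fetched
        # from https://data.example.org/repo/butler.yaml that sets
        # ``url: <butlerRoot>`` resolves to the server URL
        # https://data.example.org/repo.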

        server_url_key = ("remote_butler", "url")
        if server_url_key in butler_config:
            butler_config[server_url_key] = replaceRoot(
                butler_config[server_url_key], butler_config.configDir
            )
        self._config = RemoteButlerConfigModel.model_validate(butler_config)

        self._dimensions: DimensionUniverse | None = None
        # TODO: RegistryDefaults should have finish() called on it, but this
        # requires getCollectionSummary(), which is not yet implemented.
        self._registry_defaults = RegistryDefaults(collections, run, inferDefaults, **kwargs)

        if http_client is not None:
            # We have injected a client explicitly into the class.
            # This is generally done for testing.
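            # (Illustration of such a test setup; httpx.MockTransport is a
            # real httpx feature, but ``handler`` here is a hypothetical
            # request handler:
            #     transport = httpx.MockTransport(handler)
            #     client = httpx.Client(transport=transport, base_url="https://test.example")
            #     butler = RemoteButler(config, http_client=client))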

            self._client = http_client
        else:
            server_url = str(self._config.remote_butler.url)
            auth_headers = {}
            if access_token is None:
                access_token = get_authentication_token_from_environment(server_url)
            if access_token is not None:
                auth_headers = get_authentication_headers(access_token)

            headers = {"user-agent": f"{get_full_type_name(self)}/{__version__}"}
            headers.update(auth_headers)
            self._client = httpx.Client(headers=headers, base_url=server_url)

    def isWriteable(self) -> bool:
        # Docstring inherited.
        return False

    @property
    def dimensions(self) -> DimensionUniverse:
        # Docstring inherited.
        if self._dimensions is not None:
            return self._dimensions

        response = self._client.get(self._get_url("universe"))
        response.raise_for_status()

        config = DimensionConfig.fromString(response.text, format="json")
        self._dimensions = DimensionUniverse(config)
        return self._dimensions

    def _simplify_dataId(
        self, dataId: DataId | None, **kwargs: int | str
    ) -> SerializedDataCoordinate | None:
        """Take a generic data ID and convert it to a serializable form.

        Parameters
        ----------
        dataId : `dict`, `None`, `DataCoordinate`
            The data ID to serialize.
        **kwargs : `int` or `str`
            Additional values that should be included if this is not
            a `DataCoordinate`.

        Returns
        -------
        data_id : `SerializedDataCoordinate` or `None`
            A serializable form.
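
        Examples
        --------
        A sketch of typical usage; the data ID values are illustrative and
        ``data_coordinate`` stands in for any existing `DataCoordinate`::

            # A plain dict plus keyword values is merged and serialized.
            serialized = butler._simplify_dataId({"instrument": "DummyCam"}, detector=1)
            # An existing DataCoordinate is converted via ``to_simple()``.
            serialized = butler._simplify_dataId(data_coordinate)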

        """
        if dataId is None and not kwargs:
            return None
        if isinstance(dataId, DataCoordinate):
            return dataId.to_simple()

        if dataId is None:
            data_id = kwargs
        else:
            # Change variable because DataId is immutable and mypy complains.
            # Merging kwargs here also ensures data_id is always bound before
            # the return below, even when no kwargs were given.
            data_id = dict(dataId)
            data_id.update(kwargs)

        # Assume we can treat it as a dict.
        return SerializedDataCoordinate(dataId=data_id)

    def transaction(self) -> AbstractContextManager[None]:
        """Will always raise NotImplementedError.

        Transactions are not supported by RemoteButler.
        """
        raise NotImplementedError()

    def put(
        self,
        obj: Any,
        datasetRefOrType: DatasetRef | DatasetType | str,
        /,
        dataId: DataId | None = None,
        *,
        run: str | None = None,
        **kwargs: Any,
    ) -> DatasetRef:
        # Docstring inherited.
        raise NotImplementedError()

    def getDeferred(
        self,
        datasetRefOrType: DatasetRef | DatasetType | str,
        /,
        dataId: DataId | None = None,
        *,
        parameters: dict | None = None,
        collections: Any = None,
        storageClass: str | StorageClass | None = None,
        **kwargs: Any,
    ) -> DeferredDatasetHandle:
        # Docstring inherited.
        raise NotImplementedError()

    def get(
        self,
        datasetRefOrType: DatasetRef | DatasetType | str,
        /,
        dataId: DataId | None = None,
        *,
        parameters: dict[str, Any] | None = None,
        collections: Any = None,
        storageClass: StorageClass | str | None = None,
        **kwargs: Any,
    ) -> Any:
        # Docstring inherited.
        raise NotImplementedError()

    def getURIs(
        self,
        datasetRefOrType: DatasetRef | DatasetType | str,
        /,
        dataId: DataId | None = None,
        *,
        predict: bool = False,
        collections: Any = None,
        run: str | None = None,
        **kwargs: Any,
    ) -> DatasetRefURIs:
        # Docstring inherited.
        raise NotImplementedError()

    def getURI(
        self,
        datasetRefOrType: DatasetRef | DatasetType | str,
        /,
        dataId: DataId | None = None,
        *,
        predict: bool = False,
        collections: Any = None,
        run: str | None = None,
        **kwargs: Any,
    ) -> ResourcePath:
        # Docstring inherited.
        raise NotImplementedError()

    def get_dataset_type(self, name: str) -> DatasetType:
        # A future implementation should directly access the cache
        # and only go to the server if the dataset type is not known.
        path = f"dataset_type/{name}"
        response = self._client.get(self._get_url(path))
        if response.status_code != httpx.codes.OK:
            content = response.json()
            # Use .get() so that an error payload without an "exception" key
            # falls through to raise_for_status() instead of a KeyError.
            if content.get("exception") == "MissingDatasetTypeError":
                raise MissingDatasetTypeError(content["detail"])
        response.raise_for_status()
        return DatasetType.from_simple(SerializedDatasetType(**response.json()), universe=self.dimensions)

    def get_dataset(
        self,
        id: DatasetId,
        storage_class: str | StorageClass | None = None,
        dimension_records: bool = False,
        datastore_records: bool = False,
    ) -> DatasetRef | None:
        path = f"dataset/{id}"
        if isinstance(storage_class, StorageClass):
            storage_class_name: str | None = storage_class.name
        else:
            storage_class_name = storage_class
        params: dict[str, str | bool] = {
            "dimension_records": dimension_records,
            "datastore_records": datastore_records,
        }
        if datastore_records:
            raise ValueError("Datastore records cannot yet be returned in client/server butler.")
        if storage_class_name:
            params["storage_class"] = storage_class_name
        response = self._client.get(self._get_url(path), params=params)
        response.raise_for_status()
        if response.json() is None:
            return None
        return DatasetRef.from_simple(SerializedDatasetRef(**response.json()), universe=self.dimensions)

    def find_dataset(
        self,
        dataset_type: DatasetType | str,
        data_id: DataId | None = None,
        *,
        collections: str | Sequence[str] | None = None,
        timespan: Timespan | None = None,
        storage_class: str | StorageClass | None = None,
        dimension_records: bool = False,
        datastore_records: bool = False,
        **kwargs: Any,
    ) -> DatasetRef | None:
        if collections is None:
            if not self.collections:
                raise NoDefaultCollectionError(
                    "No collections provided to find_dataset, and no defaults from butler construction."
                )
            collections = self.collections
        # Temporary hack: assume strings for collections. In the future we
        # want to construct a CollectionWildcard and filter it through the
        # collection cache to generate a list of collection names.
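        # (Illustration: CollectionWildcard.from_expression("imported_g")
        # yields a wildcard whose ``strings`` attribute holds just that
        # collection name, which is what gets sent to the server below.)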

        wildcards = CollectionWildcard.from_expression(collections)

        if datastore_records:
            raise ValueError("Datastore records cannot yet be returned in client/server butler.")
        if timespan:
            raise ValueError("Timespan cannot yet be used in butler client/server.")

        if isinstance(dataset_type, DatasetType):
            dataset_type = dataset_type.name

        if isinstance(storage_class, StorageClass):
            storage_class = storage_class.name

        query = FindDatasetModel(
            data_id=self._simplify_dataId(data_id, **kwargs),
            collections=wildcards.strings,
            storage_class=storage_class,
            dimension_records=dimension_records,
            datastore_records=datastore_records,
        )

        path = f"find_dataset/{dataset_type}"
        response = self._client.post(
            self._get_url(path), json=query.model_dump(mode="json", exclude_unset=True, exclude_defaults=True)
        )
        response.raise_for_status()

        if response.json() is None:
            # The return type is DatasetRef | None, so mirror get_dataset()
            # and propagate a null "not found" result from the server as None.
            return None
        return DatasetRef.from_simple(SerializedDatasetRef(**response.json()), universe=self.dimensions)

    def retrieveArtifacts(
        self,
        refs: Iterable[DatasetRef],
        destination: ResourcePathExpression,
        transfer: str = "auto",
        preserve_path: bool = True,
        overwrite: bool = False,
    ) -> list[ResourcePath]:
        # Docstring inherited.
        raise NotImplementedError()

    def exists(
        self,
        dataset_ref_or_type: DatasetRef | DatasetType | str,
        /,
        data_id: DataId | None = None,
        *,
        full_check: bool = True,
        collections: Any = None,
        **kwargs: Any,
    ) -> DatasetExistence:
        # Docstring inherited.
        raise NotImplementedError()

    def _exists_many(
        self,
        refs: Iterable[DatasetRef],
        /,
        *,
        full_check: bool = True,
    ) -> dict[DatasetRef, DatasetExistence]:
        # Docstring inherited.
        raise NotImplementedError()

    def removeRuns(self, names: Iterable[str], unstore: bool = True) -> None:
        # Docstring inherited.
        raise NotImplementedError()

    def ingest(
        self,
        *datasets: FileDataset,
        transfer: str | None = "auto",
        run: str | None = None,
        idGenerationMode: DatasetIdGenEnum | None = None,
        record_validation_info: bool = True,
    ) -> None:
        # Docstring inherited.
        raise NotImplementedError()

    def export(
        self,
        *,
        directory: str | None = None,
        filename: str | None = None,
        format: str | None = None,
        transfer: str | None = None,
    ) -> AbstractContextManager[RepoExportContext]:
        # Docstring inherited.
        raise NotImplementedError()

    def import_(
        self,
        *,
        directory: ResourcePathExpression | None = None,
        filename: ResourcePathExpression | TextIO | None = None,
        format: str | None = None,
        transfer: str | None = None,
        skip_dimensions: set | None = None,
    ) -> None:
        # Docstring inherited.
        raise NotImplementedError()

    def transfer_from(
        self,
        source_butler: LimitedButler,
        source_refs: Iterable[DatasetRef],
        transfer: str = "auto",
        skip_missing: bool = True,
        register_dataset_types: bool = False,
        transfer_dimensions: bool = False,
    ) -> Collection[DatasetRef]:
        # Docstring inherited.
        raise NotImplementedError()

    def validateConfiguration(
        self,
        logFailures: bool = False,
        datasetTypeNames: Iterable[str] | None = None,
        ignore: Iterable[str] | None = None,
    ) -> None:
        # Docstring inherited.
        raise NotImplementedError()

    @property
    def collections(self) -> Sequence[str]:
        # Docstring inherited.
        return self._registry_defaults.collections

    @property
    def run(self) -> str | None:
        # Docstring inherited.
        return self._registry_defaults.run

    @property
    def registry(self) -> Registry:
        # Docstring inherited.
        raise NotImplementedError()

    def pruneDatasets(
        self,
        refs: Iterable[DatasetRef],
        *,
        disassociate: bool = True,
        unstore: bool = False,
        tags: Iterable[str] = (),
        purge: bool = False,
    ) -> None:
        # Docstring inherited.
        raise NotImplementedError()

    def _get_url(self, path: str, version: str = "v1") -> str:
        """Form the complete path to an endpoint on the server.

        Parameters
        ----------
        path : `str`
            The relative path to the server endpoint.
        version : `str`, optional
            Version string to prepend to the path. Defaults to "v1".

        Returns
        -------
        path : `str`
            The full path to the endpoint.
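
        Examples
        --------
        >>> butler._get_url("universe")
        'v1/universe'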

        """
        return f"{version}/{path}"