Coverage for python/lsst/daf/butler/remote_butler/_remote_butler.py: 3%
129 statements
coverage.py v7.3.2, created at 2023-11-03 16:25 +0000

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__all__ = ("RemoteButler",)

from collections.abc import Collection, Iterable, Sequence
from contextlib import AbstractContextManager
from typing import Any, TextIO

import httpx
from lsst.daf.butler import __version__
from lsst.resources import ResourcePath, ResourcePathExpression
from lsst.utils.introspection import get_full_type_name

from .._butler import Butler
from .._butler_config import ButlerConfig
from .._config import Config
from .._dataset_existence import DatasetExistence
from .._dataset_ref import DatasetId, DatasetIdGenEnum, DatasetRef, SerializedDatasetRef
from .._dataset_type import DatasetType, SerializedDatasetType
from .._deferredDatasetHandle import DeferredDatasetHandle
from .._file_dataset import FileDataset
from .._limited_butler import LimitedButler
from .._storage_class import StorageClass
from .._timespan import Timespan
from ..datastore import DatasetRefURIs
from ..dimensions import DataCoordinate, DataId, DimensionConfig, DimensionUniverse, SerializedDataCoordinate
from ..registry import MissingDatasetTypeError, NoDefaultCollectionError, Registry, RegistryDefaults
from ..registry.wildcards import CollectionWildcard
from ..transfers import RepoExportContext
from ._config import RemoteButlerConfigModel
from .server import FindDatasetModel


class RemoteButler(Butler):
    def __init__(
        self,
        # These parameters are inherited from the Butler() constructor.
        config: Config | ResourcePathExpression | None = None,
        *,
        collections: Any = None,
        run: str | None = None,
        searchPaths: Sequence[ResourcePathExpression] | None = None,
        writeable: bool | None = None,
        inferDefaults: bool = True,
        # Parameters unique to RemoteButler.
        http_client: httpx.Client | None = None,
        **kwargs: Any,
    ):
        butler_config = ButlerConfig(config, searchPaths, without_datastore=True)
        self._config = RemoteButlerConfigModel.model_validate(butler_config)
        self._dimensions: DimensionUniverse | None = None
        # TODO: RegistryDefaults should have finish() called on it, but this
        # requires getCollectionSummary(), which is not yet implemented.
        self._registry_defaults = RegistryDefaults(collections, run, inferDefaults, **kwargs)

        if http_client is not None:
            # A client has been injected explicitly into the class.
            # This is generally done for testing.
            self._client = http_client
        else:
            headers = {"user-agent": f"{get_full_type_name(self)}/{__version__}"}
            self._client = httpx.Client(headers=headers, base_url=str(self._config.remote_butler.url))

    def isWriteable(self) -> bool:
        # Docstring inherited.
        return False

    @property
    def dimensions(self) -> DimensionUniverse:
        # Docstring inherited.
        if self._dimensions is not None:
            return self._dimensions

        response = self._client.get(self._get_url("universe"))
        response.raise_for_status()

        config = DimensionConfig.fromString(response.text, format="json")
        self._dimensions = DimensionUniverse(config)
        return self._dimensions

    def _simplify_dataId(
        self, dataId: DataId | None, **kwargs: dict[str, int | str]
    ) -> SerializedDataCoordinate | None:
        """Take a generic Data ID and convert it to a serializable form.

        Parameters
        ----------
        dataId : `dict`, `None`, `DataCoordinate`
            The data ID to serialize.
        **kwargs : `dict`
            Additional values that should be included if this is not
            a `DataCoordinate`.

        Returns
        -------
        data_id : `SerializedDataCoordinate` or `None`
            A serializable form.
        """
        if dataId is None and not kwargs:
            return None
        if isinstance(dataId, DataCoordinate):
            return dataId.to_simple()

        if dataId is None:
            data_id = kwargs
        else:
            # Change variable because DataId is immutable and mypy complains.
            # Copy unconditionally so data_id is always bound, even when no
            # kwargs were given (updating with an empty dict is a no-op).
            data_id = dict(dataId)
            data_id.update(kwargs)

        # Assume we can treat it as a dict.
        return SerializedDataCoordinate(dataId=data_id)
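
    # Illustrative example (not part of the original source): a plain dict
    # plus keyword overrides serializes as one merged mapping, e.g.
    #   _simplify_dataId({"instrument": "LSSTCam"}, detector=42)
    #   -> SerializedDataCoordinate(dataId={"instrument": "LSSTCam", "detector": 42})
    # whereas a DataCoordinate short-circuits through its own to_simple().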

    def transaction(self) -> AbstractContextManager[None]:
        """Will always raise NotImplementedError.

        Transactions are not supported by RemoteButler.
        """
        raise NotImplementedError()

    def put(
        self,
        obj: Any,
        datasetRefOrType: DatasetRef | DatasetType | str,
        /,
        dataId: DataId | None = None,
        *,
        run: str | None = None,
        **kwargs: Any,
    ) -> DatasetRef:
        # Docstring inherited.
        raise NotImplementedError()

    def getDeferred(
        self,
        datasetRefOrType: DatasetRef | DatasetType | str,
        /,
        dataId: DataId | None = None,
        *,
        parameters: dict | None = None,
        collections: Any = None,
        storageClass: str | StorageClass | None = None,
        **kwargs: Any,
    ) -> DeferredDatasetHandle:
        # Docstring inherited.
        raise NotImplementedError()

    def get(
        self,
        datasetRefOrType: DatasetRef | DatasetType | str,
        /,
        dataId: DataId | None = None,
        *,
        parameters: dict[str, Any] | None = None,
        collections: Any = None,
        storageClass: StorageClass | str | None = None,
        **kwargs: Any,
    ) -> Any:
        # Docstring inherited.
        raise NotImplementedError()

    def getURIs(
        self,
        datasetRefOrType: DatasetRef | DatasetType | str,
        /,
        dataId: DataId | None = None,
        *,
        predict: bool = False,
        collections: Any = None,
        run: str | None = None,
        **kwargs: Any,
    ) -> DatasetRefURIs:
        # Docstring inherited.
        raise NotImplementedError()

    def getURI(
        self,
        datasetRefOrType: DatasetRef | DatasetType | str,
        /,
        dataId: DataId | None = None,
        *,
        predict: bool = False,
        collections: Any = None,
        run: str | None = None,
        **kwargs: Any,
    ) -> ResourcePath:
        # Docstring inherited.
        raise NotImplementedError()

    def get_dataset_type(self, name: str) -> DatasetType:
        # In a future implementation this should access the cache directly
        # and only go to the server if the dataset type is not known.
        path = f"dataset_type/{name}"
        response = self._client.get(self._get_url(path))
        if response.status_code != httpx.codes.OK:
            content = response.json()
            if content["exception"] == "MissingDatasetTypeError":
                raise MissingDatasetTypeError(content["detail"])
            response.raise_for_status()
        return DatasetType.from_simple(SerializedDatasetType(**response.json()), universe=self.dimensions)

    def get_dataset(
        self,
        id: DatasetId,
        storage_class: str | StorageClass | None = None,
        dimension_records: bool = False,
        datastore_records: bool = False,
    ) -> DatasetRef | None:
        path = f"dataset/{id}"
        if isinstance(storage_class, StorageClass):
            storage_class_name = storage_class.name
        elif storage_class:
            storage_class_name = storage_class
        params: dict[str, str | bool] = {
            "dimension_records": dimension_records,
            "datastore_records": datastore_records,
        }
        if datastore_records:
            raise ValueError("Datastore records can not yet be returned in client/server butler.")
        if storage_class:
            params["storage_class"] = storage_class_name
        response = self._client.get(self._get_url(path), params=params)
        response.raise_for_status()
        if response.json() is None:
            return None
        return DatasetRef.from_simple(SerializedDatasetRef(**response.json()), universe=self.dimensions)

    def find_dataset(
        self,
        dataset_type: DatasetType | str,
        data_id: DataId | None = None,
        *,
        collections: str | Sequence[str] | None = None,
        timespan: Timespan | None = None,
        storage_class: str | StorageClass | None = None,
        dimension_records: bool = False,
        datastore_records: bool = False,
        **kwargs: Any,
    ) -> DatasetRef | None:
        if collections is None:
            if not self.collections:
                raise NoDefaultCollectionError(
                    "No collections provided to find_dataset, and no defaults from butler construction."
                )
            collections = self.collections
        # Temporary hack. Assume strings for collections. In the future we
        # want to construct a CollectionWildcard and filter it through the
        # collection cache to generate a list of collection names.
        wildcards = CollectionWildcard.from_expression(collections)

        if datastore_records:
            raise ValueError("Datastore records can not yet be returned in client/server butler.")
        if timespan:
            raise ValueError("Timespan can not yet be used in butler client/server.")

        if isinstance(dataset_type, DatasetType):
            dataset_type = dataset_type.name

        if isinstance(storage_class, StorageClass):
            storage_class = storage_class.name

        query = FindDatasetModel(
            data_id=self._simplify_dataId(data_id, **kwargs),
            collections=wildcards.strings,
            storage_class=storage_class,
            dimension_records=dimension_records,
            datastore_records=datastore_records,
        )

        path = f"find_dataset/{dataset_type}"
        response = self._client.post(
            self._get_url(path), json=query.model_dump(mode="json", exclude_unset=True, exclude_defaults=True)
        )
        response.raise_for_status()

        return DatasetRef.from_simple(SerializedDatasetRef(**response.json()), universe=self.dimensions)
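
    # Illustrative example (not from the original source): a call like
    #   butler.find_dataset("calexp", collections="HSC/runs/RC2",
    #                       instrument="HSC", visit=903334, detector=16)
    # POSTs to "find_dataset/calexp" with a JSON body along the lines of
    #   {"data_id": {"dataId": {"instrument": "HSC", "visit": 903334,
    #                           "detector": 16}},
    #    "collections": ["HSC/runs/RC2"]}
    # with unset fields left to the server-side model defaults.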

    def retrieveArtifacts(
        self,
        refs: Iterable[DatasetRef],
        destination: ResourcePathExpression,
        transfer: str = "auto",
        preserve_path: bool = True,
        overwrite: bool = False,
    ) -> list[ResourcePath]:
        # Docstring inherited.
        raise NotImplementedError()

    def exists(
        self,
        dataset_ref_or_type: DatasetRef | DatasetType | str,
        /,
        data_id: DataId | None = None,
        *,
        full_check: bool = True,
        collections: Any = None,
        **kwargs: Any,
    ) -> DatasetExistence:
        # Docstring inherited.
        raise NotImplementedError()

    def _exists_many(
        self,
        refs: Iterable[DatasetRef],
        /,
        *,
        full_check: bool = True,
    ) -> dict[DatasetRef, DatasetExistence]:
        # Docstring inherited.
        raise NotImplementedError()

    def removeRuns(self, names: Iterable[str], unstore: bool = True) -> None:
        # Docstring inherited.
        raise NotImplementedError()

    def ingest(
        self,
        *datasets: FileDataset,
        transfer: str | None = "auto",
        run: str | None = None,
        idGenerationMode: DatasetIdGenEnum | None = None,
        record_validation_info: bool = True,
    ) -> None:
        # Docstring inherited.
        raise NotImplementedError()

    def export(
        self,
        *,
        directory: str | None = None,
        filename: str | None = None,
        format: str | None = None,
        transfer: str | None = None,
    ) -> AbstractContextManager[RepoExportContext]:
        # Docstring inherited.
        raise NotImplementedError()

    def import_(
        self,
        *,
        directory: ResourcePathExpression | None = None,
        filename: ResourcePathExpression | TextIO | None = None,
        format: str | None = None,
        transfer: str | None = None,
        skip_dimensions: set | None = None,
    ) -> None:
        # Docstring inherited.
        raise NotImplementedError()

    def transfer_from(
        self,
        source_butler: LimitedButler,
        source_refs: Iterable[DatasetRef],
        transfer: str = "auto",
        skip_missing: bool = True,
        register_dataset_types: bool = False,
        transfer_dimensions: bool = False,
    ) -> Collection[DatasetRef]:
        # Docstring inherited.
        raise NotImplementedError()

    def validateConfiguration(
        self,
        logFailures: bool = False,
        datasetTypeNames: Iterable[str] | None = None,
        ignore: Iterable[str] | None = None,
    ) -> None:
        # Docstring inherited.
        raise NotImplementedError()

    @property
    def collections(self) -> Sequence[str]:
        # Docstring inherited.
        return self._registry_defaults.collections

    @property
    def run(self) -> str | None:
        # Docstring inherited.
        return self._registry_defaults.run

    @property
    def registry(self) -> Registry:
        # Docstring inherited.
        raise NotImplementedError()

    def pruneDatasets(
        self,
        refs: Iterable[DatasetRef],
        *,
        disassociate: bool = True,
        unstore: bool = False,
        tags: Iterable[str] = (),
        purge: bool = False,
    ) -> None:
        # Docstring inherited.
        raise NotImplementedError()

    def _get_url(self, path: str, version: str = "v1") -> str:
        """Form the complete path to an endpoint on the server.

        Parameters
        ----------
        path : `str`
            The relative path to the server endpoint. Should not include the
            "/butler" prefix.
        version : `str`, optional
            Version string to prepend to the path. Defaults to "v1".

        Returns
        -------
        path : `str`
            The full path to the endpoint.
        """
        prefix = "butler"
        return f"{prefix}/{version}/{path}"
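
For orientation, a minimal sketch of how this client could be driven follows. The configuration path, server URL, and data ID values are illustrative assumptions, not part of the module; the only requirement visible in the code above is that the resolved config validates as RemoteButlerConfigModel and carries a remote_butler.url entry.

# Hypothetical smoke test for RemoteButler; paths and values are made up.
import httpx
from lsst.daf.butler.remote_butler._remote_butler import RemoteButler

# Inject a client pointed at an assumed server, as tests might do.
client = httpx.Client(base_url="https://example.org/api/butler/")
butler = RemoteButler("/path/to/remote_repo/butler.yaml", http_client=client)

assert not butler.isWriteable()             # RemoteButler is read-only.
universe = butler.dimensions                # Lazily fetched from the "universe" endpoint.
calexp = butler.get_dataset_type("calexp")  # GET butler/v1/dataset_type/calexp
ref = butler.find_dataset(
    "calexp", collections="HSC/runs/RC2", instrument="HSC", visit=903334, detector=16
)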