Coverage for python/lsst/daf/butler/server.py: 4%

141 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-02-05 02:03 -0800

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = () 

25 

26import logging 

27from collections.abc import Mapping 

28from enum import Enum, auto 

29from typing import Any 

30 

31from fastapi import Depends, FastAPI, HTTPException, Query 

32from fastapi.middleware.gzip import GZipMiddleware 

33from lsst.daf.butler import ( 

34 Butler, 

35 Config, 

36 DataCoordinate, 

37 DatasetId, 

38 DatasetRef, 

39 DimensionConfig, 

40 SerializedDataCoordinate, 

41 SerializedDatasetRef, 

42 SerializedDatasetType, 

43 SerializedDimensionRecord, 

44) 

45from lsst.daf.butler.core.serverModels import ( 

46 ExpressionQueryParameter, 

47 QueryDataIdsModel, 

48 QueryDatasetsModel, 

49 QueryDimensionRecordsModel, 

50) 

51from lsst.daf.butler.registry import CollectionType 

52 

# Root of the data repository served by this application.  Hard-coded here;
# NOTE(review): presumably replaced by deployment configuration in a real
# installation — confirm before production use.
BUTLER_ROOT = "ci_hsc_gen3/DATA"

# Module logger; "excalibur" matches the greeting returned by read_root().
log = logging.getLogger("excalibur")

56 

57 

class CollectionTypeNames(str, Enum):
    """Collection type names supported by the interface."""

    # Must appear before the members so that ``auto()`` uses it: the value
    # assigned to each member is simply the member's own name.
    def _generate_next_value_(member_name, start, count, last_values) -> str:  # type: ignore # noqa: N805
        return member_name

    RUN = auto()
    CALIBRATION = auto()
    CHAINED = auto()
    TAGGED = auto()

69 

70 

# The FastAPI application exposing the Butler REST endpoints below.
app = FastAPI()
# Gzip-compress responses larger than 1000 bytes (query results can be big).
app.add_middleware(GZipMiddleware, minimum_size=1000)

73 

74 

# Process-wide Butler instances, created lazily by _make_global_butler() and
# shared across requests via the dependency functions below.
GLOBAL_READWRITE_BUTLER = None
GLOBAL_READONLY_BUTLER = None

77 

78 

def _make_global_butler() -> None:
    """Create the shared read-only and read/write Butlers if not yet made.

    Idempotent: each global is instantiated at most once per process.
    NOTE(review): no locking here — two concurrent first requests could each
    construct a Butler; confirm that is acceptable for this server.
    """
    global GLOBAL_READONLY_BUTLER, GLOBAL_READWRITE_BUTLER
    if GLOBAL_READONLY_BUTLER is None:
        GLOBAL_READONLY_BUTLER = Butler(BUTLER_ROOT, writeable=False)
    if GLOBAL_READWRITE_BUTLER is None:
        GLOBAL_READWRITE_BUTLER = Butler(BUTLER_ROOT, writeable=True)

85 

86 

def butler_readonly_dependency() -> Butler:
    """FastAPI dependency: a per-request Butler backed by the shared read-only one."""
    _make_global_butler()
    shared = GLOBAL_READONLY_BUTLER
    return Butler(butler=shared)

90 

91 

def butler_readwrite_dependency() -> Butler:
    """FastAPI dependency: a per-request Butler backed by the shared read/write one."""
    _make_global_butler()
    shared = GLOBAL_READWRITE_BUTLER
    return Butler(butler=shared)

95 

96 

def unpack_dataId(butler: Butler, data_id: SerializedDataCoordinate | None) -> DataCoordinate | None:
    """Convert a serialized dataId back to a full DataCoordinate.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler to use for registry and universe.
    data_id : `SerializedDataCoordinate` or `None`
        The serialized form, or `None`.

    Returns
    -------
    dataId : `DataCoordinate` or `None`
        The DataId usable by registry, or `None` if none was supplied.
    """
    return None if data_id is None else DataCoordinate.from_simple(data_id, registry=butler.registry)

115 

116 

@app.get("/butler/")
def read_root() -> str:
    """Greet callers hitting the server root."""
    greeting = "Welcome to Excalibur... aka your Butler Server"
    return greeting

120 

121 

@app.get("/butler/butler.json", response_model=dict[str, Any])
def read_server_config() -> Mapping:
    """Return the butler configuration that the client should use.

    The client is pointed at a RemoteRegistry and the server's datastore root.
    """
    yaml_str = f"""
datastore:
    root: {BUTLER_ROOT}
registry:
    cls: lsst.daf.butler.registries.remote.RemoteRegistry
    db: <butlerRoot>
"""
    return Config.fromString(yaml_str, format="yaml")

134 

135 

@app.get("/butler/v1/universe", response_model=dict[str, Any])
def get_dimension_universe(butler: Butler = Depends(butler_readonly_dependency)) -> DimensionConfig:
    """Allow a remote client to download the dimensions definition."""
    universe = butler.registry.dimensions
    return universe.dimensionConfig

140 

141 

@app.get("/butler/v1/uri/{id}", response_model=str)
def get_uri(id: DatasetId, butler: Butler = Depends(butler_readonly_dependency)) -> str:
    """Return a single URI of non-disassembled dataset.

    Raises a 404 if no dataset with this ID exists in the registry.
    """
    dataset_ref = butler.registry.getDataset(id)
    if not dataset_ref:
        raise HTTPException(status_code=404, detail=f"Dataset with id {id} does not exist.")

    # In reality this would have to be converted to a signed URL.
    return str(butler.datastore.getURI(dataset_ref))

153 

154 

@app.put("/butler/v1/registry/refresh")
def refresh(butler: Butler = Depends(butler_readonly_dependency)) -> None:
    """Refresh the registry of the (read-only) request butler.

    NOTE(review): unclear whether this endpoint should exist — which butler
    is really being refreshed, and how do we know the one refreshed here is
    used later?  For now it matters in testing: if a test adds a dataset
    type directly in the server, the test client will not see it without
    this refresh.
    """
    butler.registry.refresh()

162 

163 

@app.get(
    "/butler/v1/registry/datasetType/{datasetTypeName}",
    summary="Retrieve this dataset type definition.",
    response_model=SerializedDatasetType,
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def get_dataset_type(
    datasetTypeName: str, butler: Butler = Depends(butler_readonly_dependency)
) -> SerializedDatasetType:
    """Return the serialized definition of the named dataset type."""
    return butler.registry.getDatasetType(datasetTypeName).to_simple()

177 

178 

@app.get(
    "/butler/v1/registry/datasetTypes",
    summary="Retrieve all dataset type definitions.",
    response_model=list[SerializedDatasetType],
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def query_all_dataset_types(
    components: bool | None = Query(None), butler: Butler = Depends(butler_readonly_dependency)
) -> list[SerializedDatasetType]:
    """Return every dataset type known to the registry, in serialized form."""
    # ``...`` is the registry's "match everything" expression.
    all_types = butler.registry.queryDatasetTypes(..., components=components)
    return [one_type.to_simple() for one_type in all_types]

192 

193 

@app.get(
    "/butler/v1/registry/datasetTypes/re",
    summary="Retrieve dataset type definitions matching expressions",
    response_model=list[SerializedDatasetType],
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def query_dataset_types_re(
    regex: list[str] | None = Query(None),
    glob: list[str] | None = Query(None),
    components: bool | None = Query(None),
    butler: Butler = Depends(butler_readonly_dependency),
) -> list[SerializedDatasetType]:
    """Return dataset types whose names match the given regex/glob expressions."""
    expression = ExpressionQueryParameter(regex=regex, glob=glob).expression()
    matches = butler.registry.queryDatasetTypes(expression, components=components)
    return [match.to_simple() for match in matches]

212 

213 

@app.get("/butler/v1/registry/collection/chain/{parent:path}", response_model=list[str])
def get_collection_chain(parent: str, butler: Butler = Depends(butler_readonly_dependency)) -> list[str]:
    """Return the child collections of this chained collection."""
    return list(butler.registry.getCollectionChain(parent))

218 

219 

@app.get("/butler/v1/registry/collections", response_model=list[str])
def query_collections(
    regex: list[str] | None = Query(None),
    glob: list[str] | None = Query(None),
    datasetType: str | None = Query(None),
    flattenChains: bool = Query(False),
    collectionType: list[CollectionTypeNames] | None = Query(None),
    includeChains: bool | None = Query(None),
    butler: Butler = Depends(butler_readonly_dependency),
) -> list[str]:
    """List collection names matching the given expressions and filters."""
    expression = ExpressionQueryParameter(regex=regex, glob=glob).expression()
    types = CollectionType.from_names(collectionType)
    # Resolve the dataset type name to a real DatasetType, if one was given.
    dataset_type = butler.registry.getDatasetType(datasetType) if datasetType else None

    found = butler.registry.queryCollections(
        expression=expression,
        datasetType=dataset_type,
        collectionTypes=types,
        flattenChains=flattenChains,
        includeChains=includeChains,
    )
    return list(found)

242 

243 

@app.get("/butler/v1/registry/collection/type/{name:path}", response_model=str)
def get_collection_type(name: str, butler: Butler = Depends(butler_readonly_dependency)) -> str:
    """Return the collection type name of this collection."""
    return butler.registry.getCollectionType(name).name

248 

249 

# BUG FIX: the path template previously declared ``{type_}``, which matches no
# function parameter, so FastAPI ignored that path segment and instead required
# ``collectionTypeName`` as a query parameter.  Naming the path parameter after
# the argument restores the intended URL shape without changing the route.
@app.put("/butler/v1/registry/collection/{name:path}/{collectionTypeName}", response_model=str)
def register_collection(
    name: str,
    collectionTypeName: CollectionTypeNames,
    doc: str | None = Query(None),
    butler: Butler = Depends(butler_readwrite_dependency),
) -> str:
    """Register a new collection and return its name.

    Parameters
    ----------
    name : `str`
        Collection name, taken from the URL path (may contain slashes).
    collectionTypeName : `CollectionTypeNames`
        Collection type, taken from the final URL path segment.
    doc : `str` or `None`
        Optional documentation string, from the query string.
    """
    collectionType = CollectionType.from_name(collectionTypeName)
    butler.registry.registerCollection(name, collectionType, doc)

    # Need to refresh the global read only butler otherwise other clients
    # may not see this change.
    if GLOBAL_READONLY_BUTLER is not None:  # for mypy
        GLOBAL_READONLY_BUTLER.registry.refresh()

    return name

266 

267 

@app.get(
    "/butler/v1/registry/dataset/{id}",
    summary="Retrieve this dataset definition.",
    response_model=SerializedDatasetRef | None,
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def get_dataset(
    id: DatasetId, butler: Butler = Depends(butler_readonly_dependency)
) -> SerializedDatasetRef | None:
    """Return the serialized DatasetRef for this ID, or None if unknown."""
    ref = butler.registry.getDataset(id)
    # A 404 would also be defensible for an unknown id, but the standard
    # registry getDataset method returns None without error, so follow suit.
    return ref.to_simple() if ref is not None else None

285 

286 

@app.get("/butler/v1/registry/datasetLocations/{id}", response_model=list[str])
def get_dataset_locations(id: DatasetId, butler: Butler = Depends(butler_readonly_dependency)) -> list[str]:
    """Return the datastore locations holding the dataset with this ID."""
    # The registry call wants a DatasetRef, but only the ID is used
    # internally, so build a minimal serialized ref rather than spending
    # time fully expanding one.
    minimal_ref = SerializedDatasetRef(id=id)
    try:
        dataset_ref = DatasetRef.from_simple(minimal_ref, registry=butler.registry)
    except Exception:
        # The SQL getDatasetLocations looks at the ID in the datastore and
        # does not check registry membership; mirror that lenient behavior
        # and return an empty list rather than an error.
        return []
    return list(butler.registry.getDatasetLocations(dataset_ref))

303 

304 

# TimeSpan not yet a pydantic model
@app.post(
    "/butler/v1/registry/findDataset/{datasetType}",
    summary="Retrieve this dataset definition from collection, dataset type, and dataId",
    response_model=SerializedDatasetRef,
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def find_dataset(
    datasetType: str,
    dataId: SerializedDataCoordinate | None = None,
    collections: list[str] | None = Query(None),
    butler: Butler = Depends(butler_readonly_dependency),
) -> SerializedDatasetRef | None:
    """Look up a single dataset by type, dataId, and collection search path."""
    # An empty collection list is normalized to None (registry default search).
    search = collections or None
    found = butler.registry.findDataset(
        datasetType, dataId=unpack_dataId(butler, dataId), collections=search
    )
    return found.to_simple() if found else None

326 

327 

# POST is used for the complex dict data structures
@app.post(
    "/butler/v1/registry/datasets",
    summary="Query all dataset holdings.",
    response_model=list[SerializedDatasetRef],
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def query_datasets(
    query: QueryDatasetsModel, butler: Butler = Depends(butler_readonly_dependency)
) -> list[SerializedDatasetRef]:
    """Run a registry dataset query and return the serialized matches.

    NOTE(review): this can return a very large number of results — no
    paging is implemented yet.
    """
    collections = query.collections.expression() if query.collections else None

    found = butler.registry.queryDatasets(
        query.datasetType.expression(),
        collections=collections,
        dimensions=query.dimensions,
        dataId=unpack_dataId(butler, query.dataId),
        where=query.where,
        findFirst=query.findFirst,
        components=query.components,
        bind=query.bind,
        check=query.check,
        **query.kwargs(),
    )
    return [one.to_simple() for one in found]

360 

361 

# POST is used for the complex dict data structures
@app.post(
    "/butler/v1/registry/dataIds",
    summary="Query all data IDs.",
    response_model=list[SerializedDataCoordinate],
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def query_data_ids(
    query: QueryDataIdsModel, butler: Butler = Depends(butler_readonly_dependency)
) -> list[SerializedDataCoordinate]:
    """Run a registry data-ID query and return the serialized coordinates."""
    # Optional query expressions are normalized to None when absent.
    datasets = query.datasets.expression() if query.datasets else None
    collections = query.collections.expression() if query.collections else None

    found = butler.registry.queryDataIds(
        query.dimensions,
        collections=collections,
        datasets=datasets,
        dataId=unpack_dataId(butler, query.dataId),
        where=query.where,
        components=query.components,
        bind=query.bind,
        check=query.check,
        **query.kwargs(),
    )
    return [data_id.to_simple() for data_id in found]

395 

396 

# Uses POST to handle the DataId
@app.post(
    "/butler/v1/registry/dimensionRecords/{element}",
    summary="Retrieve dimension records matching query",
    response_model=list[SerializedDimensionRecord],
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def query_dimension_records(
    element: str, query: QueryDimensionRecordsModel, butler: Butler = Depends(butler_readonly_dependency)
) -> list[SerializedDimensionRecord]:
    """Return serialized dimension records for ``element`` matching the query."""
    # Optional query expressions are normalized to None when absent.
    datasets = query.datasets.expression() if query.datasets else None
    collections = query.collections.expression() if query.collections else None

    found = butler.registry.queryDimensionRecords(
        element,
        dataId=unpack_dataId(butler, query.dataId),
        collections=collections,
        where=query.where,
        datasets=datasets,
        components=query.components,
        bind=query.bind,
        check=query.check,
        **query.kwargs(),
    )
    return [record.to_simple() for record in found]