Coverage for python/lsst/daf/butler/server.py: 3%
141 statements
coverage.py v7.3.2, created at 2023-10-25 15:13 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ()
26import logging
27from collections.abc import Mapping
28from enum import Enum, auto
29from typing import Any
31from fastapi import Depends, FastAPI, HTTPException, Query
32from fastapi.middleware.gzip import GZipMiddleware
33from lsst.daf.butler import (
34 Butler,
35 Config,
36 DataCoordinate,
37 DatasetId,
38 DatasetRef,
39 SerializedDataCoordinate,
40 SerializedDatasetRef,
41 SerializedDatasetType,
42 SerializedDimensionRecord,
43)
44from lsst.daf.butler.core.serverModels import (
45 ExpressionQueryParameter,
46 QueryDataIdsModel,
47 QueryDatasetsModel,
48 QueryDimensionRecordsModel,
49)
50from lsst.daf.butler.registry import CollectionType
52BUTLER_ROOT = "ci_hsc_gen3/DATA"
54log = logging.getLogger("excalibur")
57class CollectionTypeNames(str, Enum):
58 """Collection type names supported by the interface."""
60 def _generate_next_value_(name, start, count, last_values) -> str: # type: ignore # noqa: N805
61 # Use the name directly as the value
62 return name
64 RUN = auto()
65 CALIBRATION = auto()
66 CHAINED = auto()
67 TAGGED = auto()
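# A minimal sketch of what the _generate_next_value_ override means in
# practice: auto() assigns each member a string value equal to its own name.
# (The helper name below is hypothetical and not part of the interface.)
def _example_collection_type_names() -> None:
    """Show that auto() values equal the member names for this enum."""
    assert CollectionTypeNames.RUN.value == "RUN"
    assert CollectionTypeNames.CHAINED.value == "CHAINED"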
70app = FastAPI()
71app.add_middleware(GZipMiddleware, minimum_size=1000)
74GLOBAL_READWRITE_BUTLER: Butler | None = None
75GLOBAL_READONLY_BUTLER: Butler | None = None
78def _make_global_butler() -> None:
79 global GLOBAL_READONLY_BUTLER, GLOBAL_READWRITE_BUTLER
80 if GLOBAL_READONLY_BUTLER is None:
81 GLOBAL_READONLY_BUTLER = Butler(BUTLER_ROOT, writeable=False)
82 if GLOBAL_READWRITE_BUTLER is None:
83 GLOBAL_READWRITE_BUTLER = Butler(BUTLER_ROOT, writeable=True)
86def butler_readonly_dependency() -> Butler:
87 """Return global read-only butler."""
88 _make_global_butler()
89 return Butler(butler=GLOBAL_READONLY_BUTLER)
92def butler_readwrite_dependency() -> Butler:
93 """Return read-write butler."""
94 _make_global_butler()
95 return Butler(butler=GLOBAL_READWRITE_BUTLER)
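# A minimal sketch of how these dependencies can be redirected, e.g. in tests:
# FastAPI's dependency_overrides lets a caller substitute a different
# repository root. (The helper name and test_repo_root are hypothetical.)
def _example_override_readonly_butler(test_repo_root: str) -> None:
    """Point the read-only dependency at another repository for testing."""
    app.dependency_overrides[butler_readonly_dependency] = lambda: Butler(
        test_repo_root, writeable=False
    )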
98def unpack_dataId(butler: Butler, data_id: SerializedDataCoordinate | None) -> DataCoordinate | None:
99 """Convert the serialized dataId back to full DataCoordinate.
101 Parameters
102 ----------
103 butler : `lsst.daf.butler.Butler`
104 The butler to use for registry and universe.
105 data_id : `SerializedDataCoordinate` or `None`
106 The serialized form.
108 Returns
109 -------
110 dataId : `DataCoordinate` or `None`
111 The DataId usable by registry.
112 """
113 if data_id is None:
114 return None
115 return DataCoordinate.from_simple(data_id, registry=butler.registry)
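# A small usage sketch (hypothetical helper): the client serializes a
# DataCoordinate with to_simple() and the server reconstructs it here.
def _example_round_trip_dataId(butler: Butler, data_id: DataCoordinate) -> DataCoordinate | None:
    """Serialize a DataCoordinate as a client would and unpack it again."""
    serialized = data_id.to_simple()
    return unpack_dataId(butler, serialized)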
118@app.get("/butler/")
119def read_root() -> str:
120 """Return message when accessing the root URL."""
121 return "Welcome to Excalibur... aka your Butler Server"
124@app.get("/butler/butler.json", response_model=dict[str, Any])
125def read_server_config() -> Mapping:
126 """Return the butler configuration that the client should use."""
127 config_str = f"""
128datastore:
129 root: {BUTLER_ROOT}
130registry:
131 cls: lsst.daf.butler.registries.remote.RemoteRegistry
132 db: <butlerRoot>
133"""
134 config = Config.fromString(config_str, format="yaml")
135 return config.toDict()
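# A sketch of the client side (hypothetical; uses FastAPI's TestClient purely
# for illustration): fetch butler.json and rebuild a Config object from it.
def _example_fetch_server_config() -> Config:
    """Fetch the configuration served above and load it into a Config."""
    from fastapi.testclient import TestClient

    client = TestClient(app)
    response = client.get("/butler/butler.json")
    return Config(response.json())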
138@app.get("/butler/v1/universe", response_model=dict[str, Any])
139def get_dimension_universe(butler: Butler = Depends(butler_readonly_dependency)) -> dict[str, Any]:
140 """Allow remote client to get dimensions definition."""
141 return butler.dimensions.dimensionConfig.toDict()
144@app.get("/butler/v1/uri/{id}", response_model=str)
145def get_uri(id: DatasetId, butler: Butler = Depends(butler_readonly_dependency)) -> str:
146 """Return a single URI of non-disassembled dataset."""
147 ref = butler.registry.getDataset(id)
148 if not ref:
149 raise HTTPException(status_code=404, detail=f"Dataset with id {id} does not exist.")
151 uri = butler.getURI(ref)
153 # In reality this would have to be converted to a signed URL
154 return str(uri)
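# A hedged sketch of the signing step mentioned above, assuming an S3-backed
# datastore and boto3 (neither is implied by this module):
def _example_presign_s3(bucket: str, key: str, expires_in: int = 3600) -> str:
    """Exchange a plain S3 location for a time-limited presigned URL."""
    import boto3

    s3 = boto3.client("s3")
    return s3.generate_presigned_url(
        "get_object", Params={"Bucket": bucket, "Key": key}, ExpiresIn=expires_in
    )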
157@app.put("/butler/v1/registry/refresh")
158def refresh(butler: Butler = Depends(butler_readonly_dependency)) -> None:
159 """Refresh the registry cache."""
160 # Unclear whether this should exist. Which butler is really being
161 # refreshed? How do we know the server we are refreshing is used later?
162 # For now this matters for testing: if a test adds a dataset type directly
163 # in the server, the test client will not see it without a refresh.
164 butler.registry.refresh()
167@app.get(
168 "/butler/v1/registry/datasetType/{datasetTypeName}",
169 summary="Retrieve this dataset type definition.",
170 response_model=SerializedDatasetType,
171 response_model_exclude_unset=True,
172 response_model_exclude_defaults=True,
173 response_model_exclude_none=True,
174)
175def get_dataset_type(
176 datasetTypeName: str, butler: Butler = Depends(butler_readonly_dependency)
177) -> SerializedDatasetType:
178 """Return the dataset type."""
179 datasetType = butler.registry.getDatasetType(datasetTypeName)
180 return datasetType.to_simple()
183@app.get(
184 "/butler/v1/registry/datasetTypes",
185 summary="Retrieve all dataset type definitions.",
186 response_model=list[SerializedDatasetType],
187 response_model_exclude_unset=True,
188 response_model_exclude_defaults=True,
189 response_model_exclude_none=True,
190)
191def query_all_dataset_types(
192 components: bool | None = Query(None), butler: Butler = Depends(butler_readonly_dependency)
193) -> list[SerializedDatasetType]:
194 """Return all dataset types."""
195 datasetTypes = butler.registry.queryDatasetTypes(..., components=components)
196 return [d.to_simple() for d in datasetTypes]
199@app.get(
200 "/butler/v1/registry/datasetTypes/re",
201 summary="Retrieve dataset type definitions matching expressions",
202 response_model=list[SerializedDatasetType],
203 response_model_exclude_unset=True,
204 response_model_exclude_defaults=True,
205 response_model_exclude_none=True,
206)
207def query_dataset_types_re(
208 regex: list[str] | None = Query(None),
209 glob: list[str] | None = Query(None),
210 components: bool | None = Query(None),
211 butler: Butler = Depends(butler_readonly_dependency),
212) -> list[SerializedDatasetType]:
213 """Return all dataset types matching a regular expression."""
214 expression_params = ExpressionQueryParameter(regex=regex, glob=glob)
216 datasetTypes = butler.registry.queryDatasetTypes(expression_params.expression(), components=components)
217 return [d.to_simple() for d in datasetTypes]
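# A hypothetical client call for this endpoint (TestClient and the glob
# pattern are placeholders): regex and glob arrive as repeated query
# parameters and are combined into an ExpressionQueryParameter above.
def _example_query_dataset_types_re() -> list[dict[str, Any]]:
    """Match dataset types by glob via the /datasetTypes/re endpoint."""
    from fastapi.testclient import TestClient

    client = TestClient(app)
    response = client.get(
        "/butler/v1/registry/datasetTypes/re", params={"glob": ["raw*"]}
    )
    return response.json()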
220@app.get("/butler/v1/registry/collection/chain/{parent:path}", response_model=list[str])
221def get_collection_chain(parent: str, butler: Butler = Depends(butler_readonly_dependency)) -> list[str]:
222 """Return the collection chain members."""
223 chain = butler.registry.getCollectionChain(parent)
224 return list(chain)
227@app.get("/butler/v1/registry/collections", response_model=list[str])
228def query_collections(
229 regex: list[str] | None = Query(None),
230 glob: list[str] | None = Query(None),
231 datasetType: str | None = Query(None),
232 flattenChains: bool = Query(False),
233 collectionType: list[CollectionTypeNames] | None = Query(None),
234 includeChains: bool | None = Query(None),
235 butler: Butler = Depends(butler_readonly_dependency),
236) -> list[str]:
237 """Return collections matching query."""
238 expression_params = ExpressionQueryParameter(regex=regex, glob=glob)
239 collectionTypes = CollectionType.from_names(collectionType)
240 dataset_type = butler.registry.getDatasetType(datasetType) if datasetType else None
242 collections = butler.registry.queryCollections(
243 expression=expression_params.expression(),
244 datasetType=dataset_type,
245 collectionTypes=collectionTypes,
246 flattenChains=flattenChains,
247 includeChains=includeChains,
248 )
249 return list(collections)
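# A hypothetical client call for the collections query (the glob and
# flattenChains values are placeholders):
def _example_query_collections() -> list[str]:
    """List collections matching a glob via the /collections endpoint."""
    from fastapi.testclient import TestClient

    client = TestClient(app)
    response = client.get(
        "/butler/v1/registry/collections",
        params={"glob": ["HSC/*"], "flattenChains": True},
    )
    return response.json()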
252@app.get("/butler/v1/registry/collection/type/{name:path}", response_model=str)
253def get_collection_type(name: str, butler: Butler = Depends(butler_readonly_dependency)) -> str:
254 """Return type for named collection."""
255 collectionType = butler.registry.getCollectionType(name)
256 return collectionType.name
259@app.put("/butler/v1/registry/collection/{name:path}/{collectionTypeName}", response_model=str)
260def register_collection(
261 name: str,
262 collectionTypeName: CollectionTypeNames,
263 doc: str | None = Query(None),
264 butler: Butler = Depends(butler_readwrite_dependency),
265) -> str:
266 """Register a collection."""
267 collectionType = CollectionType.from_name(collectionTypeName)
268 butler.registry.registerCollection(name, collectionType, doc)
270 # Need to refresh the global read-only butler, otherwise other clients
271 # may not see this change.
272 if GLOBAL_READONLY_BUTLER is not None: # for mypy
273 GLOBAL_READONLY_BUTLER.registry.refresh()
275 return name
278@app.get(
279 "/butler/v1/registry/dataset/{id}",
280 summary="Retrieve this dataset definition.",
281 response_model=SerializedDatasetRef | None,
282 response_model_exclude_unset=True,
283 response_model_exclude_defaults=True,
284 response_model_exclude_none=True,
285)
286def get_dataset(
287 id: DatasetId, butler: Butler = Depends(butler_readonly_dependency)
288) -> SerializedDatasetRef | None:
289 """Return a single dataset reference."""
290 ref = butler.registry.getDataset(id)
291 if ref is not None:
292 return ref.to_simple()
293 # This could raise a 404 because the id was not found. The standard registry
294 # getDataset method returns None without error, so follow that example here.
295 return ref
298@app.get("/butler/v1/registry/datasetLocations/{id}", response_model=list[str])
299def get_dataset_locations(id: DatasetId, butler: Butler = Depends(butler_readonly_dependency)) -> list[str]:
300 """Return locations of datasets."""
301 # Takes an ID, so it needs to be converted to a real DatasetRef
302 fake_ref = SerializedDatasetRef(id=id)
304 try:
305 # Converting this to a real DatasetRef takes time and is not
306 # needed internally since only the ID is used.
307 ref = DatasetRef.from_simple(fake_ref, registry=butler.registry)
308 except Exception:
309 # The SQL getDatasetLocations implementation looks up the ID in the
310 # datastore and does not check that it exists in the registry. Follow
311 # that example and return without error.
312 return []
314 return list(butler.registry.getDatasetLocations(ref))
317# TimeSpan is not yet a pydantic model.
318@app.post(
319 "/butler/v1/registry/findDataset/{datasetType}",
320 summary="Retrieve this dataset definition from collection, dataset type, and dataId",
321 response_model=SerializedDatasetRef,
322 response_model_exclude_unset=True,
323 response_model_exclude_defaults=True,
324 response_model_exclude_none=True,
325)
326def find_dataset(
327 datasetType: str,
328 dataId: SerializedDataCoordinate | None = None,
329 collections: list[str] | None = Query(None),
330 butler: Butler = Depends(butler_readonly_dependency),
331) -> SerializedDatasetRef | None:
332 """Return a single dataset reference matching query."""
333 collection_query = collections if collections else None
335 ref = butler.registry.findDataset(
336 datasetType, dataId=unpack_dataId(butler, dataId), collections=collection_query
337 )
338 return ref.to_simple() if ref else None
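# A hypothetical client call for findDataset: the dataId body is optional, so
# the simplest request supplies only the dataset type and collections (the
# dataset type and collection names used here are placeholders).
def _example_find_dataset() -> dict[str, Any] | None:
    """Find a dataset by type and collection without a dataId body."""
    from fastapi.testclient import TestClient

    client = TestClient(app)
    response = client.post(
        "/butler/v1/registry/findDataset/camera",
        params={"collections": ["HSC/calib"]},
    )
    return response.json()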
341# POST is used for the complex dict data structures
342@app.post(
343 "/butler/v1/registry/datasets",
344 summary="Query all dataset holdings.",
345 response_model=list[SerializedDatasetRef],
346 response_model_exclude_unset=True,
347 response_model_exclude_defaults=True,
348 response_model_exclude_none=True,
349)
350def query_datasets(
351 query: QueryDatasetsModel, butler: Butler = Depends(butler_readonly_dependency)
352) -> list[SerializedDatasetRef]:
353 """Return datasets matching query."""
354 # This method might return a lot of results
356 if query.collections:
357 collections = query.collections.expression()
358 else:
359 collections = None
361 datasets = butler.registry.queryDatasets(
362 query.datasetType.expression(),
363 collections=collections,
364 dimensions=query.dimensions,
365 dataId=unpack_dataId(butler, query.dataId),
366 where=query.where,
367 findFirst=query.findFirst,
368 components=query.components,
369 bind=query.bind,
370 check=query.check,
371 **query.kwargs(),
372 )
373 return [ref.to_simple() for ref in datasets]
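# A hypothetical client call for the datasets query. The JSON payload below
# assumes QueryDatasetsModel accepts expression objects with a "glob" field,
# mirroring how datasetType and collections are used above; the same POST
# pattern applies to the dataIds and dimensionRecords endpoints that follow.
def _example_query_datasets() -> list[dict[str, Any]]:
    """Query dataset refs by glob expressions via the /datasets endpoint."""
    from fastapi.testclient import TestClient

    client = TestClient(app)
    payload = {
        "datasetType": {"glob": ["raw"]},
        "collections": {"glob": ["HSC/defaults"]},
        "findFirst": False,
    }
    response = client.post("/butler/v1/registry/datasets", json=payload)
    return response.json()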
376# POST is used for the complex dict data structures
377@app.post(
378 "/butler/v1/registry/dataIds",
379 summary="Query all data IDs.",
380 response_model=list[SerializedDataCoordinate],
381 response_model_exclude_unset=True,
382 response_model_exclude_defaults=True,
383 response_model_exclude_none=True,
384)
385def query_data_ids(
386 query: QueryDataIdsModel, butler: Butler = Depends(butler_readonly_dependency)
387) -> list[SerializedDataCoordinate]:
388 """Return data IDs matching query."""
389 if query.datasets:
390 datasets = query.datasets.expression()
391 else:
392 datasets = None
393 if query.collections:
394 collections = query.collections.expression()
395 else:
396 collections = None
398 dataIds = butler.registry.queryDataIds(
399 query.dimensions,
400 collections=collections,
401 datasets=datasets,
402 dataId=unpack_dataId(butler, query.dataId),
403 where=query.where,
404 components=query.components,
405 bind=query.bind,
406 check=query.check,
407 **query.kwargs(),
408 )
409 return [coord.to_simple() for coord in dataIds]
412# Uses POST to handle the DataId
413@app.post(
414 "/butler/v1/registry/dimensionRecords/{element}",
415 summary="Retrieve dimension records matching query",
416 response_model=list[SerializedDimensionRecord],
417 response_model_exclude_unset=True,
418 response_model_exclude_defaults=True,
419 response_model_exclude_none=True,
420)
421def query_dimension_records(
422 element: str, query: QueryDimensionRecordsModel, butler: Butler = Depends(butler_readonly_dependency)
423) -> list[SerializedDimensionRecord]:
424 """Return dimension records matching query."""
425 if query.datasets:
426 datasets = query.datasets.expression()
427 else:
428 datasets = None
429 if query.collections:
430 collections = query.collections.expression()
431 else:
432 collections = None
434 records = butler.registry.queryDimensionRecords(
435 element,
436 dataId=unpack_dataId(butler, query.dataId),
437 collections=collections,
438 where=query.where,
439 datasets=datasets,
440 components=query.components,
441 bind=query.bind,
442 check=query.check,
443 **query.kwargs(),
444 )
445 return [r.to_simple() for r in records]