Coverage for python/lsst/daf/butler/server.py: 3%
141 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-14 19:20 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ()
26import logging
27from collections.abc import Mapping
28from enum import Enum, auto
29from typing import Any
31from fastapi import Depends, FastAPI, HTTPException, Query
32from fastapi.middleware.gzip import GZipMiddleware
33from lsst.daf.butler import (
34 Butler,
35 Config,
36 DataCoordinate,
37 DatasetId,
38 DatasetRef,
39 DimensionConfig,
40 SerializedDataCoordinate,
41 SerializedDatasetRef,
42 SerializedDatasetType,
43 SerializedDimensionRecord,
44)
45from lsst.daf.butler.core.serverModels import (
46 ExpressionQueryParameter,
47 QueryDataIdsModel,
48 QueryDatasetsModel,
49 QueryDimensionRecordsModel,
50)
51from lsst.daf.butler.registry import CollectionType
# Root of the butler data repository served by this app; relative path,
# so the server must be started from the directory containing it.
BUTLER_ROOT = "ci_hsc_gen3/DATA"

# Module-level logger for the server.
log = logging.getLogger("excalibur")
class CollectionTypeNames(str, Enum):
    """Collection type names supported by the interface.

    Each member's value is its own name, so the enum round-trips cleanly
    through string-typed query and path parameters.
    """

    RUN = "RUN"
    CALIBRATION = "CALIBRATION"
    CHAINED = "CHAINED"
    TAGGED = "TAGGED"
app = FastAPI()
# Compress responses larger than 1000 bytes (query results can be big).
app.add_middleware(GZipMiddleware, minimum_size=1000)

# Process-wide butler instances, created lazily on first request.
GLOBAL_READWRITE_BUTLER: Butler | None = None
GLOBAL_READONLY_BUTLER: Butler | None = None
def _make_global_butler() -> None:
    """Create the global read-only and read-write butlers if they do not
    already exist.

    Idempotent: each global is only constructed once per process.
    """
    global GLOBAL_READONLY_BUTLER, GLOBAL_READWRITE_BUTLER
    if GLOBAL_READONLY_BUTLER is None:
        GLOBAL_READONLY_BUTLER = Butler(BUTLER_ROOT, writeable=False)
    if GLOBAL_READWRITE_BUTLER is None:
        GLOBAL_READWRITE_BUTLER = Butler(BUTLER_ROOT, writeable=True)
def butler_readonly_dependency() -> Butler:
    """Return global read-only butler."""
    _make_global_butler()
    # Wrap the shared instance in a new Butler per request; presumably the
    # copy-constructor shares registry/datastore state — confirm semantics.
    return Butler(butler=GLOBAL_READONLY_BUTLER)
def butler_readwrite_dependency() -> Butler:
    """Return read-write butler."""
    _make_global_butler()
    # New per-request Butler wrapping the shared writeable instance.
    return Butler(butler=GLOBAL_READWRITE_BUTLER)
def unpack_dataId(butler: Butler, data_id: SerializedDataCoordinate | None) -> DataCoordinate | None:
    """Convert the serialized dataId back to full DataCoordinate.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler whose registry supplies the dimension universe.
    data_id : `SerializedDataCoordinate` or `None`
        The serialized form, or `None` if no dataId was supplied.

    Returns
    -------
    dataId : `DataCoordinate` or `None`
        The expanded DataId usable by registry, or `None` if the input
        was `None`.
    """
    return None if data_id is None else DataCoordinate.from_simple(data_id, registry=butler.registry)
@app.get("/butler/")
def read_root() -> str:
    """Greet callers that access the server root URL."""
    greeting = "Welcome to Excalibur... aka your Butler Server"
    return greeting
@app.get("/butler/butler.json", response_model=dict[str, Any])
def read_server_config() -> Mapping:
    """Return the butler configuration that the client should use.

    The datastore points at the server's repository root and the registry
    is the remote registry implementation, so clients talk back to this
    server for registry operations.
    """
    yaml_config = f"""
datastore:
    root: {BUTLER_ROOT}
registry:
    cls: lsst.daf.butler.registries.remote.RemoteRegistry
    db: <butlerRoot>
"""
    return Config.fromString(yaml_config, format="yaml")
@app.get("/butler/v1/universe", response_model=dict[str, Any])
def get_dimension_universe(butler: Butler = Depends(butler_readonly_dependency)) -> DimensionConfig:
    """Allow remote client to get dimensions definition.

    Returns the server's dimension configuration so the client can build
    an identical dimension universe.
    """
    return butler.dimensions.dimensionConfig
@app.get("/butler/v1/uri/{id}", response_model=str)
def get_uri(id: DatasetId, butler: Butler = Depends(butler_readonly_dependency)) -> str:
    """Return a single URI of non-disassembled dataset.

    Raises
    ------
    HTTPException
        404 if no dataset with the given ID exists in the registry.
    """
    dataset_ref = butler.registry.getDataset(id)
    if not dataset_ref:
        raise HTTPException(status_code=404, detail=f"Dataset with id {id} does not exist.")
    # In reality this would have to be converted to a signed URL.
    return str(butler.getURI(dataset_ref))
@app.put("/butler/v1/registry/refresh")
def refresh(butler: Butler = Depends(butler_readonly_dependency)) -> None:
    """Refresh the registry cache."""
    # Unclear whether this should exist. Which butler is really being
    # refreshed? How do we know the server we are refreshing is used later?
    # For testing at the moment it is important if a test adds a dataset type
    # directly in the server since the test client will not see it.
    # NOTE(review): this refreshes the per-request read-only butler, not the
    # shared globals — confirm that is the intent.
    butler.registry.refresh()
@app.get(
    "/butler/v1/registry/datasetType/{datasetTypeName}",
    summary="Retrieve this dataset type definition.",
    response_model=SerializedDatasetType,
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def get_dataset_type(
    datasetTypeName: str, butler: Butler = Depends(butler_readonly_dependency)
) -> SerializedDatasetType:
    """Look up the named dataset type and return its serialized form."""
    return butler.registry.getDatasetType(datasetTypeName).to_simple()
@app.get(
    "/butler/v1/registry/datasetTypes",
    summary="Retrieve all dataset type definitions.",
    response_model=list[SerializedDatasetType],
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def query_all_dataset_types(
    components: bool | None = Query(None), butler: Butler = Depends(butler_readonly_dependency)
) -> list[SerializedDatasetType]:
    """Return every dataset type known to the registry."""
    # Ellipsis means "match everything".
    all_types = butler.registry.queryDatasetTypes(..., components=components)
    return [dataset_type.to_simple() for dataset_type in all_types]
@app.get(
    "/butler/v1/registry/datasetTypes/re",
    summary="Retrieve dataset type definitions matching expressions",
    response_model=list[SerializedDatasetType],
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def query_dataset_types_re(
    regex: list[str] | None = Query(None),
    glob: list[str] | None = Query(None),
    components: bool | None = Query(None),
    butler: Butler = Depends(butler_readonly_dependency),
) -> list[SerializedDatasetType]:
    """Return dataset types whose names match the given regex/glob
    expressions."""
    expression = ExpressionQueryParameter(regex=regex, glob=glob).expression()
    matches = butler.registry.queryDatasetTypes(expression, components=components)
    return [match.to_simple() for match in matches]
@app.get("/butler/v1/registry/collection/chain/{parent:path}", response_model=list[str])
def get_collection_chain(parent: str, butler: Butler = Depends(butler_readonly_dependency)) -> list[str]:
    """Return the names of the collections chained under ``parent``."""
    return list(butler.registry.getCollectionChain(parent))
@app.get("/butler/v1/registry/collections", response_model=list[str])
def query_collections(
    regex: list[str] | None = Query(None),
    glob: list[str] | None = Query(None),
    datasetType: str | None = Query(None),
    flattenChains: bool = Query(False),
    collectionType: list[CollectionTypeNames] | None = Query(None),
    includeChains: bool | None = Query(None),
    butler: Butler = Depends(butler_readonly_dependency),
) -> list[str]:
    """Return the names of collections matching the query parameters."""
    expression = ExpressionQueryParameter(regex=regex, glob=glob).expression()
    types = CollectionType.from_names(collectionType)
    # Resolve the dataset type name to a real DatasetType when one was given.
    resolved_dataset_type = butler.registry.getDatasetType(datasetType) if datasetType else None
    matching = butler.registry.queryCollections(
        expression=expression,
        datasetType=resolved_dataset_type,
        collectionTypes=types,
        flattenChains=flattenChains,
        includeChains=includeChains,
    )
    return list(matching)
@app.get("/butler/v1/registry/collection/type/{name:path}", response_model=str)
def get_collection_type(name: str, butler: Butler = Depends(butler_readonly_dependency)) -> str:
    """Return the type name of the named collection."""
    return butler.registry.getCollectionType(name).name
@app.put("/butler/v1/registry/collection/{name:path}/{collectionTypeName}", response_model=str)
def register_collection(
    name: str,
    collectionTypeName: CollectionTypeNames,
    doc: str | None = Query(None),
    butler: Butler = Depends(butler_readwrite_dependency),
) -> str:
    """Register a collection.

    Parameters
    ----------
    name : `str`
        Name of the collection to register.
    collectionTypeName : `CollectionTypeNames`
        Type of the collection, taken from the final path segment.
        (Fixed: the route previously declared this segment as ``{type_}``,
        which matched no function parameter, so FastAPI ignored the path
        segment and required ``collectionTypeName`` as a query parameter
        instead.)
    doc : `str`, optional
        Documentation string for the collection.
    butler : `Butler`
        Injected read-write butler.

    Returns
    -------
    name : `str`
        The name of the registered collection.
    """
    collectionType = CollectionType.from_name(collectionTypeName)
    butler.registry.registerCollection(name, collectionType, doc)
    # Need to refresh the global read only butler otherwise other clients
    # may not see this change.
    if GLOBAL_READONLY_BUTLER is not None:  # for mypy
        GLOBAL_READONLY_BUTLER.registry.refresh()
    return name
@app.get(
    "/butler/v1/registry/dataset/{id}",
    summary="Retrieve this dataset definition.",
    response_model=SerializedDatasetRef | None,
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def get_dataset(
    id: DatasetId, butler: Butler = Depends(butler_readonly_dependency)
) -> SerializedDatasetRef | None:
    """Return a single dataset reference, or ``None`` if not found."""
    ref = butler.registry.getDataset(id)
    # An unknown ID could be a 404, but the standard registry getDataset
    # returns None without error, so mirror that contract here.
    if ref is None:
        return None
    return ref.to_simple()
@app.get("/butler/v1/registry/datasetLocations/{id}", response_model=list[str])
def get_dataset_locations(id: DatasetId, butler: Butler = Depends(butler_readonly_dependency)) -> list[str]:
    """Return datastore names holding the dataset with the given ID."""
    # The endpoint receives a bare ID but the registry wants a DatasetRef,
    # so build one from a minimal serialized form.
    serialized = SerializedDatasetRef(id=id)
    try:
        # Converting to a real DatasetRef takes time and is not needed
        # internally since only the ID is used.
        ref = DatasetRef.from_simple(serialized, registry=butler.registry)
    except Exception:
        # The SQL getDatasetLocations only consults the datastore and never
        # verifies the ID against the registry; match that behaviour by
        # reporting no locations rather than raising.
        return []
    return list(butler.registry.getDatasetLocations(ref))
# TimeSpan not yet a pydantic model
@app.post(
    "/butler/v1/registry/findDataset/{datasetType}",
    summary="Retrieve this dataset definition from collection, dataset type, and dataId",
    response_model=SerializedDatasetRef,
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def find_dataset(
    datasetType: str,
    dataId: SerializedDataCoordinate | None = None,
    collections: list[str] | None = Query(None),
    butler: Butler = Depends(butler_readonly_dependency),
) -> SerializedDatasetRef | None:
    """Return a single dataset reference matching the dataId and
    collections, or ``None`` when nothing matches."""
    # An empty collection list is treated the same as no list at all.
    ref = butler.registry.findDataset(
        datasetType, dataId=unpack_dataId(butler, dataId), collections=collections or None
    )
    if not ref:
        return None
    return ref.to_simple()
# POST is used for the complex dict data structures
@app.post(
    "/butler/v1/registry/datasets",
    summary="Query all dataset holdings.",
    response_model=list[SerializedDatasetRef],
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def query_datasets(
    query: QueryDatasetsModel, butler: Butler = Depends(butler_readonly_dependency)
) -> list[SerializedDatasetRef]:
    """Return serialized dataset refs matching the posted query model.

    This endpoint can return a very large result set; it is fully
    materialized before being sent.
    """
    # A missing collections expression means "no restriction".
    collections = query.collections.expression() if query.collections else None
    refs = butler.registry.queryDatasets(
        query.datasetType.expression(),
        collections=collections,
        dimensions=query.dimensions,
        dataId=unpack_dataId(butler, query.dataId),
        where=query.where,
        findFirst=query.findFirst,
        components=query.components,
        bind=query.bind,
        check=query.check,
        **query.kwargs(),
    )
    return [single_ref.to_simple() for single_ref in refs]
# POST is used for the complex dict data structures
@app.post(
    "/butler/v1/registry/dataIds",
    summary="Query all data IDs.",
    response_model=list[SerializedDataCoordinate],
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def query_data_ids(
    query: QueryDataIdsModel, butler: Butler = Depends(butler_readonly_dependency)
) -> list[SerializedDataCoordinate]:
    """Return serialized data coordinates matching the posted query model."""
    # Expand the optional expressions; None means "no restriction".
    datasets = query.datasets.expression() if query.datasets else None
    collections = query.collections.expression() if query.collections else None
    data_ids = butler.registry.queryDataIds(
        query.dimensions,
        collections=collections,
        datasets=datasets,
        dataId=unpack_dataId(butler, query.dataId),
        where=query.where,
        components=query.components,
        bind=query.bind,
        check=query.check,
        **query.kwargs(),
    )
    return [data_id.to_simple() for data_id in data_ids]
# Uses POST to handle the DataId
@app.post(
    "/butler/v1/registry/dimensionRecords/{element}",
    summary="Retrieve dimension records matching query",
    response_model=list[SerializedDimensionRecord],
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def query_dimension_records(
    element: str, query: QueryDimensionRecordsModel, butler: Butler = Depends(butler_readonly_dependency)
) -> list[SerializedDimensionRecord]:
    """Return serialized dimension records for ``element`` matching the
    posted query model."""
    # Expand the optional expressions; None means "no restriction".
    datasets = query.datasets.expression() if query.datasets else None
    collections = query.collections.expression() if query.collections else None
    records = butler.registry.queryDimensionRecords(
        element,
        dataId=unpack_dataId(butler, query.dataId),
        collections=collections,
        where=query.where,
        datasets=datasets,
        components=query.components,
        bind=query.bind,
        check=query.check,
        **query.kwargs(),
    )
    return [record.to_simple() for record in records]