Coverage for python/lsst/daf/butler/server.py: 4%
141 statements
« prev ^ index » next — coverage.py v7.2.7, created at 2023-06-23 09:29 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ()
26import logging
27from collections.abc import Mapping
28from enum import Enum, auto
29from typing import Any
31from fastapi import Depends, FastAPI, HTTPException, Query
32from fastapi.middleware.gzip import GZipMiddleware
33from lsst.daf.butler import (
34 Butler,
35 Config,
36 DataCoordinate,
37 DatasetId,
38 DatasetRef,
39 DimensionConfig,
40 SerializedDataCoordinate,
41 SerializedDatasetRef,
42 SerializedDatasetType,
43 SerializedDimensionRecord,
44)
45from lsst.daf.butler.core.serverModels import (
46 ExpressionQueryParameter,
47 QueryDataIdsModel,
48 QueryDatasetsModel,
49 QueryDimensionRecordsModel,
50)
51from lsst.daf.butler.registry import CollectionType
# Root of the data repository this server exposes. Hard-coded for the
# ci_hsc_gen3 test dataset; a real deployment would take this from
# configuration.
BUTLER_ROOT = "ci_hsc_gen3/DATA"

# Module-level logger for the server ("excalibur" is the server's nickname,
# see read_root below).
log = logging.getLogger("excalibur")
class CollectionTypeNames(str, Enum):
    """Collection type names supported by the interface."""

    # Each member's value is its own name, so e.g.
    # CollectionTypeNames.RUN.value == "RUN" and members compare equal
    # to the corresponding plain strings.
    RUN = "RUN"
    CALIBRATION = "CALIBRATION"
    CHAINED = "CHAINED"
    TAGGED = "TAGGED"
# The FastAPI application object; responses larger than 1000 bytes are
# gzip-compressed.
app = FastAPI()
app.add_middleware(GZipMiddleware, minimum_size=1000)

# Process-wide Butler singletons, created lazily by _make_global_butler().
# Per-request Butlers returned by the dependency functions below are layered
# on top of these.
GLOBAL_READWRITE_BUTLER: Butler | None = None
GLOBAL_READONLY_BUTLER: Butler | None = None
def _make_global_butler() -> None:
    """Create the global read-only and read-write Butlers if they do not
    exist yet.

    Idempotent: each global is constructed at most once per process.
    """
    global GLOBAL_READONLY_BUTLER, GLOBAL_READWRITE_BUTLER
    if GLOBAL_READONLY_BUTLER is None:
        GLOBAL_READONLY_BUTLER = Butler(BUTLER_ROOT, writeable=False)
    if GLOBAL_READWRITE_BUTLER is None:
        GLOBAL_READWRITE_BUTLER = Butler(BUTLER_ROOT, writeable=True)
def butler_readonly_dependency() -> Butler:
    """FastAPI dependency returning a read-only Butler.

    A fresh Butler layered on the shared read-only global is returned so
    that each request gets its own instance.
    """
    _make_global_butler()
    return Butler(butler=GLOBAL_READONLY_BUTLER)
def butler_readwrite_dependency() -> Butler:
    """FastAPI dependency returning a writeable Butler.

    A fresh Butler layered on the shared read-write global is returned so
    that each request gets its own instance.
    """
    _make_global_butler()
    return Butler(butler=GLOBAL_READWRITE_BUTLER)
def unpack_dataId(butler: Butler, data_id: SerializedDataCoordinate | None) -> DataCoordinate | None:
    """Convert the serialized dataId back to full DataCoordinate.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The butler to use for registry and universe.
    data_id : `SerializedDataCoordinate` or `None`
        The serialized form.

    Returns
    -------
    dataId : `DataCoordinate` or `None`
        The DataId usable by registry.
    """
    # None passes straight through; anything else is expanded via the registry.
    return None if data_id is None else DataCoordinate.from_simple(data_id, registry=butler.registry)
@app.get("/butler/")
def read_root() -> str:
    """Greet callers at the server root."""
    greeting = "Welcome to Excalibur... aka your Butler Server"
    return greeting
@app.get("/butler/butler.json", response_model=dict[str, Any])
def read_server_config() -> Mapping:
    """Return the butler configuration that the client should use."""
    # Clients talk to this server through a RemoteRegistry pointed at the
    # same repository root the server itself uses.
    raw_yaml = f"""
datastore:
    root: {BUTLER_ROOT}
registry:
    cls: lsst.daf.butler.registries.remote.RemoteRegistry
    db: <butlerRoot>
"""
    return Config.fromString(raw_yaml, format="yaml")
@app.get("/butler/v1/universe", response_model=dict[str, Any])
def get_dimension_universe(butler: Butler = Depends(butler_readonly_dependency)) -> DimensionConfig:
    """Allow remote client to get dimensions definition."""
    universe = butler.dimensions
    return universe.dimensionConfig
@app.get("/butler/v1/uri/{id}", response_model=str)
def get_uri(id: DatasetId, butler: Butler = Depends(butler_readonly_dependency)) -> str:
    """Return a single URI of non-disassembled dataset."""
    ref = butler.registry.getDataset(id)
    if ref is None:
        raise HTTPException(status_code=404, detail=f"Dataset with id {id} does not exist.")

    # In reality would have to convert this to a signed URL
    return str(butler.datastore.getURI(ref))
@app.put("/butler/v1/registry/refresh")
def refresh(butler: Butler = Depends(butler_readonly_dependency)) -> None:
    """Refresh the registry caches of the per-request read-only butler."""
    # Unclear whether this should exist. Which butler is really being
    # refreshed? How do we know the server we are refreshing is used later?
    # For testing at the moment it is important if a test adds a dataset type
    # directly in the server since the test client will not see it.
    butler.registry.refresh()
@app.get(
    "/butler/v1/registry/datasetType/{datasetTypeName}",
    summary="Retrieve this dataset type definition.",
    response_model=SerializedDatasetType,
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def get_dataset_type(
    datasetTypeName: str, butler: Butler = Depends(butler_readonly_dependency)
) -> SerializedDatasetType:
    """Look up a dataset type by name and return its serialized form."""
    return butler.registry.getDatasetType(datasetTypeName).to_simple()
@app.get(
    "/butler/v1/registry/datasetTypes",
    summary="Retrieve all dataset type definitions.",
    response_model=list[SerializedDatasetType],
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def query_all_dataset_types(
    components: bool | None = Query(None), butler: Butler = Depends(butler_readonly_dependency)
) -> list[SerializedDatasetType]:
    """Return every dataset type known to the registry, serialized."""
    # ``...`` matches all dataset types.
    matches = butler.registry.queryDatasetTypes(..., components=components)
    return [dataset_type.to_simple() for dataset_type in matches]
@app.get(
    "/butler/v1/registry/datasetTypes/re",
    summary="Retrieve dataset type definitions matching expressions",
    response_model=list[SerializedDatasetType],
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def query_dataset_types_re(
    regex: list[str] | None = Query(None),
    glob: list[str] | None = Query(None),
    components: bool | None = Query(None),
    butler: Butler = Depends(butler_readonly_dependency),
) -> list[SerializedDatasetType]:
    """Return the dataset types matching the given regex/glob expressions."""
    # Combine the regex and glob query parameters into a single registry
    # expression.
    expression = ExpressionQueryParameter(regex=regex, glob=glob).expression()
    matches = butler.registry.queryDatasetTypes(expression, components=components)
    return [dataset_type.to_simple() for dataset_type in matches]
@app.get("/butler/v1/registry/collection/chain/{parent:path}", response_model=list[str])
def get_collection_chain(parent: str, butler: Butler = Depends(butler_readonly_dependency)) -> list[str]:
    """Return the names of the child collections of chained collection
    ``parent``.
    """
    return list(butler.registry.getCollectionChain(parent))
@app.get("/butler/v1/registry/collections", response_model=list[str])
def query_collections(
    regex: list[str] | None = Query(None),
    glob: list[str] | None = Query(None),
    datasetType: str | None = Query(None),
    flattenChains: bool = Query(False),
    collectionType: list[CollectionTypeNames] | None = Query(None),
    includeChains: bool | None = Query(None),
    butler: Butler = Depends(butler_readonly_dependency),
) -> list[str]:
    """Return the collection names matching the given constraints."""
    # Translate query parameters into the forms the registry expects.
    name_expression = ExpressionQueryParameter(regex=regex, glob=glob).expression()
    types = CollectionType.from_names(collectionType)
    resolved_dataset_type = butler.registry.getDatasetType(datasetType) if datasetType else None

    matches = butler.registry.queryCollections(
        expression=name_expression,
        datasetType=resolved_dataset_type,
        collectionTypes=types,
        flattenChains=flattenChains,
        includeChains=includeChains,
    )
    return list(matches)
@app.get("/butler/v1/registry/collection/type/{name:path}", response_model=str)
def get_collection_type(name: str, butler: Butler = Depends(butler_readonly_dependency)) -> str:
    """Return the name of the collection type of collection ``name``."""
    return butler.registry.getCollectionType(name).name
@app.put("/butler/v1/registry/collection/{name:path}/{type_}", response_model=str)
def register_collection(
    name: str,
    collectionTypeName: CollectionTypeNames,
    doc: str | None = Query(None),
    butler: Butler = Depends(butler_readwrite_dependency),
) -> str:
    """Register a new collection and return its name.

    NOTE(review): the route declares a ``{type_}`` path parameter that no
    function parameter captures, while ``collectionTypeName`` is not in the
    path and so arrives as a query parameter — presumably one of the two
    was meant to match the other; confirm against the client code.
    """
    collectionType = CollectionType.from_name(collectionTypeName)
    butler.registry.registerCollection(name, collectionType, doc)

    # Need to refresh the global read only butler otherwise other clients
    # may not see this change.
    if GLOBAL_READONLY_BUTLER is not None:  # for mypy
        GLOBAL_READONLY_BUTLER.registry.refresh()

    return name
@app.get(
    "/butler/v1/registry/dataset/{id}",
    summary="Retrieve this dataset definition.",
    response_model=SerializedDatasetRef | None,
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def get_dataset(
    id: DatasetId, butler: Butler = Depends(butler_readonly_dependency)
) -> SerializedDatasetRef | None:
    """Return the serialized dataset ref for ``id``, or None if unknown."""
    ref = butler.registry.getDataset(id)
    if ref is None:
        # This could raise a 404 since id is not found. The standard
        # registry getDataset method returns without error so follow that
        # example here.
        return None
    return ref.to_simple()
@app.get("/butler/v1/registry/datasetLocations/{id}", response_model=list[str])
def get_dataset_locations(id: DatasetId, butler: Butler = Depends(butler_readonly_dependency)) -> list[str]:
    """Return the names of the datastores holding dataset ``id``."""
    # Takes an ID so need to convert to a real DatasetRef. Only the ID is
    # used internally, so a minimal serialized ref is enough and avoids the
    # cost of filling in the rest.
    minimal_ref = SerializedDatasetRef(id=id)
    try:
        ref = DatasetRef.from_simple(minimal_ref, registry=butler.registry)
    except Exception:
        # SQL getDatasetLocations looks at ID in datastore and does not
        # check it is in registry. Follow that example and return without
        # error.
        return []
    return list(butler.registry.getDatasetLocations(ref))
# TimeSpan not yet a pydantic model
@app.post(
    "/butler/v1/registry/findDataset/{datasetType}",
    summary="Retrieve this dataset definition from collection, dataset type, and dataId",
    response_model=SerializedDatasetRef,
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def find_dataset(
    datasetType: str,
    dataId: SerializedDataCoordinate | None = None,
    collections: list[str] | None = Query(None),
    butler: Butler = Depends(butler_readonly_dependency),
) -> SerializedDatasetRef | None:
    """Find a dataset by type, collections, and dataId, returning its
    serialized ref or None if no match was found.
    """
    ref = butler.registry.findDataset(
        datasetType,
        dataId=unpack_dataId(butler, dataId),
        # An empty list means "no constraint", same as None.
        collections=collections if collections else None,
    )
    if ref is None:
        return None
    return ref.to_simple()
# POST is used for the complex dict data structures
@app.post(
    "/butler/v1/registry/datasets",
    summary="Query all dataset holdings.",
    response_model=list[SerializedDatasetRef],
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def query_datasets(
    query: QueryDatasetsModel, butler: Butler = Depends(butler_readonly_dependency)
) -> list[SerializedDatasetRef]:
    """Run the dataset query described by ``query`` and return serialized
    refs for every match.
    """
    # This method might return a lot of results
    collection_expr = query.collections.expression() if query.collections else None

    refs = butler.registry.queryDatasets(
        query.datasetType.expression(),
        collections=collection_expr,
        dimensions=query.dimensions,
        dataId=unpack_dataId(butler, query.dataId),
        where=query.where,
        findFirst=query.findFirst,
        components=query.components,
        bind=query.bind,
        check=query.check,
        **query.kwargs(),
    )
    return [found.to_simple() for found in refs]
# POST is used for the complex dict data structures
@app.post(
    "/butler/v1/registry/dataIds",
    summary="Query all data IDs.",
    response_model=list[SerializedDataCoordinate],
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def query_data_ids(
    query: QueryDataIdsModel, butler: Butler = Depends(butler_readonly_dependency)
) -> list[SerializedDataCoordinate]:
    """Run the data-ID query described by ``query`` and return the matching
    data IDs in serialized form.
    """
    # Optional constraints default to None ("unconstrained").
    dataset_expr = query.datasets.expression() if query.datasets else None
    collection_expr = query.collections.expression() if query.collections else None

    matches = butler.registry.queryDataIds(
        query.dimensions,
        collections=collection_expr,
        datasets=dataset_expr,
        dataId=unpack_dataId(butler, query.dataId),
        where=query.where,
        components=query.components,
        bind=query.bind,
        check=query.check,
        **query.kwargs(),
    )
    return [data_id.to_simple() for data_id in matches]
# Uses POST to handle the DataId
@app.post(
    "/butler/v1/registry/dimensionRecords/{element}",
    summary="Retrieve dimension records matching query",
    response_model=list[SerializedDimensionRecord],
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def query_dimension_records(
    element: str, query: QueryDimensionRecordsModel, butler: Butler = Depends(butler_readonly_dependency)
) -> list[SerializedDimensionRecord]:
    """Return serialized dimension records for ``element`` matching the
    constraints in ``query``.
    """
    # Optional constraints default to None ("unconstrained").
    dataset_expr = query.datasets.expression() if query.datasets else None
    collection_expr = query.collections.expression() if query.collections else None

    matches = butler.registry.queryDimensionRecords(
        element,
        dataId=unpack_dataId(butler, query.dataId),
        collections=collection_expr,
        where=query.where,
        datasets=dataset_expr,
        components=query.components,
        bind=query.bind,
        check=query.check,
        **query.kwargs(),
    )
    return [record.to_simple() for record in matches]