Coverage for python/lsst/daf/butler/_column_categorization.py: 43%

35 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-19 10:53 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ("ColumnCategorization",) 

31 

32import dataclasses 

33from collections import defaultdict 

34from collections.abc import Iterable, Iterator 

35from typing import Any 

36 

37from ._column_tags import DatasetColumnTag, DimensionKeyColumnTag, DimensionRecordColumnTag 

38from .dimensions import DimensionUniverse, GovernorDimension, SkyPixDimension 

39 

40 

41@dataclasses.dataclass 

42class ColumnCategorization: 

43 """Split an iterable of ColumnTag objects by type.""" 

44 

45 dimension_keys: set[str] = dataclasses.field(default_factory=set) 

46 dimension_records: defaultdict[str, set[str]] = dataclasses.field( 46 ↛ exitline 46 didn't jump to the function exit

47 default_factory=lambda: defaultdict(set) 

48 ) 

49 datasets: defaultdict[str, set[str]] = dataclasses.field(default_factory=lambda: defaultdict(set)) 49 ↛ exitline 49 didn't run the lambda on line 49

50 

51 @classmethod 

52 def from_iterable(cls, iterable: Iterable[Any]) -> ColumnCategorization: 

53 result = cls() 

54 for tag in iterable: 

55 match tag: 

56 case DimensionKeyColumnTag(dimension=dimension): 

57 result.dimension_keys.add(dimension) 

58 case DimensionRecordColumnTag(element=element, column=column): 

59 result.dimension_records[element].add(column) 

60 case DatasetColumnTag(dataset_type=dataset_type, column=column): 

61 result.datasets[dataset_type].add(column) 

62 return result 

63 

64 def filter_skypix(self, universe: DimensionUniverse) -> Iterator[SkyPixDimension]: 

65 return ( 

66 dimension for name in self.dimension_keys if (dimension := universe.skypix_dimensions.get(name)) 

67 ) 

68 

69 def filter_governors(self, universe: DimensionUniverse) -> Iterator[GovernorDimension]: 

70 return ( 

71 dimension 

72 for name in self.dimension_keys 

73 if isinstance(dimension := universe[name], GovernorDimension) 

74 ) 

75 

76 def filter_timespan_dataset_types(self) -> Iterator[str]: 

77 return (dataset_type for dataset_type, columns in self.datasets.items() if "timespan" in columns) 

78 

79 def filter_timespan_dimension_elements(self) -> Iterator[str]: 

80 return (element for element, columns in self.dimension_records.items() if "timespan" in columns) 

81 

82 def filter_spatial_region_dimension_elements(self) -> Iterator[str]: 

83 return (element for element, columns in self.dimension_records.items() if "region" in columns)