Coverage for python/lsst/daf/butler/core/_column_categorization.py: 43%

35 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-25 15:14 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ("ColumnCategorization",) 

25 

26import dataclasses 

27from collections import defaultdict 

28from collections.abc import Iterable, Iterator 

29from typing import Any 

30 

31from ._column_tags import DatasetColumnTag, DimensionKeyColumnTag, DimensionRecordColumnTag 

32from .dimensions import DimensionUniverse, GovernorDimension, SkyPixDimension 

33 

34 

35@dataclasses.dataclass 

36class ColumnCategorization: 

37 """Split an iterable of ColumnTag objects by type.""" 

38 

39 dimension_keys: set[str] = dataclasses.field(default_factory=set) 

40 dimension_records: defaultdict[str, set[str]] = dataclasses.field( 40 ↛ exitline 40 didn't jump to the function exit

41 default_factory=lambda: defaultdict(set) 

42 ) 

43 datasets: defaultdict[str, set[str]] = dataclasses.field(default_factory=lambda: defaultdict(set)) 43 ↛ exitline 43 didn't run the lambda on line 43

44 

45 @classmethod 

46 def from_iterable(cls, iterable: Iterable[Any]) -> ColumnCategorization: 

47 result = cls() 

48 for tag in iterable: 

49 match tag: 

50 case DimensionKeyColumnTag(dimension=dimension): 

51 result.dimension_keys.add(dimension) 

52 case DimensionRecordColumnTag(element=element, column=column): 

53 result.dimension_records[element].add(column) 

54 case DatasetColumnTag(dataset_type=dataset_type, column=column): 

55 result.datasets[dataset_type].add(column) 

56 return result 

57 

58 def filter_skypix(self, universe: DimensionUniverse) -> Iterator[SkyPixDimension]: 

59 return ( 

60 dimension 

61 for name in self.dimension_keys 

62 if isinstance(dimension := universe[name], SkyPixDimension) 

63 ) 

64 

65 def filter_governors(self, universe: DimensionUniverse) -> Iterator[GovernorDimension]: 

66 return ( 

67 dimension 

68 for name in self.dimension_keys 

69 if isinstance(dimension := universe[name], GovernorDimension) 

70 ) 

71 

72 def filter_timespan_dataset_types(self) -> Iterator[str]: 

73 return (dataset_type for dataset_type, columns in self.datasets.items() if "timespan" in columns) 

74 

75 def filter_timespan_dimension_elements(self) -> Iterator[str]: 

76 return (element for element, columns in self.dimension_records.items() if "timespan" in columns) 

77 

78 def filter_spatial_region_dimension_elements(self) -> Iterator[str]: 

79 return (element for element, columns in self.dimension_records.items() if "region" in columns)