Coverage for python/lsst/afw/table/_base.py: 11%

191 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2024-05-01 03:31 -0700

1# This file is part of afw. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21import numpy as np 

22 

23from lsst.utils import continueClass, TemplateMeta 

24from ._table import BaseRecord, BaseCatalog 

25from ._schema import Key 

26 

27 

# Only ``Catalog`` is exported from this module; ``BaseRecord`` and
# ``BaseCatalog`` are imported from ``._table`` above and are not listed here.
__all__ = ["Catalog"]

29 

30 

@continueClass
class BaseRecord:  # noqa: F811
    # Pure-Python additions to the ``BaseRecord`` imported from ``._table``.
    # NOTE(review): ``continueClass`` presumably injects these methods into
    # that existing class rather than defining a new one -- confirm against
    # lsst.utils.

    def extract(self, *patterns, **kwargs):
        """Return a `dict` mapping field names to this record's values.

        Fields are selected by shell-style glob pattern(s); when several
        patterns are passed the result is the union of the fields matched
        by each one.

        Parameters
        ----------
        *patterns : `str`
            Glob patterns forwarded to ``self.schema.extract``. Not used
            when ``items`` is supplied.
        **kwargs
            Optional keyword arguments:

            ``items`` : `dict`
                The result of an earlier call to ``self.schema.extract()``.
                When given, it is used as-is instead of doing any new
                matching, so one match can be reused across many records.
                It may not be combined with any other keyword argument.
            ``regex`` : `str` or `re` pattern object
                A regular expression to be used in addition to any glob
                patterns. Note that this will be compared with
                ``re.match``, not ``re.search``.
            ``sub`` : `str`
                A replacement string (see `re.MatchObject.expand`) used to
                set the dictionary keys of any fields matched by ``regex``.
            ``ordered`` : `bool`
                Forwarded to ``self.schema.extract``; asks for the matches
                in `Schema` definition order. Default is `False`.

        Returns
        -------
        values : `dict`
            ``{name: value}`` for every selected field, where ``value`` is
            ``self.get(key)`` for that field's key.

        Raises
        ------
        ValueError
            Raised if ``items`` is given together with other keyword
            arguments.
        """
        matched = kwargs.pop("items", None)
        if matched is not None:
            # A pre-computed match was supplied: nothing else may accompany it.
            if kwargs:
                leftover = ", ".join(kwargs)
                raise ValueError(f"Unrecognized keyword arguments for extract: {leftover}")
        else:
            matched = self.schema.extract(*patterns, **kwargs).copy()

        values = {}
        for name, schemaItem in matched.items():
            values[name] = self.get(schemaItem.key)
        return values

    def __repr__(self):
        # Type on the first line, then the record's ``str`` form.
        cls = type(self)
        return f"{cls}\n{self}"

71 

72 

class Catalog(metaclass=TemplateMeta):
    # Python-side behaviour shared by catalog types registered with this
    # template.  ``self._columns`` caches the column view; every method that
    # changes the set of records resets it to `None` so the view is rebuilt
    # lazily on next access.

    def getColumnView(self):
        """Build a new column view, cache it, and return it."""
        self._columns = self._getColumnView()
        return self._columns

    def __getColumns(self):
        # Lazily (re)build the cached view the first time it is needed.
        if not hasattr(self, "_columns") or self._columns is None:
            self._columns = self._getColumnView()
        return self._columns
    columns = property(__getColumns, doc="a column view of the catalog")

    def __getitem__(self, key):
        """Return a record, a column, or a sub-catalog depending on ``key``.

        Parameters
        ----------
        key : `int`, `slice`, `numpy.ndarray`, `str`, or `Key`
            - `slice`: return ``self.subset(start, stop, step)``.
            - boolean `numpy.ndarray`: return ``self.subset(key)``.
            - `str` field name or `Key`: return the corresponding column.
            - anything else (e.g. an integer): delegated to ``_getitem_``.

        Raises
        ------
        RuntimeError
            Raised if ``key`` is a `numpy.ndarray` whose dtype is not `bool`.
        """
        if isinstance(key, slice):
            # NOTE(review): the defaults below do not depend on the sign of
            # ``step``; confirm ``subset`` gives the intended result for
            # reversed slices such as ``catalog[::-1]``.
            start = 0 if key.start is None else key.start
            stop = len(self) if key.stop is None else key.stop
            step = 1 if key.step is None else key.step
            return self.subset(start, stop, step)
        if isinstance(key, np.ndarray):
            if key.dtype == bool:
                return self.subset(key)
            raise RuntimeError(f"Unsupported array type for indexing non-contiguous Catalog: {key.dtype}")
        if isinstance(key, (str, Key)):
            if isinstance(key, str):
                # Resolve a field name to its Key before the column lookup.
                key = self.schema.find(key).key
            # The helper hands back the (possibly newly built) column view
            # so it can be cached for later lookups.
            result, self._columns = self._get_column_from_key(key, self._columns)
            return result
        return self._getitem_(key)

    def __setitem__(self, key, value):
        """If ``key`` is an integer, set ``catalog[key]`` to
        ``value``. Otherwise select column ``key`` and set it to
        ``value``.
        """
        self._columns = None
        if isinstance(key, str):
            key = self.schema[key].asKey()
        if isinstance(key, Key):
            if isinstance(key, Key["Flag"]):
                # Flag columns go through a dedicated setter instead of the
                # column view.
                self._set_flag(key, value)
            else:
                self.columns[key] = value
        else:
            return self.set(key, value)

    def __delitem__(self, key):
        """Delete the record at ``key``, or the records selected by a slice."""
        self._columns = None
        if isinstance(key, slice):
            self._delslice_(key)
        else:
            self._delitem_(key)

    def append(self, record):
        """Append ``record`` to the catalog, invalidating the cached columns."""
        self._columns = None
        self._append(record)

    def insert(self, key, value):
        """Insert ``value`` at position ``key``, invalidating the cached columns."""
        self._columns = None
        self._insert(key, value)

    def clear(self):
        """Remove all records, invalidating the cached columns."""
        self._columns = None
        self._clear()

    def addNew(self):
        """Add a new record via ``_addNew`` and return its result."""
        self._columns = None
        return self._addNew()

    def cast(self, type_, deep=False):
        """Return a copy of the catalog with the given type.

        Parameters
        ----------
        type_ :
            Type of catalog to return.
        deep : `bool`, optional
            If `True`, clone the table and deep copy all records.

        Returns
        -------
        copy :
            Copy of catalog with the requested type.
        """
        if deep:
            table = self.table.clone()
            table.preallocate(len(self))
        else:
            table = self.table
        copy = type_(table)
        copy.extend(self, deep=deep)
        return copy

    def copy(self, deep=False):
        """
        Copy a catalog (default is not a deep copy).
        """
        return self.cast(type(self), deep)

    def extend(self, iterable, deep=False, mapper=None):
        """Append all records in the given iterable to the catalog.

        Parameters
        ----------
        iterable :
            Any Python iterable containing records.
        deep : `bool`, optional
            If `True`, the records will be deep-copied; ignored if
            mapper is not `None` (that always implies `True`).
            A ``SchemaMapper`` passed in this position is treated as
            ``mapper``.
        mapper : `lsst.afw.table.schemaMapper.SchemaMapper`, optional
            Used to translate records.
        """
        self._columns = None
        # We can't use isinstance here, because the SchemaMapper symbol isn't available
        # when this code is part of a subclass of Catalog in another package.
        if type(deep).__name__ == "SchemaMapper":
            mapper = deep
            deep = None
        if isinstance(iterable, type(self)):
            # Same catalog type: hand the whole container to ``_extend``.
            if mapper is not None:
                self._extend(iterable, mapper)
            else:
                self._extend(iterable, deep)
        else:
            for record in iterable:
                if mapper is not None:
                    self._append(self.table.copyRecord(record, mapper))
                elif deep:
                    self._append(self.table.copyRecord(record))
                else:
                    self._append(record)

    def __reduce__(self):
        # Imported lazily, at call time, rather than at module import.
        import lsst.afw.fits
        return lsst.afw.fits.reduceToFits(self)

    def asAstropy(self, cls=None, copy=False, unviewable="copy"):
        """Return an astropy.table.Table (or subclass thereof) view into this catalog.

        Parameters
        ----------
        cls :
            Table subclass to use; `None` implies `astropy.table.Table`
            itself.  Use `astropy.table.QTable` to get Quantity columns.
        copy : bool, optional
            If `True`, copy data from the LSST catalog to the astropy
            table.  Not copying is usually faster, but can keep memory
            from being freed if columns are later removed from the
            Astropy view.
        unviewable : `str`, optional
            One of the following options (which is ignored if
            copy=`True` ), indicating how to handle field types (`str`
            and `Flag`) for which views cannot be constructed:

            - 'copy' (default): copy only the unviewable fields.
            - 'raise': raise ValueError if unviewable fields are present.
            - 'skip': do not include unviewable fields in the Astropy Table.

            Variable-length array fields are handled separately and
            regardless of ``copy``: they raise under 'raise' and are
            omitted from the table under both 'copy' and 'skip'.

        Returns
        -------
        cls : `astropy.table.Table`
            Astropy view into the catalog.

        Raises
        ------
        ValueError
            Raised if the `unviewable` option is not a known value, or
            if the option is 'raise' and an unviewable field is found.

        """
        import astropy.table
        if cls is None:
            cls = astropy.table.Table
        if unviewable not in ("copy", "raise", "skip"):
            raise ValueError(
                f"'unviewable'={unviewable!r} must be one of 'copy', 'raise', or 'skip'")
        ps = self.getMetadata()
        meta = ps.toOrderedDict() if ps is not None else None
        columns = []
        items = self.schema.extract("*", ordered=True)
        for name, item in items.items():
            key = item.key
            typeString = key.getTypeString()
            unit = item.field.getUnits() or None  # use None instead of "" when empty
            if typeString == "String":
                if not copy:
                    if unviewable == "raise":
                        raise ValueError("Cannot extract string "
                                         "unless copy=True or unviewable='copy' or 'skip'.")
                    elif unviewable == "skip":
                        continue
                # Strings are filled in record by record into a new
                # fixed-width array.
                data = np.zeros(
                    len(self), dtype=np.dtype((str, key.getSize())))
                for i, record in enumerate(self):
                    data[i] = record.get(key)
            elif typeString == "Flag":
                if not copy:
                    if unviewable == "raise":
                        raise ValueError("Cannot extract packed bit columns "
                                         "unless copy=True or unviewable='copy' or 'skip'.")
                    elif unviewable == "skip":
                        continue
                data = self[key]
            elif typeString == "Angle":
                data = self.columns.get(key)
                unit = "radian"
                if copy:
                    data = data.copy()
            elif "Array" in typeString and key.isVariableLength():
                # Can't get columns for variable-length array fields.
                if unviewable == "raise":
                    raise ValueError("Cannot extract variable-length array fields unless unviewable='skip'.")
                # ``unviewable`` was validated above, so it is 'skip' or
                # 'copy' here; both leave the field out of the table.
                continue
            else:
                data = self.columns.get(key)
                if copy:
                    data = data.copy()
            columns.append(
                astropy.table.Column(
                    data,
                    name=name,
                    unit=unit,
                    description=item.field.getDoc()
                )
            )
        return cls(columns, meta=meta, copy=False)

    def __dir__(self):
        """
        This custom dir is necessary due to the custom getattr below.
        Without it, not all of the methods available are returned with dir.
        See DM-7199.
        """
        def recursive_get_class_dir(cls):
            """
            Return a set containing the names of all attributes defined
            by a given class *and* all of its base classes.
            """
            result = set(cls.__dict__.keys())
            for base in cls.__bases__:
                result |= recursive_get_class_dir(base)
            return result
        return sorted(set(dir(self.columns)) | set(dir(self.table))
                      | recursive_get_class_dir(type(self)) | set(self.__dict__.keys()))

    def __getattr__(self, name):
        # Catalog forwards unknown method calls to its table and column view
        # for convenience.  (Feature requested by RHL; complaints about magic
        # should be directed to him.)
        if name == "_columns":
            # ``_columns`` has not been set on this instance yet.  Create it
            # here so that the ``columns`` property (which reads
            # ``_columns``) cannot recurse back into this method.
            self._columns = None
            return None
        try:
            return getattr(self.table, name)
        except AttributeError:
            return getattr(self.columns, name)

    def __str__(self):
        if self.isContiguous():
            return str(self.asAstropy())
        else:
            fields = ' '.join(x.field.getName() for x in self.schema)
            return f"Non-contiguous afw.Catalog of {len(self)} rows.\ncolumns: {fields}"

    def __repr__(self):
        return f"{type(self)}\n{self}"

    def extract(self, *patterns, **kwds):
        """Extract a dictionary of {<name>: <column-array>} in which the field
        names match the given shell-style glob pattern(s).

        Any number of glob patterns may be passed (including none); the result
        will be the union of all the results of each glob considered
        separately.

        Note that extract("*", copy=True) provides an easy way to transform a
        catalog into a set of writeable contiguous NumPy arrays.

        This routine unpacks `Flag` columns into full boolean arrays.  String
        fields are silently ignored.

        Parameters
        ----------
        patterns : Array of `str`
            List of glob patterns to use to select field names.
        kwds : `dict`
            Dictionary of additional keyword arguments.  May contain:

            ``items`` : `dict`
                The result of a call to self.schema.extract(); this will be
                used instead of doing any new matching, and allows the pattern
                matching to be reused to extract values from multiple
                catalogs.  This keyword is incompatible with any positional
                arguments and the regex, sub, and ordered keyword arguments.
                The dictionary passed in is modified in place and returned.
            ``where`` : array index expression
                Any expression that can be passed as indices to a NumPy array,
                including slices, boolean arrays, and index arrays, that will
                be used to index each column array.  This is applied before
                arrays are copied when copy is True, so if the indexing results
                in an implicit copy no unnecessary second copy is performed.
            ``copy`` : `bool`
                If True, the returned arrays will be contiguous copies rather
                than strided views into the catalog.  This ensures that the
                lifetime of the arrays is not tied to the lifetime of a
                particular catalog, and it also may improve the performance if
                the array is used repeatedly. Default is False.  Copies are
                always made if the catalog is noncontiguous, but if
                ``copy=False`` these are set as read-only to ensure code does
                not assume they are views that could modify the original
                catalog.
            ``regex`` : `str` or `re` pattern
                A regular expression to be used in addition to any glob
                patterns passed as positional arguments.  Note that this will
                be compared with re.match, not re.search.
            ``sub`` : `str`
                A replacement string (see re.MatchObject.expand) used to set
                the dictionary keys of any fields matched by regex.
            ``ordered`` : `bool`
                If True, a collections.OrderedDict will be returned instead of
                a standard dict, with the order corresponding to the definition
                order of the Schema.  Default is False.

        Returns
        -------
        d : `dict`
            Dictionary of extracted name-column array sets.

        Raises
        ------
        ValueError
            Raised if ``items`` is supplied with additional keywords.
        """
        copy = kwds.pop("copy", False)
        where = kwds.pop("where", None)
        d = kwds.pop("items", None)
        # If ``items`` is given as a kwd, an extraction has already been
        # performed and there shouldn't be any additional keywords. Otherwise
        # call schema.extract to load the dictionary.
        if d is None:
            d = self.schema.extract(*patterns, **kwds).copy()
        elif kwds:
            raise ValueError(
                "kwd 'items' was specified, which is not compatible with additional keywords")

        def processArray(a):
            # Index first, then copy, so an indexing-induced copy is not
            # copied a second time unnecessarily.
            if where is not None:
                a = a[where]
            if copy:
                a = a.copy()
            return a

        # must use list because we might be adding/deleting elements
        for name, schemaItem in list(d.items()):
            key = schemaItem.key
            if key.getTypeString() == "String":
                del d[name]
            else:
                d[name] = processArray(self[key])
        return d

443 

444 

# Register ``BaseCatalog`` with the ``Catalog`` template under the key "Base".
# NOTE(review): presumably this is what attaches the methods defined on
# ``Catalog`` above to ``BaseCatalog`` -- confirm against
# lsst.utils.TemplateMeta.
Catalog.register("Base", BaseCatalog)