lsst.afw  22.0.1-31-gd62ef0f05+23bd69c089
FitsSchemaInputMapper.cc
Go to the documentation of this file.
1 // -*- lsst-c++ -*-
2 
3 #include <array>
4 #include <cmath>
5 #include <cstdint>
6 #include <cstdio>
7 #include <string>
8 #include <algorithm>
9 #include <cctype>
10 #include <regex>
11 
12 #include "boost/multi_index_container.hpp"
13 #include "boost/multi_index/sequenced_index.hpp"
14 #include "boost/multi_index/ordered_index.hpp"
15 #include "boost/multi_index/hashed_index.hpp"
16 #include "boost/multi_index/member.hpp"
17 
18 #include "lsst/log/Log.h"
19 #include "lsst/geom.h"
22 
23 namespace lsst {
24 namespace afw {
25 namespace table {
26 namespace io {
27 
28 namespace {
29 
30 // A quirk of Boost.MultiIndex (which we use for our container of FitsSchemaItems)
31 // that you have to use a special functor (like this one) to set data members
32 // in a container with set indices (because setting those values might require
33 // the element to be moved to a different place in the set). Check out
34 // the Boost.MultiIndex docs for more information.
35 template <std::string FitsSchemaItem::*Member>
36 struct SetFitsSchemaString {
37  void operator()(FitsSchemaItem &item) { item.*Member = _v; }
38  explicit SetFitsSchemaString(std::string const &v) : _v(v) {}
39 
40 private:
41  std::string const &_v;
42 };
43 
44 } // namespace
45 
47 public:
48  // A container class (based on Boost.MultiIndex) that provides three sort orders,
49  // on column number, flag bit, and name (ttype). This allows us to insert fields into the
50  // schema in the correct order, regardless of which order they appear in the
51  // FITS header.
52  using InputContainer = boost::multi_index_container<FitsSchemaItem, boost::multi_index::indexed_by<boost::multi_index::ordered_non_unique<boost::multi_index::member<FitsSchemaItem, int, &FitsSchemaItem::column>>, boost::multi_index::ordered_non_unique<boost::multi_index::member<FitsSchemaItem, int, &FitsSchemaItem::bit>>, boost::multi_index::hashed_unique<boost::multi_index::member<FitsSchemaItem, std::string, &FitsSchemaItem::ttype>>, boost::multi_index::sequenced<>>>;
53 
54  // Typedefs for the special functors used to set data members.
55  using SetTTYPE = SetFitsSchemaString<&FitsSchemaItem::ttype>;
56  using SetTFORM = SetFitsSchemaString<&FitsSchemaItem::tform>;
57  using SetTCCLS = SetFitsSchemaString<&FitsSchemaItem::tccls>;
58  using SetTUNIT = SetFitsSchemaString<&FitsSchemaItem::tunit>;
59  using SetDoc = SetFitsSchemaString<&FitsSchemaItem::doc>;
60 
61  // Typedefs for the different indices.
66 
67  // Getters for the different indices.
68  ByColumn &byColumn() { return inputs.get<0>(); }
69  ByBit &byBit() { return inputs.get<1>(); }
70  ByName &byName() { return inputs.get<2>(); }
71  AsList &asList() { return inputs.get<3>(); }
72 
73  Impl() {}
74 
75  int version{0};
77  int flagColumn{0};
78  int archiveHdu{-1};
86 };
87 
88 std::size_t FitsSchemaInputMapper::PREPPED_ROWS_FACTOR = 1 << 15; // determined empirically; see DM-19461.
89 
91  : _impl(std::make_shared<Impl>()) {
92  // Set the table version. If AFW_TABLE_VERSION tag exists, use that
93  // If not, set to 0 if it has an AFW_TYPE, Schema default otherwise (DM-590)
94  if (!metadata.exists("AFW_TYPE")) {
95  _impl->version = lsst::afw::table::Schema::VERSION;
96  }
97  _impl->version = metadata.get("AFW_TABLE_VERSION", _impl->version);
98  _impl->type = metadata.get("AFW_TYPE", _impl->type);
99  if (stripMetadata) {
100  metadata.remove("AFW_TABLE_VERSION");
101  }
102  if (stripMetadata) {
103  metadata.remove("AFW_TYPE");
104  }
105 
106  // Find a key that indicates an Archive stored on other HDUs
107  _impl->archiveHdu = metadata.get("AR_HDU", -1);
108  if (_impl->archiveHdu > 0) {
109  --_impl->archiveHdu; // AR_HDU is 1-indexed for historical reasons (RFC-304; see Source.cc)
110  if (stripMetadata) {
111  metadata.remove("AR_HDU");
112  }
113  }
114 
115  // Read aliases, stored as header entries with key 'ALIAS'
116  try {
117  std::vector<std::string> rawAliases = metadata.getArray<std::string>("ALIAS");
118  for (auto const &rawAliase : rawAliases) {
119  std::size_t pos = rawAliase.find_first_of(':');
120  if (pos == std::string::npos) {
122  (boost::format("Malformed alias definition: '%s'") % rawAliase).str());
123  }
124  _impl->schema.getAliasMap()->set(rawAliase.substr(0, pos), rawAliase.substr(pos + 1, std::string::npos));
125  }
126  if (stripMetadata) {
127  metadata.remove("ALIAS");
128  }
129  } catch (pex::exceptions::NotFoundError &) {
130  // if there are no aliases, just move on
131  }
132 
133  if (_impl->version == 0) {
134  // Read slots saved using an old mechanism in as aliases, since the new slot mechanism delegates
135  // slot definition to the AliasMap.
136  static std::array<std::pair<std::string, std::string>, 7> oldSlotKeys = {
137  {std::make_pair("PSF_FLUX", "slot_PsfFlux"), std::make_pair("AP_FLUX", "slot_ApFlux"),
138  std::make_pair("INST_FLUX", "slot_GaussianFlux"),
139  std::make_pair("MODEL_FLUX", "slot_ModelFlux"),
140  std::make_pair("CALIB_FLUX", "slot_CalibFlux"), std::make_pair("CENTROID", "slot_Centroid"),
141  std::make_pair("SHAPE", "slot_Shape")}};
142  for (auto const &oldSlotKey : oldSlotKeys) {
143  std::string target = metadata.get(oldSlotKey.first + "_SLOT", std::string(""));
144  if (!target.empty()) {
145  _impl->schema.getAliasMap()->set(oldSlotKey.second, target);
146  if (stripMetadata) {
147  metadata.remove(oldSlotKey.first);
148  metadata.remove(oldSlotKey.first + "_ERR_SLOT");
149  metadata.remove(oldSlotKey.first + "_FLAG_SLOT");
150  }
151  }
152  }
153  }
154 
155  // Read the rest of the header into the intermediate inputs container.
156  std::vector<std::string> keyList = metadata.getOrderedNames();
157  for (auto const &key : keyList) {
158  if (key.compare(0, 5, "TTYPE") == 0) {
159  int column = std::stoi(key.substr(5)) - 1;
160  auto iter = _impl->byColumn().lower_bound(column);
161  if (iter == _impl->byColumn().end() || iter->column != column) {
162  iter = _impl->byColumn().insert(iter, FitsSchemaItem(column, -1));
163  }
164  std::string v = metadata.get<std::string>(key);
165  _impl->byColumn().modify(iter, Impl::SetTTYPE(v));
166  if (iter->doc.empty()) { // don't overwrite if already set with TDOCn
167  _impl->byColumn().modify(iter, Impl::SetDoc(metadata.getComment(key)));
168  }
169  if (stripMetadata) {
170  metadata.remove(key);
171  }
172  } else if (key.compare(0, 5, "TFLAG") == 0) {
173  int bit = std::stoi(key.substr(5)) - 1;
174  auto iter = _impl->byBit().lower_bound(bit);
175  if (iter == _impl->byBit().end() || iter->bit != bit) {
176  iter = _impl->byBit().insert(iter, FitsSchemaItem(-1, bit));
177  }
178  std::string v = metadata.get<std::string>(key);
179  _impl->byBit().modify(iter, Impl::SetTTYPE(v));
180  if (iter->doc.empty()) { // don't overwrite if already set with TFDOCn
181  _impl->byBit().modify(iter, Impl::SetDoc(metadata.getComment(key)));
182  }
183  if (stripMetadata) {
184  metadata.remove(key);
185  }
186  } else if (key.compare(0, 4, "TDOC") == 0) {
187  int column = std::stoi(key.substr(4)) - 1;
188  auto iter = _impl->byColumn().lower_bound(column);
189  if (iter == _impl->byColumn().end() || iter->column != column) {
190  iter = _impl->byColumn().insert(iter, FitsSchemaItem(column, -1));
191  }
192  _impl->byColumn().modify(iter, Impl::SetDoc(metadata.get<std::string>(key)));
193  if (stripMetadata) {
194  metadata.remove(key);
195  }
196  } else if (key.compare(0, 5, "TFDOC") == 0) {
197  int bit = std::stoi(key.substr(5)) - 1;
198  auto iter = _impl->byBit().lower_bound(bit);
199  if (iter == _impl->byBit().end() || iter->bit != bit) {
200  iter = _impl->byBit().insert(iter, FitsSchemaItem(-1, bit));
201  }
202  _impl->byBit().modify(iter, Impl::SetDoc(metadata.get<std::string>(key)));
203  if (stripMetadata) {
204  metadata.remove(key);
205  }
206  } else if (key.compare(0, 5, "TUNIT") == 0) {
207  int column = std::stoi(key.substr(5)) - 1;
208  auto iter = _impl->byColumn().lower_bound(column);
209  if (iter == _impl->byColumn().end() || iter->column != column) {
210  iter = _impl->byColumn().insert(iter, FitsSchemaItem(column, -1));
211  }
212  _impl->byColumn().modify(iter, Impl::SetTUNIT(metadata.get<std::string>(key)));
213  if (stripMetadata) {
214  metadata.remove(key);
215  }
216  } else if (key.compare(0, 5, "TCCLS") == 0) {
217  int column = std::stoi(key.substr(5)) - 1;
218  auto iter = _impl->byColumn().lower_bound(column);
219  if (iter == _impl->byColumn().end() || iter->column != column) {
220  iter = _impl->byColumn().insert(iter, FitsSchemaItem(column, -1));
221  }
222  _impl->byColumn().modify(iter, Impl::SetTCCLS(metadata.get<std::string>(key)));
223  if (stripMetadata) {
224  metadata.remove(key);
225  }
226  } else if (key.compare(0, 5, "TFORM") == 0) {
227  int column = std::stoi(key.substr(5)) - 1;
228  auto iter = _impl->byColumn().lower_bound(column);
229  if (iter == _impl->byColumn().end() || iter->column != column) {
230  iter = _impl->byColumn().insert(iter, FitsSchemaItem(column, -1));
231  }
232  _impl->byColumn().modify(iter, Impl::SetTFORM(metadata.get<std::string>(key)));
233  if (stripMetadata) {
234  metadata.remove(key);
235  }
236  } else if (key.compare(0, 5, "TZERO") == 0) {
237  if (stripMetadata) {
238  metadata.remove(key);
239  }
240  } else if (key.compare(0, 5, "TSCAL") == 0) {
241  if (stripMetadata) {
242  metadata.remove(key);
243  }
244  } else if (key.compare(0, 5, "TNULL") == 0) {
245  if (stripMetadata) {
246  metadata.remove(key);
247  }
248  } else if (key.compare(0, 5, "TDISP") == 0) {
249  if (stripMetadata) {
250  metadata.remove(key);
251  }
252  }
253  }
254 
255  // Find the column used to store flags, and setup the flag-handling data members from it.
256  _impl->flagColumn = metadata.get("FLAGCOL", 0);
257  if (_impl->flagColumn > 0) {
258  if (stripMetadata) {
259  metadata.remove("FLAGCOL");
260  }
261  --_impl->flagColumn; // switch from 1-indexed to 0-indexed
262  auto iter = _impl->byColumn().find(_impl->flagColumn);
263  if (iter == _impl->byColumn().end()) {
264  throw LSST_EXCEPT(
266  (boost::format("Column for flag data not found; FLAGCOL=%d") % _impl->flagColumn).str());
267  }
268  // Regex to unpack a FITS TFORM value for a bit array column (TFORM code 'X'). The number
269  // that precedes the code is the size of the array; the number that follows it (if present)
270  // is ignored.
271  static std::regex const regex("(\\d+)?X\\(?(\\d)*\\)?");
272  std::smatch m;
273  if (!std::regex_match(iter->tform, m, regex)) {
274  throw LSST_EXCEPT(
276  (boost::format("Invalid TFORM key for flags column: '%s'") % iter->tform).str());
277  }
278  int nFlags = 1;
279  if (m[1].matched) {
280  nFlags = std::stoi(m[1].str());
281  }
282  _impl->flagKeys.resize(nFlags);
283  _impl->flagWorkspace.reset(new bool[nFlags]);
284  // Delete the flag column from the input list so we don't interpret it as a
285  // regular field.
286  _impl->byColumn().erase(iter);
287  }
288 }
289 
295 
296 void FitsSchemaInputMapper::setArchive(std::shared_ptr<InputArchive> archive) { _impl->archive = archive; }
297 
299  int oldHdu = fits.getHdu();
300  if (_impl->archiveHdu < 0) _impl->archiveHdu = oldHdu + 1;
301  try {
302  fits.setHdu(_impl->archiveHdu);
303  _impl->archive.reset(new io::InputArchive(InputArchive::readFits(fits)));
304  fits.setHdu(oldHdu);
305  return true;
306  } catch (afw::fits::FitsError &) {
307  fits.status = 0;
308  fits.setHdu(oldHdu);
309  _impl->archiveHdu = -1;
310  return false;
311  }
312 }
313 
314 bool FitsSchemaInputMapper::hasArchive() const { return static_cast<bool>(_impl->archive); }
315 
317  auto iter = _impl->byName().find(ttype);
318  if (iter == _impl->byName().end()) {
319  return nullptr;
320  }
321  return &(*iter);
322 }
323 
324 FitsSchemaItem const *FitsSchemaInputMapper::find(int column) const {
325  auto iter = _impl->byColumn().lower_bound(column);
326  if (iter == _impl->byColumn().end() || iter->column != column) {
327  return nullptr;
328  }
329  return &(*iter);
330 }
331 
333  auto iter = _impl->byColumn().lower_bound(item->column);
334  assert(iter != _impl->byColumn().end() && iter->column == item->column);
335  _impl->byColumn().erase(iter);
336 }
337 
339  auto iter = _impl->byName().find(ttype);
340  if (iter != _impl->byName().end() && iter->ttype == ttype) {
341  _impl->byName().erase(iter);
342  }
343 }
344 
346  auto iter = _impl->byColumn().lower_bound(column);
347  if (iter != _impl->byColumn().end() && iter->column == column) {
348  _impl->byColumn().erase(iter);
349  }
350 }
351 
352 void erase(int column);
353 
355  _impl->readers.push_back(std::move(reader));
356 }
357 
358 namespace {
359 
360 template <typename T>
361 class StandardReader : public FitsColumnReader {
362 public:
364  FieldBase<T> const &base = FieldBase<T>()) {
365  return std::unique_ptr<FitsColumnReader>(new StandardReader(schema, item, base));
366  }
367 
368  StandardReader(Schema &schema, FitsSchemaItem const &item, FieldBase<T> const &base)
369  : _column(item.column), _key(schema.addField<T>(item.ttype, item.doc, item.tunit, base)),
370  _cache(), _cacheFirstRow(0)
371  {}
372 
373  void prepRead(std::size_t firstRow, std::size_t nRows, fits::Fits & fits) override {
374  // We only prep and cache scalar-valued columns, not array-valued
375  // columns, as apparently the order CFITSIO reads array-valued columns
376  // is not the order we want.
377  if (_key.getElementCount() == 1u) {
378  std::size_t nElements = nRows*_key.getElementCount();
379  _cache.resize(nElements);
380  _cacheFirstRow = firstRow;
381  fits.readTableArray(firstRow, _column, nElements, &_cache.front());
382  }
383  }
384 
385  void readCell(BaseRecord &record, std::size_t row, afw::fits::Fits &fits,
386  std::shared_ptr<InputArchive> const &archive) const override {
387  if (_cache.empty()) {
388  fits.readTableArray(row, _column, _key.getElementCount(), record.getElement(_key));
389  } else {
390  assert(row >= _cacheFirstRow);
391  std::size_t offset = row - _cacheFirstRow;
392  assert(offset < _cache.size());
393  std::copy_n(_cache.begin() + offset, _key.getElementCount(), record.getElement(_key));
394  }
395  }
396 
397 private:
398  int _column;
399  Key<T> _key;
401  std::size_t _cacheFirstRow;
402  std::size_t _nRowsToPrep;
403 };
404 
405 class AngleReader : public FitsColumnReader {
406 public:
408  Schema &schema, FitsSchemaItem const &item,
409  FieldBase<lsst::geom::Angle> const &base = FieldBase<lsst::geom::Angle>()) {
410  return std::unique_ptr<FitsColumnReader>(new AngleReader(schema, item, base));
411  }
412 
413  AngleReader(Schema &schema, FitsSchemaItem const &item, FieldBase<lsst::geom::Angle> const &base)
414  : _column(item.column), _key(schema.addField<lsst::geom::Angle>(item.ttype, item.doc, "", base)) {
415  // We require an LSST-specific key in the headers before parsing a column
416  // as Angle at all, so we don't need to worry about other units or other
417  // spellings of radians. We do continue to support no units for backwards
418  // compatibility.
419  if (!item.tunit.empty() && item.tunit != "rad") {
420  throw LSST_EXCEPT(afw::fits::FitsError,
421  "Angle fields must be persisted in radians (TUNIT='rad').");
422  }
423  }
424 
425  void prepRead(std::size_t firstRow, std::size_t nRows, fits::Fits & fits) override {
426  assert(_key.getElementCount() == 1u);
427  _cache.resize(nRows);
428  _cacheFirstRow = firstRow;
429  fits.readTableArray(firstRow, _column, nRows, &_cache.front());
430  }
431 
432  void readCell(BaseRecord &record, std::size_t row, afw::fits::Fits &fits,
433  std::shared_ptr<InputArchive> const &archive) const override {
434  if (_cache.empty()) {
435  double tmp = 0;
436  fits.readTableScalar(row, _column, tmp);
437  record.set(_key, tmp * lsst::geom::radians);
438  } else {
439  assert(row >= _cacheFirstRow);
440  std::size_t offset = row - _cacheFirstRow;
441  assert(offset < _cache.size());
442  record.set(_key, _cache[offset] * lsst::geom::radians);
443  }
444  }
445 
446 private:
447  int _column;
448  Key<lsst::geom::Angle> _key;
449  std::vector<double> _cache;
450  std::size_t _cacheFirstRow;
451 };
452 
453 class StringReader : public FitsColumnReader {
454 public:
455  static std::unique_ptr<FitsColumnReader> make(Schema &schema, FitsSchemaItem const &item, int size) {
456  return std::unique_ptr<FitsColumnReader>(new StringReader(schema, item, size));
457  }
458 
459  StringReader(Schema &schema, FitsSchemaItem const &item, int size)
460  : _column(item.column),
461  _key(schema.addField<std::string>(item.ttype, item.doc, item.tunit, size)),
462  _isVariableLength(size == 0) {}
463 
464  void readCell(BaseRecord &record, std::size_t row, afw::fits::Fits &fits,
465  std::shared_ptr<InputArchive> const &archive) const override {
466  std::string s;
467  fits.readTableScalar(row, _column, s, _isVariableLength);
468  record.set(_key, s);
469  }
470 
471 private:
472  int _column;
473  Key<std::string> _key;
474  bool _isVariableLength;
475 };
476 
477 template <typename T>
478 class VariableLengthArrayReader : public FitsColumnReader {
479 public:
480  static std::unique_ptr<FitsColumnReader> make(Schema &schema, FitsSchemaItem const &item) {
481  return std::unique_ptr<FitsColumnReader>(new VariableLengthArrayReader(schema, item));
482  }
483 
484  VariableLengthArrayReader(Schema &schema, FitsSchemaItem const &item)
485  : _column(item.column), _key(schema.addField<Array<T>>(item.ttype, item.doc, item.tunit, 0)) {}
486 
487  void readCell(BaseRecord &record, std::size_t row, afw::fits::Fits &fits,
488  std::shared_ptr<InputArchive> const &archive) const override {
489  int size = fits.getTableArraySize(row, _column);
490  ndarray::Array<T, 1, 1> array = ndarray::allocate(size);
491  fits.readTableArray(row, _column, size, array.getData());
492  record.set(_key, array);
493  }
494 
495 private:
496  int _column;
497  Key<Array<T>> _key;
498 };
499 
500 // Read a 2-element FITS array column as separate x and y Schema fields (hence converting
501 // from the old Point compound field to the new PointKey FunctorKey).
502 template <typename T>
503 class PointConversionReader : public FitsColumnReader {
504 public:
505  static std::unique_ptr<FitsColumnReader> make(Schema &schema, FitsSchemaItem const &item) {
506  return std::unique_ptr<FitsColumnReader>(new PointConversionReader(schema, item));
507  }
508 
509  PointConversionReader(Schema &schema, FitsSchemaItem const &item)
510  : _column(item.column), _key(PointKey<T>::addFields(schema, item.ttype, item.doc, item.tunit)) {}
511 
512  void readCell(BaseRecord &record, std::size_t row, afw::fits::Fits &fits,
513  std::shared_ptr<InputArchive> const &archive) const override {
514  std::array<T, 2> buffer;
515  fits.readTableArray(row, _column, 2, buffer.data());
516  record.set(_key, lsst::geom::Point<T, 2>(buffer[0], buffer[1]));
517  }
518 
519 private:
520  int _column;
521  PointKey<T> _key;
522 };
523 
524 // Read a 2-element FITS array column as separate ra and dec Schema fields (hence converting
525 // from the old Coord compound field to the new CoordKey FunctorKey).
526 class CoordConversionReader : public FitsColumnReader {
527 public:
528  static std::unique_ptr<FitsColumnReader> make(Schema &schema, FitsSchemaItem const &item) {
529  return std::unique_ptr<FitsColumnReader>(new CoordConversionReader(schema, item));
530  }
531 
532  CoordConversionReader(Schema &schema, FitsSchemaItem const &item)
533  : _column(item.column), _key(CoordKey::addFields(schema, item.ttype, item.doc)) {}
534 
535  void readCell(BaseRecord &record, std::size_t row, afw::fits::Fits &fits,
536  std::shared_ptr<InputArchive> const &archive) const override {
538  fits.readTableArray(row, _column, 2, buffer.data());
539  record.set(_key, lsst::geom::SpherePoint(buffer[0], buffer[1]));
540  }
541 
542 private:
543  int _column;
544  CoordKey _key;
545 };
546 
547 // Read a 3-element FITS array column as separate xx, yy, and xy Schema fields (hence converting
548 // from the old Moments compound field to the new QuadrupoleKey FunctorKey).
549 class MomentsConversionReader : public FitsColumnReader {
550 public:
551  static std::unique_ptr<FitsColumnReader> make(Schema &schema, FitsSchemaItem const &item) {
552  return std::unique_ptr<FitsColumnReader>(new MomentsConversionReader(schema, item));
553  }
554 
555  MomentsConversionReader(Schema &schema, FitsSchemaItem const &item)
556  : _column(item.column),
557  _key(QuadrupoleKey::addFields(schema, item.ttype, item.doc, CoordinateType::PIXEL)) {}
558 
559  void readCell(BaseRecord &record, std::size_t row, afw::fits::Fits &fits,
560  std::shared_ptr<InputArchive> const &archive) const override {
561  std::array<double, 3> buffer;
562  fits.readTableArray(row, _column, 3, buffer.data());
563  record.set(_key, geom::ellipses::Quadrupole(buffer[0], buffer[1], buffer[2], false));
564  }
565 
566 private:
567  int _column;
568  QuadrupoleKey _key;
569 };
570 
571 // Read a FITS array column representing a packed symmetric matrix into
572 // Schema fields for each element (hence converting from the old Covariance
573 // compound field to the new CovarianceMatrixKey FunctorKey).
574 template <typename T, int N>
575 class CovarianceConversionReader : public FitsColumnReader {
576 public:
// Guess the units to hand to CovarianceMatrixKey::addFields from the units of
// the old covariance column.
//
// Units of the form "<letters>^<even power>" have the power halved, e.g.
// "pixel^2" -> "pixel" and "pixel^4" -> "pixel^2". Anything else (no power, an
// odd power, compound units) is returned unchanged.
//
// NOTE(review): this assumes addFields expects the units of the standard
// deviations rather than those of the covariance itself -- confirm.
//
// The previous implementation never converted anything: its greedy "(.*)" group
// swallowed the power suffix, the match test was inverted, and the branch it
// guarded parsed the capture group that includes '^' and dropped the base unit.
static std::string guessUnits(std::string const &oldUnits) {
    // At most nine digits are accepted so that std::stoi cannot overflow an int.
    static std::regex const regex("([A-Za-z]+)\\^(\\d{1,9})");
    std::smatch m;
    if (!std::regex_match(oldUnits, m, regex)) {
        return oldUnits;
    }
    int const oldPower = std::stoi(m[2].str());
    if (oldPower <= 0 || oldPower % 2 != 0) {
        // The square root cannot be written with an integer power.
        return oldUnits;
    }
    int const newPower = oldPower / 2;
    std::string newUnits = m[1].str();
    if (newPower != 1) {
        newUnits += "^" + std::to_string(newPower);
    }
    return newUnits;
}
587 
588  static std::unique_ptr<FitsColumnReader> make(Schema &schema, FitsSchemaItem const &item,
589  std::vector<std::string> const &names) {
590  return std::unique_ptr<FitsColumnReader>(new CovarianceConversionReader(schema, item, names));
591  }
592 
593  CovarianceConversionReader(Schema &schema, FitsSchemaItem const &item,
594  std::vector<std::string> const &names)
595  : _column(item.column),
596  _size(names.size()),
597  _key(CovarianceMatrixKey<T, N>::addFields(schema, item.ttype, names, guessUnits(item.tunit))),
598  _buffer(new T[detail::computeCovariancePackedSize(names.size())]) {}
599 
600  void readCell(BaseRecord &record, std::size_t row, afw::fits::Fits &fits,
601  std::shared_ptr<InputArchive> const &archive) const override {
602  fits.readTableArray(row, _column, detail::computeCovariancePackedSize(_size), _buffer.get());
603  for (int i = 0; i < _size; ++i) {
604  for (int j = i; j < _size; ++j) {
605  _key.setElement(record, i, j, _buffer[detail::indexCovariance(i, j)]);
606  }
607  }
608  }
609 
610 private:
611  int _column;
612  int _size;
613  CovarianceMatrixKey<T, N> _key;
614  std::unique_ptr<T[]> _buffer;
615 };
616 
617 std::unique_ptr<FitsColumnReader> makeColumnReader(Schema &schema, FitsSchemaItem const &item) {
618  // Regex to unpack a FITS TFORM value. The first number is the size of the array (1 if not present),
619  // followed by an alpha code indicating the type (preceded by P or Q for variable size array).
620  // The last number is ignored.
621  static std::regex const regex("(\\d+)?([PQ])?([A-Z])\\(?(\\d)*\\)?");
622  // start by parsing the format; this tells the element type of the field and the number of elements
623  std::smatch m;
624  if (!std::regex_match(item.tform, m, regex)) {
626  }
627  int size = 1;
628  if (m[1].matched) {
629  size = std::stoi(m[1].str());
630  }
631  char code = m[3].str()[0];
632  if (m[2].matched) {
633  // P or Q presence indicates a variable-length array, which we can get by just setting the
634  // size to zero and letting the rest of the logic run its course.
635  size = 0;
636  }
637  // switch code over FITS codes that correspond to different element types
638  switch (code) {
639  case 'B': // 8-bit unsigned integers -- can only be scalars or Arrays
640  if (size == 1) {
641  if (item.tccls == "Array") {
642  return StandardReader<Array<std::uint8_t>>::make(schema, item, size);
643  }
644  return StandardReader<std::uint8_t>::make(schema, item);
645  }
646  if (size == 0) {
647  return VariableLengthArrayReader<std::uint8_t>::make(schema, item);
648  }
649  return StandardReader<Array<std::uint8_t>>::make(schema, item, size);
650 
651  case 'I': // 16-bit integers - can only be scalars or Arrays (we assume they're unsigned, since
652  // that's all we ever write, and CFITSIO will complain later if they aren't)
653  if (size == 1) {
654  if (item.tccls == "Array") {
655  return StandardReader<Array<std::uint16_t>>::make(schema, item, size);
656  }
657  return StandardReader<std::uint16_t>::make(schema, item);
658  }
659  if (size == 0) {
660  return VariableLengthArrayReader<std::uint16_t>::make(schema, item);
661  }
662  return StandardReader<Array<std::uint16_t>>::make(schema, item, size);
663  case 'J': // 32-bit integers - can only be scalars, Point fields, or Arrays
664  if (size == 0) {
665  return VariableLengthArrayReader<std::int32_t>::make(schema, item);
666  }
667  if (item.tccls == "Point") {
668  return PointConversionReader<std::int32_t>::make(schema, item);
669  }
670  if (size > 1 || item.tccls == "Array") {
671  return StandardReader<Array<std::int32_t>>::make(schema, item, size);
672  }
673  return StandardReader<std::int32_t>::make(schema, item);
674  case 'K': // 64-bit integers - can only be scalars.
675  if (size == 1) {
676  return StandardReader<std::int64_t>::make(schema, item);
677  }
678  case 'E': // floats
679  if (size == 0) {
680  return VariableLengthArrayReader<float>::make(schema, item);
681  }
682  if (size == 1) {
683  if (item.tccls == "Array") {
684  return StandardReader<Array<float>>::make(schema, item, 1);
685  }
686  // Just use scalars for Covariances of size 1, since that results in more
687  // natural field names (essentially never happens anyway).
688  return StandardReader<float>::make(schema, item);
689  }
690  if (size == 3 && item.tccls == "Covariance(Point)") {
691  std::vector<std::string> names = {"x", "y"};
692  return CovarianceConversionReader<float, 2>::make(schema, item, names);
693  }
694  if (size == 6 && item.tccls == "Covariance(Moments)") {
695  std::vector<std::string> names = {"xx", "yy", "xy"};
696  return CovarianceConversionReader<float, 3>::make(schema, item, names);
697  }
698  if (item.tccls == "Covariance") {
699  double v = 0.5 * (std::sqrt(1 + 8 * size) - 1);
700  std::size_t n = std::lround(v);
701  if (n * (n + 1) != size * 2) {
702  throw LSST_EXCEPT(afw::fits::FitsError, "Covariance field has invalid size.");
703  }
704  std::vector<std::string> names(n);
705  for (std::size_t i = 0; i < n; ++i) {
706  names[i] = std::to_string(i);
707  }
708  return CovarianceConversionReader<float, Eigen::Dynamic>::make(schema, item, names);
709  }
710  return StandardReader<Array<float>>::make(schema, item, size);
711  case 'D': // doubles
712  if (size == 0) {
713  return VariableLengthArrayReader<double>::make(schema, item);
714  }
715  if (size == 1) {
716  if (item.tccls == "Angle") {
717  return AngleReader::make(schema, item);
718  }
719  if (item.tccls == "Array") {
720  return StandardReader<Array<double>>::make(schema, item, 1);
721  }
722  return StandardReader<double>::make(schema, item);
723  }
724  if (size == 2) {
725  if (item.tccls == "Point") {
726  return PointConversionReader<double>::make(schema, item);
727  }
728  if (item.tccls == "Coord") {
729  return CoordConversionReader::make(schema, item);
730  }
731  }
732  if (size == 3 && item.tccls == "Moments") {
733  return MomentsConversionReader::make(schema, item);
734  }
735  return StandardReader<Array<double>>::make(schema, item, size);
736  case 'A': // strings
737  // StringReader can read both fixed-length and variable-length (size=0) strings
738  return StringReader::make(schema, item, size);
739  default:
741  }
742 }
743 
// Return true if `s` ends with `suffix`. An empty suffix matches every string.
bool endswith(std::string const &s, std::string const &suffix) {
    if (suffix.size() > s.size()) {
        return false;
    }
    // Compare from the back: all of the suffix against the tail of `s`.
    return std::equal(suffix.rbegin(), suffix.rend(), s.rbegin());
}
747 
748 bool isInstFlux(FitsSchemaItem const & item) {
749  // helper lambda to make reading the real logic easier
750  auto includes = [](std::string const & s, char const * target) {
751  return s.find(target) != std::string::npos;
752  };
753  if (!includes(item.ttype, "flux")) return false;
754  if (includes(item.ttype, "modelfit_CModel") && item.tunit.empty()) {
755  // CModel flux fields were written with no units prior to DM-16068,
756  // but should have been "count".
757  return true;
758  }
759  // transform units to lowercase.
760  std::string units(item.tunit);
761  std::transform(units.begin(), units.end(), units.begin(), [](char c) { return std::tolower(c); } );
762  return includes(units, "count") || includes(units, "dn") || includes (units, "adu");
763 }
764 
// Replace the first occurrence of 'from' with 'to' in 'full', returning the result.
// If 'from' does not occur in 'full', 'full' is returned unchanged (previously
// this case threw std::out_of_range, because find() returned npos).
std::string replace(std::string full, std::string const & from, std::string const & to) {
    std::size_t const pos = full.find(from);
    if (pos != std::string::npos) {
        full.replace(pos, from.size(), to);
    }
    return full;
}
769 
770 } // namespace
771 
773  if (_impl->version == 0) {
774  AliasMap &aliases = *_impl->schema.getAliasMap();
775  for (auto iter = _impl->asList().begin(); iter != _impl->asList().end(); ++iter) {
776  std::size_t flagPos = iter->ttype.find("flags");
777  if (flagPos != std::string::npos) {
778  // We want to create aliases that resolve "(.*)_flag" to "$1_flags"; old schemas will have
779  // the latter, but new conventions (including slots) expect the former.
780  // But we can't do that, because adding that alias directly results in a cycle in the
781  // aliases (since aliases do partial matches, and keep trying until there are no matches,
782  // we'd have "(.*)_flag" resolve to "$1_flagssssssssssssss...").
783  // Instead, we *rename* from "flags" to "flag", then create the reverse alias.
784  std::string ttype = iter->ttype;
785  std::string prefix = iter->ttype.substr(0, flagPos);
786  ttype.replace(flagPos, 5, "flag");
787  _impl->asList().modify(iter, Impl::SetTTYPE(ttype));
788  // Note that we're not aliasing the full field, just the first part - if we have multiple
789  // flag fields, one alias should be sufficient for all of them (because of partial matching).
790  // Of course, we'll try to recreate that alias every time we handle another flag field with
791  // the same prefix, but AliasMap know hows to handle that no-op set.
792  aliases.set(prefix + "flags", prefix + "flag");
793  } else if (isInstFlux(*iter)) {
794  // Create an alias that resolves "X_instFlux" to "X" or "X_instFluxErr" to "X_err".
795  if (endswith(iter->ttype, "_err")) {
796  aliases.set(replace(iter->ttype, "_err", "_instFluxErr"), iter->ttype);
797  } else {
798  aliases.set(iter->ttype + "_instFlux", iter->ttype);
799  }
800  } else if (endswith(iter->ttype, "_err")) {
801  // Create aliases that resolve "(.*)_(.*)Err" and "(.*)_(.*)_(.*)_Cov" to
802  // "$1_err_$2Err" and "$1_err_$2_$3_Cov", to make centroid and shape uncertainties
803  // available under the new conventions. We don't have to create aliases for the
804  // centroid and shape values themselves, as those will automatically be correct
805  // after the PointConversionReader and MomentsConversionReader do their work.
806  if (iter->tccls == "Covariance(Point)") {
807  aliases.set(replace(iter->ttype, "_err", "_yErr"), iter->ttype + "_yErr");
808  aliases.set(replace(iter->ttype, "_err", "_xErr"), iter->ttype + "_xErr");
809  aliases.set(replace(iter->ttype, "_err", "_x_y_Cov"), iter->ttype + "_x_y_Cov");
810  } else if (iter->tccls == "Covariance(Moments)") {
811  aliases.set(replace(iter->ttype, "_err", "_xxErr"), iter->ttype + "_xxErr");
812  aliases.set(replace(iter->ttype, "_err", "_yyErr"), iter->ttype + "_yyErr");
813  aliases.set(replace(iter->ttype, "_err", "_xyErr"), iter->ttype + "_xyErr");
814  aliases.set(replace(iter->ttype, "_err", "_xx_yy_Cov"), iter->ttype + "_xx_yy_Cov");
815  aliases.set(replace(iter->ttype, "_err", "_xx_xy_Cov"), iter->ttype + "_xx_xy_Cov");
816  aliases.set(replace(iter->ttype, "_err", "_yy_xy_Cov"), iter->ttype + "_yy_xy_Cov");
817  }
818  }
819  }
820  } else if (_impl->version < 3) {
821  // Version == 1 tables use Sigma when we should use Err (see RFC-333) and had no fields
822  // that should have been named Sigma. So provide aliases xErr -> xSigma.
823  // Version <= 2 tables used _flux when we should use _instFlux (see RFC-322).
824  AliasMap &aliases = *_impl->schema.getAliasMap();
825  for (auto iter = _impl->asList().begin(); iter != _impl->asList().end(); ++iter) {
826  std::string name = iter->ttype;
827  if (_impl->version < 2 && endswith(name, "Sigma")) {
828  name = replace(std::move(name), "Sigma", "Err");
829  }
830  if (_impl->version < 3 && isInstFlux(*iter)) {
831  name = replace(std::move(name), "flux", "instFlux");
832  }
833  if (name != iter->ttype) {
834  aliases.set(name, iter->ttype);
835  }
836  }
837  }
838  for (auto iter = _impl->asList().begin(); iter != _impl->asList().end(); ++iter) {
839  if (iter->bit < 0) { // not a Flag column
840  std::unique_ptr<FitsColumnReader> reader = makeColumnReader(_impl->schema, *iter);
841  if (reader) {
842  _impl->readers.push_back(std::move(reader));
843  } else {
844  LOGLS_WARN("afw.FitsSchemaInputMapper", "Format " << iter->tform << " for column "
845  << iter->ttype
846  << " not supported; skipping.");
847  }
848  } else { // is a Flag column
849  if (static_cast<std::size_t>(iter->bit) >= _impl->flagKeys.size()) {
851  (boost::format("Flag field '%s' is is in bit %d (0-indexed) of only %d") %
852  iter->ttype % iter->bit % _impl->flagKeys.size())
853  .str());
854  }
855  _impl->flagKeys[iter->bit] = _impl->schema.addField<Flag>(iter->ttype, iter->doc);
856  }
857  }
858  _impl->asList().clear();
859  if (_impl->schema.getRecordSize() <= 0) {
860  throw LSST_EXCEPT(
862  (boost::format("Non-positive record size: %d; file is corrupt or invalid.") %
863  _impl->schema.getRecordSize()).str()
864  );
865  }
866  _impl->nRowsToPrep = std::max(PREPPED_ROWS_FACTOR / _impl->schema.getRecordSize(), std::size_t(1));
867  return _impl->schema;
868 }
869 
871  if (!_impl->flagKeys.empty()) {
872  fits.readTableArray<bool>(row, _impl->flagColumn, _impl->flagKeys.size(), _impl->flagWorkspace.get());
873  for (std::size_t bit = 0; bit < _impl->flagKeys.size(); ++bit) {
874  record.set(_impl->flagKeys[bit], _impl->flagWorkspace[bit]);
875  }
876  }
877  if (_impl->nRowsToPrep != 1 && row % _impl->nRowsToPrep == 0) {
878  // Give readers a chance to read and cache up to nRowsToPrep rows-
879  // worth of values.
880  std::size_t size = std::min(_impl->nRowsToPrep, fits.countRows() - row);
881  for (auto const &reader : _impl->readers) {
882  reader->prepRead(row, size, fits);
883  }
884  }
885  for (auto const & reader : _impl->readers) {
886  reader->readCell(record, row, fits, _impl->archive);
887  }
888 }
889 } // namespace io
890 } // namespace table
891 } // namespace afw
892 } // namespace lsst
table::Key< std::string > name
Definition: Amplifier.cc:116
Key< Flag > const & target
table::Key< int > type
Definition: Detector.cc:163
#define LSST_EXCEPT(type,...)
Fits * fits
Definition: FitsWriter.cc:90
int nFlags
Definition: FitsWriter.cc:91
#define LOGLS_WARN(logger, message)
std::string prefix
Definition: SchemaMapper.cc:72
int m
Definition: SpanSet.cc:49
table::Key< int > from
table::Key< int > to
An exception thrown when problems are found when reading or writing FITS files.
Definition: fits.h:36
A simple struct that combines the two arguments that must be passed to most cfitsio routines and cont...
Definition: fits.h:297
Mapping class that holds aliases for a Schema.
Definition: AliasMap.h:36
void set(std::string const &alias, std::string const &target)
Add an alias to the schema or replace an existing one.
Definition: AliasMap.cc:82
Base class for all records.
Definition: BaseRecord.h:31
void set(Key< T > const &key, U const &value)
Set value of a field for the given key.
Definition: BaseRecord.h:164
Defines the fields and offsets for a table.
Definition: Schema.h:51
static int const VERSION
Definition: Schema.h:57
Polymorphic reader interface used to read different kinds of objects from one or more FITS binary tab...
boost::multi_index_container< FitsSchemaItem, boost::multi_index::indexed_by< boost::multi_index::ordered_non_unique< boost::multi_index::member< FitsSchemaItem, int, &FitsSchemaItem::column > >, boost::multi_index::ordered_non_unique< boost::multi_index::member< FitsSchemaItem, int, &FitsSchemaItem::bit > >, boost::multi_index::hashed_unique< boost::multi_index::member< FitsSchemaItem, std::string, &FitsSchemaItem::ttype > >, boost::multi_index::sequenced<> >> InputContainer
std::vector< std::unique_ptr< FitsColumnReader > > readers
SetFitsSchemaString<&FitsSchemaItem::tccls > SetTCCLS
SetFitsSchemaString<&FitsSchemaItem::ttype > SetTTYPE
SetFitsSchemaString<&FitsSchemaItem::doc > SetDoc
SetFitsSchemaString<&FitsSchemaItem::tform > SetTFORM
SetFitsSchemaString<&FitsSchemaItem::tunit > SetTUNIT
A class that describes a mapping from a FITS binary table to an afw::table Schema.
FitsSchemaInputMapper & operator=(FitsSchemaInputMapper const &)
static std::size_t PREPPED_ROWS_FACTOR
When processing each column, divide this number by the record size (in bytes) and ask CFITSIO to read...
bool readArchive(afw::fits::Fits &fits)
Set the Archive by reading from the HDU specified by the AR_HDU header entry.
Schema finalize()
Map any remaining items into regular Schema items, and return the final Schema.
void customize(std::unique_ptr< FitsColumnReader > reader)
Customize a mapping by providing a FitsColumnReader instance that will be invoked by readRecord().
void readRecord(BaseRecord &record, afw::fits::Fits &fits, std::size_t row)
Fill a record from a FITS binary table row.
FitsSchemaInputMapper(daf::base::PropertyList &metadata, bool stripMetadata)
Construct a mapper from a PropertyList of FITS header values, stripping recognized keys if desired.
void erase(Item const *item)
Remove the given item (which should have been retrieved via find()) from the mapping,...
bool hasArchive() const
Return true if the mapper has an InputArchive.
Item const * find(std::string const &ttype) const
Find an item with the given column name (ttype), returning nullptr if no such column exists.
void setArchive(std::shared_ptr< InputArchive > archive)
Set the Archive to an externally-provided one, overriding any that may have been read.
A multi-catalog archive object used to load table::io::Persistable objects.
Definition: InputArchive.h:31
static InputArchive readFits(fits::Fits &fitsfile)
Read an object from an already open FITS object.
T get(std::string const &name) const
std::string const & getComment(std::string const &name) const
std::vector< T > getArray(std::string const &name) const
std::vector< std::string > getOrderedNames() const
virtual void remove(std::string const &name)
bool exists(std::string const &name) const
T compare(T... args)
T copy_n(T... args)
T data(T... args)
T find(T... args)
T includes(T... args)
T make_pair(T... args)
T max(T... args)
T min(T... args)
T move(T... args)
def iter(self)
std::size_t computeCovariancePackedSize(std::size_t size)
Defines the packed size of a covariance matrix.
Definition: FieldBase.h:35
std::size_t indexCovariance(std::size_t i, std::size_t j)
Defines the ordering of packed covariance matrices.
Definition: FieldBase.h:32
void erase(int column)
lsst::geom::Angle Angle
Definition: misc.h:34
CoordinateType
Enum used to set units for geometric FunctorKeys.
Definition: aggregates.h:277
constexpr AngleUnit radians
A base class for image defects.
STL namespace.
T regex_match(T... args)
T replace(T... args)
T lround(T... args)
T size(T... args)
T sqrt(T... args)
T stoi(T... args)
Field base class default implementation (used for numeric scalars and lsst::geom::Angle).
Definition: FieldBase.h:43
A structure that describes a field as a collection of related strings read from the FITS header.
T to_string(T... args)
T transform(T... args)
table::Schema schema
Definition: python.h:134