# Python-level dtypes of the DiaSource columns carried through the join —
# presumably parallel, entry-for-entry, to DIA_COLUMNS (used below as
# dia[DIA_COLUMNS]); confirm the two lists stay in sync.
# (Fix: stray line-number residue "18" fused into the identifier removed.)
DIA_DTYPES = [int, float, float, float, float, str, float, float]
def compute_ssobject_entry(
    row, sss, fixedG12=None, magSigmaFloor=0.0, nSigmaClip=None,
):
    """Fill one ssObject output record in place from its SSSource group.

    Parameters
    ----------
    row : numpy record (schema.SSObjectDtype)
        Output record, filled in place.
    sss : pandas.DataFrame
        SSSource rows joined with 'dia_'-prefixed DiaSource columns,
        all belonging to a single ssObjectId.
    fixedG12 : float, optional
        If not None, hold G12 fixed in the HG12 phase-curve fit.
    magSigmaFloor : float, optional
        Magnitude-uncertainty floor passed to the fit.
    nSigmaClip : float, optional
        Sigma-clipping threshold passed to the fit.

    NOTE(review): reconstructed from a garbled source with missing lines;
    reconstructed spots are marked inline and should be confirmed against
    the original file.
    """
    # The caller must pass a single-object group.
    assert sss["ssObjectId"].nunique() == 1

    # Object-level summary quantities.
    row["ssObjectId"] = sss["ssObjectId"].iloc[0]
    row["firstObservationMjdTai"] = sss["dia_midpointMjdTai"].min()
    if "discoverySubmissionDate" in row.dtype.names:
        # Submission date modeled as one week after first observation.
        row["discoverySubmissionDate"] = row["firstObservationMjdTai"] + 7.0
    row["arc"] = np.ptp(sss["dia_midpointMjdTai"])
    row["designation"] = sss["designation"].iloc[0]
    row["nObs"] = len(sss)

    # Per-band photometry and HG12 phase-curve fits.
    # NOTE(review): the band iterable was not visible in the source;
    # iterating over the bands present in the data — confirm.
    for band in sss["dia_band"].unique():
        df = sss[sss["dia_band"] == band]
        nBandObs = len(df)  # NOTE(review): definition reconstructed

        # Defaults for bands with no usable fit.
        row[f"{band}_Chi2"] = np.nan
        row[f"{band}_G12"] = np.nan
        row[f"{band}_G12Err"] = np.nan
        row[f"{band}_H"] = np.nan
        row[f"{band}_H_{band}_G12_Cov"] = np.nan
        row[f"{band}_HErr"] = np.nan
        row[f"{band}_nObsUsed"] = 0
        row[f"{band}_phaseAngleMin"] = np.nan
        row[f"{band}_phaseAngleMax"] = np.nan

        row[f"{band}_nObs"] = nBandObs
        if nBandObs == 0:  # NOTE(review): empty-band guard reconstructed
            continue

        paMin, paMax = df["phaseAngle"].min(), df["phaseAngle"].max()
        row[f"{band}_phaseAngleMin"] = paMin
        row[f"{band}_phaseAngleMax"] = paMax

        try:
            H, G12, sigmaH, sigmaG12, covHG12, chi2dof, nobsv = photfit.fitHG12(
                df["dia_psfMag"], df["dia_psfMagErr"],
                df["phaseAngle"], df["topoRange"], df["helioRange"],
                fixedG12=fixedG12, magSigmaFloor=magSigmaFloor,
                nSigmaClip=nSigmaClip,
            )
            # One fitted parameter (H) when G12 is held fixed, two otherwise.
            nDof = nBandObs - (1 if fixedG12 is not None else 2)
        except Exception:
            # NOTE(review): the original except clause is not visible —
            # narrow this to photfit's failure exception if one exists.
            row[f"{band}_slope_fit_failed"] = True
        else:
            row[f"{band}_Chi2"] = chi2dof * nDof
            row[f"{band}_G12"] = G12
            row[f"{band}_G12Err"] = sigmaG12
            row[f"{band}_H"] = H  # NOTE(review): line missing in source; inferred
            row[f"{band}_H_{band}_G12_Cov"] = covHG12
            row[f"{band}_HErr"] = sigmaH
            row[f"{band}_nObsUsed"] = nobsv

    # Extendedness statistics over all observations of the object.
    row["extendednessMin"] = sss["dia_extendedness"].min()
    row["extendednessMax"] = sss["dia_extendedness"].max()
    row["extendednessMedian"] = sss["dia_extendedness"].median()
def compute_ssobject(
    sss, dia, mpcorb, fixedG12=None, magSigmaFloor=0.0,
    nSigmaClip=None,
):
    """Compute solar system object properties by joining and processing
    SSSource, DiaSource, and MPC orbit data.

    This function takes a pre-grouped SSSource table, joins it with
    DiaSource data, computes per-object quantities, and calculates
    additional orbital parameters like Tisserand J and Minimum Orbit
    Intersection Distance (MOID) with Earth for matching objects.

    Parameters
    ----------
    sss : pandas.DataFrame
        SSSource table, pre-grouped by 'ssObjectId'. Must be sorted by
        'ssObjectId' for correct grouping. Contains columns like
        'ssObjectId', 'diaSourceId', etc.
    dia : pandas.DataFrame
        DiaSource table with columns prefixed as 'dia_' in the join.
        Must include 'dia_diaSourceId', 'dia_psfFlux', 'dia_psfFluxErr',
        and related photometric columns.
    mpcorb : pandas.DataFrame or None
        MPC orbit data with columns like
        'unpacked_primary_provisional_designation', 'q', 'e', 'i',
        'node', 'argperi', 'epoch_mjd'. If None, orbit-derived
        quantities are skipped.

    Returns
    -------
    numpy.ndarray
        Array of ssObject records with dtype schema.SSObjectDtype,
        containing computed properties for each unique ssObjectId,
        including magnitudes, orbital elements, Tisserand J, and
        MOID-related quantities.

    Raises
    ------
    AssertionError
        If 'sss' is not pre-grouped by 'ssObjectId', or if DiaSources
        are missing after join.

    Notes
    -----
    - The function assumes 'sss' is large and avoids internal
      sorting/copying for efficiency.
    - Tisserand J and MOID are computed only for objects matching
      designations in 'mpcorb'.
    - MOID computation uses a MOIDSolver for each matched object.
    - NOTE(review): reconstructed from a garbled source with missing
      lines; reconstructed spots are marked inline.
    """
    assert util.values_grouped(sss["ssObjectId"]), (
        "SSSource table must be pre-grouped by ssObjectId. "
        "An easy way to do this is to sort by ssObjectId before calling compute_ssobject(). "
        "The grouping is required for correct per-object computations, and since SSSource is "
        "typically large and we want to avoid copies, it's not done internally."
    )

    num = len(sss)  # NOTE(review): reconstructed; used by the merge sanity check

    # Join the needed DiaSource columns onto SSSource, prefixed 'dia_'.
    dia_tmp = dia[DIA_COLUMNS].add_prefix("dia_")
    # NOTE(review): cast to int64[pyarrow] here, while main() casts
    # diaSourceId to uint64[pyarrow]; a signed/unsigned mismatch on the
    # merge keys looks suspicious — confirm intended dtypes.
    dia_tmp["dia_diaSourceId"] = dia_tmp["dia_diaSourceId"].astype("int64[pyarrow]")
    sss = sss.merge(
        dia_tmp, left_on="diaSourceId", right_on="dia_diaSourceId", how="inner"
    )
    assert num == len(sss), f"{num - len(sss)} DiaSources found missing."
    del sss["dia_diaSourceId"]

    # Convert fluxes (nJy) to magnitudes once for all downstream fits.
    sss["dia_psfMag"] = nJy_to_mag(sss["dia_psfFlux"])

    totalNumObjects = np.unique(sss["ssObjectId"]).size
    obj = np.zeros(totalNumObjects, dtype=schema.SSObjectDtype)

    def callback(row, group):
        # Per-group filler with fit options bound.
        # NOTE(review): wrapper reconstructed; the original likely bound
        # the same keywords via functools.partial.
        return compute_ssobject_entry(
            row, group, fixedG12=fixedG12,
            magSigmaFloor=magSigmaFloor, nSigmaClip=nSigmaClip,
        )

    util.group_by([sss], "ssObjectId", callback, out=obj)

    if mpcorb is not None:
        # Match computed objects to MPC orbits by designation.
        oidx, midx = util.argjoin(
            obj["designation"].astype("U"),
            mpcorb["unpacked_primary_provisional_designation"].to_numpy().astype("U"),
        )
        assert np.all(
            mpcorb["unpacked_primary_provisional_designation"].take(midx)
            == obj["designation"][oidx].astype("U")
        )

        q, e, i, node, argperi, epoch_mjd = util.unpack(
            mpcorb["q e i node argperi epoch_mjd".split()].take(midx)
        )
        # NOTE(review): semi-major axis from perihelion distance; the
        # original line is missing from the source — confirm.
        a = q / (1.0 - e)
        obj["tisserand_J"][oidx] = util.tisserand_jupiter(a, e, i)

        # MOID with Earth for each matched object.
        # (Fix: loop index renamed from 'i' to 'k' so it no longer
        # shadows the inclination array 'i' unpacked above.)
        solver = MOIDSolver()  # NOTE(review): construction not visible in source
        for k, el_obj in enumerate(zip(a, e, i, node, argperi)):
            earth = earth_orbit(epoch_mjd[k])
            (moid, deltaV, eclon, trueEarth, trueObject) = solver.compute(earth, el_obj)
            row = obj[oidx[k]]  # NOTE(review): target record reconstructed
            row["MOIDEarth"] = moid
            row["MOIDEarthDeltaV"] = deltaV
            row["MOIDEarthEclipticLongitude"] = eclon
            row["MOIDEarthTrueAnomaly"] = trueEarth
            row["MOIDEarthTrueAnomalyObject"] = trueObject

    return obj  # NOTE(review): return statement reconstructed per docstring
def main():
    """CLI entry point for building the SSObject table from SSSource,
    DiaSource, and MPC orbit Parquet files.

    NOTE(review): reconstructed from a garbled source with missing
    lines; reconstructed spots are marked inline.
    """
    parser = argparse.ArgumentParser(
        description="Build SSObject table from SSSource, DiaSource, and MPC orbit Parquet files",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        # NOTE(review): epilog wrapper reconstructed around the one
        # visible example line.
        epilog=(
            "Example:\n"
            "  ssp-build-ssobject sssource.parquet dia_sources.parquet "
            "mpc_orbits.parquet --output ssobject.parquet"
        ),
    )
    parser.add_argument("sssource_parquet", help="Path to SSSource Parquet file")
    parser.add_argument("diasource_parquet", help="Path to DiaSource Parquet file")
    parser.add_argument("mpcorb_parquet", help="Path to MPC orbits Parquet file")
    parser.add_argument(
        "--output", "-o", required=True,
        help="Path to output SSObject Parquet file",
    )
    parser.add_argument(
        "--debug",  # NOTE(review): flag name not visible in source — confirm
        action="store_true",
        help="Re-raise exceptions instead of exiting gracefully (for debugging)",
    )
    args = parser.parse_args()

    try:
        # Load SSSource.
        print(f"Loading SSSource from {args.sssource_parquet}...")
        sss = pd.read_parquet(
            args.sssource_parquet, engine="pyarrow", dtype_backend="pyarrow"
        ).reset_index(drop=True)
        num = len(sss)  # NOTE(review): reconstructed
        print(f"Loaded {num:,} SSSource rows")

        # Load DiaSource restricted to the columns the pipeline needs.
        # NOTE(review): column-selection lines missing from source;
        # reconstructed from the DIA_COLUMNS usage in compute_ssobject().
        dia_columns = list(DIA_COLUMNS)
        print(f"Loading DiaSource from {args.diasource_parquet}...")
        dia = pd.read_parquet(
            args.diasource_parquet, engine="pyarrow",
            dtype_backend="pyarrow", columns=dia_columns
        ).reset_index(drop=True)
        print(f"Loaded {len(dia):,} DiaSource rows")

        # diaSourceId must be non-negative so the unsigned cast is lossless.
        assert np.all(dia["diaSourceId"] >= 0)
        dia["diaSourceId"] = dia["diaSourceId"].astype("uint64[pyarrow]")

        # Load MPC orbits restricted to the columns compute_ssobject() uses.
        # NOTE(review): list partially reconstructed from downstream usage.
        mpcorb_columns = [
            "unpacked_primary_provisional_designation",
            "q", "e", "i", "node", "argperi", "epoch_mjd",
        ]
        print(f"Loading MPC orbits from {args.mpcorb_parquet}...")
        mpcorb = pd.read_parquet(
            args.mpcorb_parquet, engine="pyarrow",
            dtype_backend="pyarrow", columns=mpcorb_columns
        ).reset_index(drop=True)
        print(f"Loaded {len(mpcorb):,} MPC orbit rows")

        print("Computing SSObject data...")
        # NOTE(review): any extra fit-option kwargs were not visible — confirm.
        obj = compute_ssobject(sss, dia, mpcorb)

        print(f"Saving {len(obj):,} SSObject rows to {args.output}...")
        util.struct_to_parquet(obj, args.output)

        print(f"Success! Created SSObject with {len(obj):,} objects")
        print(f"Row size: {obj.dtype.itemsize:,} bytes, Total size: {obj.nbytes:,} bytes")
    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        if args.debug:
            # Re-raise for debugging instead of exiting gracefully.
            raise
        sys.exit(1)  # NOTE(review): graceful-exit path reconstructed