# NOTE(review): the leading "17" is residue of a source line number fused into
# the content by the same extraction corruption visible throughout this file;
# the statement was presumably `DIA_DTYPES = [...]`. The list appears to hold
# per-column Python dtypes for the DiaSource columns selected via DIA_COLUMNS
# (referenced near original line 193 below) -- TODO confirm the order and
# length against DIA_COLUMNS once the original file is recovered.
17DIA_DTYPES = [int, float, float, float, float, str, float, float]
# NOTE(review): everything from here through original line 129 is a corrupted,
# partial extraction of a function body (apparently `compute_ssobject_entry`,
# judging by the reference near original line 215 later in this file). The
# `def` line and many interior lines are missing (the embedded original line
# numbers jump: 56, 58-59, 61-62, 65, 67-68, 72-73, 75-77, 79-80, 90-91, 93,
# 97-100, 106, 108-112, 114-117, 121, 125-126 are absent), and each surviving
# statement is split across physical lines. Code below is reproduced
# byte-for-byte; only comments are added. Do not attempt repairs until the
# original file is recovered.
#
# Visible signature fragment (original line 57): a mutable output record
# `row`, a single-object SSSource+DiaSource frame `sss`, and fit options
# fixedG12 / magSigmaFloor / nSigmaClip (the rest of the signature, original
# line 56/58, is missing).
57 row, sss, fixedG12=None, magSigmaFloor=0.0, nSigmaClip=None,
# Sanity check: this frame must contain exactly one solar-system object.
60 assert sss[
"ssObjectId"].nunique() == 1
63 row[
"ssObjectId"] = sss[
"ssObjectId"].iloc[0]
# First-observation epoch from the earliest DIA midpoint time.
64 row[
"firstObservationMjdTai"] = sss[
"dia_midpointMjdTai"].min()
# Optional column: only filled when present in the output record's dtype.
# The +7.0 offset's rationale is not visible here -- TODO confirm (a comment
# on missing lines 67-68 presumably explained it).
66 if "discoverySubmissionDate" in row.dtype.names:
69 row[
"discoverySubmissionDate"] = row[
"firstObservationMjdTai"] + 7.0
# Observation arc = peak-to-peak span of the observation epochs.
70 row[
"arc"] = np.ptp(sss[
"dia_midpointMjdTai"])
71 row[
"designation"] = sss[
"designation"].iloc[0]
74 row[
"nObs"] = len(sss)
# From here on, `band` and (later) `nBandObs` are used but defined on missing
# lines -- presumably a per-band loop header around original lines 75-77.
# TODO confirm against the original.
78 df = sss[sss[
"dia_band"] == band]
# Per-band defaults (NaN / 0) -- likely the "no usable observations in this
# band" branch of a conditional on missing lines 79-80.
81 row[f
"{band}_Chi2"] = np.nan
82 row[f
"{band}_G12"] = np.nan
83 row[f
"{band}_G12Err"] = np.nan
84 row[f
"{band}_H"] = np.nan
85 row[f
"{band}_H_{band}_G12_Cov"] = np.nan
86 row[f
"{band}_HErr"] = np.nan
87 row[f
"{band}_nObsUsed"] = 0
88 row[f
"{band}_phaseAngleMin"] = np.nan
89 row[f
"{band}_phaseAngleMax"] = np.nan
92 row[f
"{band}_nObs"] = nBandObs
# Phase-angle coverage for this band.
94 paMin, paMax = df[
"phaseAngle"].min(), df[
"phaseAngle"].max()
95 row[f
"{band}_phaseAngleMin"] = paMin
96 row[f
"{band}_phaseAngleMax"] = paMax
# H,G12 phase-curve fit (original line 101). Missing lines 106-117 presumably
# contain the try/except whose failure branch sets {band}_slope_fit_failed
# (that assignment survives below at original line 113) -- TODO confirm.
101 H, G12, sigmaH, sigmaG12, covHG12, chi2dof, nobsv = photfit.fitHG12(
102 df[
"dia_psfMag"], df[
"dia_psfMagErr"],
103 df[
"phaseAngle"], df[
"topoRange"], df[
"helioRange"],
104 fixedG12=fixedG12, magSigmaFloor=magSigmaFloor,
105 nSigmaClip=nSigmaClip,
# Degrees of freedom: presumably one free parameter when G12 is fixed
# (H only), otherwise two (H and G12) -- TODO confirm against fitHG12.
107 nDof = nBandObs - (1
if fixedG12
is not None else 2)
113 row[f
"{band}_slope_fit_failed"] =
True
# Success path: store chi2 (un-reduced: chi2/dof * dof), fit parameters,
# their uncertainties/covariance, and the number of observations used.
118 row[f
"{band}_Chi2"] = chi2dof * nDof
119 row[f
"{band}_G12"] = G12
120 row[f
"{band}_G12Err"] = sigmaG12
122 row[f
"{band}_H_{band}_G12_Cov"] = covHG12
123 row[f
"{band}_HErr"] = sigmaH
124 row[f
"{band}_nObsUsed"] = nobsv
# Whole-object extendedness summaries over all DIA measurements.
127 row[
"extendednessMin"] = sss[
"dia_extendedness"].min()
128 row[
"extendednessMax"] = sss[
"dia_extendedness"].max()
129 row[
"extendednessMedian"] = sss[
"dia_extendedness"].median()
# NOTE(review): corrupted partial extraction of a second function (apparently
# `compute_ssobject`, per the docstring fragment below which names
# compute_ssobject() at original line 186). The `def` line (original ~132),
# the docstring's quote delimiters and section headers (Parameters / Returns /
# Raises / Notes), and many interior lines are absent; surviving lines are
# reproduced byte-for-byte with comments added only.
#
# Visible signature fragment (original line 133): sss, dia, mpcorb plus the
# same fit options as compute_ssobject_entry.
133 sss, dia, mpcorb, fixedG12=None, magSigmaFloor=0.0,
# --- docstring fragment (delimiters missing) ---
137 Compute solar system object properties by joining and processing
138 SSSource, DiaSource, and MPC orbit data.
140 This function takes a pre-grouped SSSource table, joins it with
141 DiaSource data, computes per-object quantities, and calculates
142 additional orbital parameters like Tisserand J and Minimum Orbit
143 Intersection Distance (MOID) with Earth for matching objects.
147 sss : pandas.DataFrame
148 SSSource table, pre-grouped by 'ssObjectId'. Must be sorted by
149 'ssObjectId' for correct grouping. Contains columns like
150 'ssObjectId', 'diaSourceId', etc.
151 dia : pandas.DataFrame
152 DiaSource table with columns prefixed as 'dia_' in the join.
153 Must include 'dia_diaSourceId', 'dia_psfFlux', 'dia_psfFluxErr',
155 mpcorb : pandas.DataFrame
156 MPC orbit data with columns like
157 'unpacked_primary_provisional_designation', 'q', 'e', 'i',
163 Array of ssObject records with dtype schema.ssObjectDtype,
164 containing computed properties for each unique ssObjectId,
165 including magnitudes, orbital elements, Tisserand J, and
171 If 'sss' is not pre-grouped by 'ssObjectId', or if DiaSources
172 are missing after join.
176 - The function assumes 'sss' is large and avoids internal
177 sorting/copying for efficiency.
178 - Tisserand J and MOID are computed only for objects matching
179 designations in 'mpcorb'.
180 - MOID computation uses a MOIDSolver for each matched object.
# --- end docstring fragment ---
# Precondition check. NOTE(review): assert is stripped under `python -O`;
# the docstring says this should Raise -- consider an explicit ValueError in
# the restored original.
184 assert util.values_grouped(sss[
"ssObjectId"]), (
185 "SSSource table must be pre-grouped by ssObjectId. "
186 "An easy way to do this is to sort by ssObjectId before calling compute_ssobject(). "
187 "The grouping is required for correct per-object computations, and since SSSource is "
188 "typically large and we want to avoid copies, it's not done internally."
# Join the selected DiaSource columns onto SSSource under a "dia_" prefix.
193 dia_tmp = dia[DIA_COLUMNS].add_prefix(
"dia_")
199 dia_tmp[
"dia_diaSourceId"] = dia_tmp[
"dia_diaSourceId"].astype(
"int64[pyarrow]")
200 sss = sss.merge(dia_tmp, left_on=
"diaSourceId", right_on=
"dia_diaSourceId", how=
"inner")
# NOTE(review): `num` is defined on a missing line -- presumably len(sss)
# captured before the inner merge, so this detects rows dropped by the join.
# TODO confirm.
201 assert num == len(sss), f
"{num - len(sss)} DiaSources found missing."
202 del sss[
"dia_diaSourceId"]
# Convert PSF fluxes (nJy, per the helper's name) to magnitudes.
206 sss[
"dia_psfMag"] =
nJy_to_mag(sss[
"dia_psfFlux"])
# One output record per unique object. NOTE(review): the docstring fragment
# (original line 163) spells this `schema.ssObjectDtype` while the code says
# `schema.SSObjectDtype` -- one of the two is wrong; verify.
210 totalNumObjects = np.unique(sss[
"ssObjectId"]).size
211 obj = np.zeros(totalNumObjects, dtype=schema.SSObjectDtype)
# Per-object computation: compute_ssobject_entry is bound with the fit
# options (the functools.partial / wrapper call on missing line ~214 that
# produces `callback` is absent) and driven by util.group_by.
215 compute_ssobject_entry, fixedG12=fixedG12,
216 magSigmaFloor=magSigmaFloor, nSigmaClip=nSigmaClip,
218 util.group_by([sss],
"ssObjectId", callback, out=obj)
# Orbital extras (Tisserand J, Earth MOID) only when MPC orbits are given.
225 if mpcorb
is not None:
# Match objects to MPC orbits by designation.
230 oidx, midx = util.argjoin(
231 obj[
"designation"].astype(
"U"),
232 mpcorb[
"unpacked_primary_provisional_designation"].to_numpy().astype(
"U"),
# Post-join consistency check (enclosing statement on missing lines 233-237).
235 mpcorb[
"unpacked_primary_provisional_designation"].take(midx)
236 == obj[
"designation"][oidx].astype(
"U")
238 q, e, i, node, argperi, epoch_mjd = util.unpack(
239 mpcorb[
"q e i node argperi epoch_mjd".split()].take(midx)
# NOTE(review): the semi-major axis `a` used below is computed on a missing
# line (original ~240-241), presumably from q and e. TODO confirm.
242 obj[
"tisserand_J"][oidx] = util.tisserand_jupiter(a, e, i)
# Per-object MOID loop. NOTE(review): the loop variable `i` shadows the
# inclination array `i` unpacked at original line 238; zip() captures the
# arrays before iteration starts so the zip itself is unaffected, but
# `epoch_mjd[i]` below then indexes with the loop counter -- confusing and
# fragile; worth renaming in the restored original. `solver` and `row` are
# set up on missing lines (MOIDSolver, per the Notes fragment above).
246 for i, el_obj
in enumerate(zip(a, e, i, node, argperi)):
247 earth = earth_orbit(epoch_mjd[i])
248 (moid, deltaV, eclon, trueEarth, trueObject) = solver.compute(earth, el_obj)
250 row[
"MOIDEarth"] = moid
251 row[
"MOIDEarthDeltaV"] = deltaV
252 row[
"MOIDEarthEclipticLongitude"] = eclon
253 row[
"MOIDEarthTrueAnomaly"] = trueEarth
254 row[
"MOIDEarthTrueAnomalyObject"] = trueObject
# NOTE(review): corrupted partial extraction of the CLI entry point ("CLI
# entry point ..." below is a docstring fragment whose quote delimiters and
# enclosing `def` line, original ~260, are missing). Many lines are absent,
# including the `try:` that matches the `except` at original line 347, the
# dia_columns / most of the mpcorb_columns list definitions, and everything
# past original line 348 (the function continues beyond this view). Code is
# reproduced byte-for-byte; comments only.
# --- docstring fragment ---
261 CLI entry point for building SSObject table from SSSource,
262 DiaSource, and MPC orbit data.
# --- end docstring fragment ---
264 parser = argparse.ArgumentParser(
265 description=
"Build SSObject table from SSSource, DiaSource, and MPC orbit Parquet files",
266 formatter_class=argparse.RawDescriptionHelpFormatter,
# Fragment of the epilog usage example (its enclosing string is missing).
269 ssp-build-ssobject sssource.parquet dia_sources.parquet mpc_orbits.parquet --output ssobject.parquet
273 parser.add_argument(
"sssource_parquet", help=
"Path to SSSource Parquet file")
274 parser.add_argument(
"diasource_parquet", help=
"Path to DiaSource Parquet file")
275 parser.add_argument(
"mpcorb_parquet", help=
"Path to MPC orbits Parquet file")
276 parser.add_argument(
"--output",
"-o", required=
True, help=
"Path to output SSObject Parquet file")
# Fragment of a debugging flag (its add_argument call and option name are on
# missing lines 277-279).
280 help=
"Re-raise exceptions instead of exiting gracefully (for debugging)",
283 args = parser.parse_args()
287 print(f
"Loading SSSource from {args.sssource_parquet}...")
288 sss = pd.read_parquet(args.sssource_parquet, engine=
"pyarrow", dtype_backend=
"pyarrow").reset_index(
292 print(f
"Loaded {num:,} SSSource rows")
305 print(f
"Loading DiaSource from {args.diasource_parquet}...")
# `dia_columns` is defined on missing lines (~293-304).
306 dia = pd.read_parquet(
307 args.diasource_parquet, engine=
"pyarrow", dtype_backend=
"pyarrow", columns=dia_columns
308 ).reset_index(drop=
True)
309 print(f
"Loaded {len(dia):,} DiaSource rows")
# Ids must be non-negative before the unsigned cast below.
312 assert np.all(dia[
"diaSourceId"] >= 0)
313 dia[
"diaSourceId"] = dia[
"diaSourceId"].astype(
"uint64[pyarrow]")
# Orphaned fragment of the mpcorb_columns list (surrounding lines missing).
317 "unpacked_primary_provisional_designation",
330 print(f
"Loading MPC orbits from {args.mpcorb_parquet}...")
331 mpcorb = pd.read_parquet(
332 args.mpcorb_parquet, engine=
"pyarrow", dtype_backend=
"pyarrow", columns=mpcorb_columns
333 ).reset_index(drop=
True)
334 print(f
"Loaded {len(mpcorb):,} MPC orbit rows")
337 print(
"Computing SSObject data...")
# The compute_ssobject(...) call producing `obj` is on missing lines
# (~338-340) -- TODO confirm.
341 print(f
"Saving {len(obj):,} SSObject rows to {args.output}...")
342 util.struct_to_parquet(obj, args.output)
344 print(f
"Success! Created SSObject with {len(obj):,} objects")
345 print(f
"Row size: {obj.dtype.itemsize:,} bytes, Total size: {obj.nbytes:,} bytes")
# Top-level error boundary; its `try:` is on a missing line. Presumably the
# debug flag advertised at original line 280 controls a re-raise in the
# (missing) lines after 348 -- verify once recovered.
347 except Exception
as e:
348 print(f
"Error: {e}", file=sys.stderr)