# Python dtypes for the DiaSource columns carried into the SSObject join.
# NOTE(review): presumably parallel (same order) to DIA_COLUMNS used in
# compute_ssobject's merge — confirm; the single str entry among numeric
# ones looks like a band/filter-name column — verify against DIA_COLUMNS.
DIA_DTYPES = [int, float, float, float, float, str, float, float]
128 Compute solar system object properties by joining and processing
129 SSSource, DiaSource, and MPC orbit data.
131 This function takes a pre-grouped SSSource table, joins it with
132 DiaSource data, computes per-object quantities, and calculates
133 additional orbital parameters like Tisserand J and Minimum Orbit
134 Intersection Distance (MOID) with Earth for matching objects.
138 sss : pandas.DataFrame
139 SSSource table, pre-grouped by 'ssObjectId'. Must be sorted by
140 'ssObjectId' for correct grouping. Contains columns like
141 'ssObjectId', 'diaSourceId', etc.
142 dia : pandas.DataFrame
143 DiaSource table with columns prefixed as 'dia_' in the join.
144 Must include 'dia_diaSourceId', 'dia_psfFlux', 'dia_psfFluxErr',
146 mpcorb : pandas.DataFrame
147 MPC orbit data with columns like
148 'unpacked_primary_provisional_designation', 'q', 'e', 'i',
Array of ssObject records with dtype schema.SSObjectDtype,
155 containing computed properties for each unique ssObjectId,
156 including magnitudes, orbital elements, Tisserand J, and
162 If 'sss' is not pre-grouped by 'ssObjectId', or if DiaSources
163 are missing after join.
167 - The function assumes 'sss' is large and avoids internal
168 sorting/copying for efficiency.
169 - Tisserand J and MOID are computed only for objects matching
170 designations in 'mpcorb'.
171 - MOID computation uses a MOIDSolver for each matched object.
175 assert util.values_grouped(sss[
"ssObjectId"]), (
176 "SSSource table must be pre-grouped by ssObjectId. "
177 "An easy way to do this is to sort by ssObjectId before calling compute_ssobject(). "
178 "The grouping is required for correct per-object computations, and since SSSource is "
179 "typically large and we want to avoid copies, it's not done internally."
184 dia_tmp = dia[DIA_COLUMNS].add_prefix(
"dia_")
190 dia_tmp[
"dia_diaSourceId"] = dia_tmp[
"dia_diaSourceId"].astype(
"int64[pyarrow]")
191 sss = sss.merge(dia_tmp, left_on=
"diaSourceId", right_on=
"dia_diaSourceId", how=
"inner")
192 assert num == len(sss), f
"{num - len(sss)} DiaSources found missing."
193 del sss[
"dia_diaSourceId"]
197 sss[
"dia_psfMag"] =
nJy_to_mag(sss[
"dia_psfFlux"])
201 totalNumObjects = np.unique(sss[
"ssObjectId"]).size
202 obj = np.zeros(totalNumObjects, dtype=schema.SSObjectDtype)
205 util.group_by([sss],
"ssObjectId", compute_ssobject_entry, out=obj)
212 if mpcorb
is not None:
217 oidx, midx = util.argjoin(
218 obj[
"designation"].astype(
"U"),
219 mpcorb[
"unpacked_primary_provisional_designation"].to_numpy().astype(
"U"),
222 mpcorb[
"unpacked_primary_provisional_designation"].take(midx)
223 == obj[
"designation"][oidx].astype(
"U")
225 q, e, i, node, argperi = util.unpack(mpcorb[
"q e i node argperi".split()].take(midx))
227 obj[
"tisserand_J"][oidx] = util.tisserand_jupiter(a, e, i)
231 for i, el_obj
in enumerate(zip(a, e, i, node, argperi)):
232 (moid, deltaV, eclon, trueEarth, trueObject) = solver.compute(earth_orbit_J2000(), el_obj)
234 row[
"MOIDEarth"] = moid
235 row[
"MOIDEarthDeltaV"] = deltaV
236 row[
"MOIDEarthEclipticLongitude"] = eclon
237 row[
"MOIDEarthTrueAnomaly"] = trueEarth
238 row[
"MOIDEarthTrueAnomalyObject"] = trueObject
245 CLI entry point for building SSObject table from SSSource,
246 DiaSource, and MPC orbit data.
248 parser = argparse.ArgumentParser(
249 description=
"Build SSObject table from SSSource, DiaSource, and MPC orbit Parquet files",
250 formatter_class=argparse.RawDescriptionHelpFormatter,
253 ssp-build-ssobject sssource.parquet dia_sources.parquet mpc_orbits.parquet --output ssobject.parquet
257 parser.add_argument(
"sssource_parquet", help=
"Path to SSSource Parquet file")
258 parser.add_argument(
"diasource_parquet", help=
"Path to DiaSource Parquet file")
259 parser.add_argument(
"mpcorb_parquet", help=
"Path to MPC orbits Parquet file")
260 parser.add_argument(
"--output",
"-o", required=
True, help=
"Path to output SSObject Parquet file")
264 help=
"Re-raise exceptions instead of exiting gracefully (for debugging)",
267 args = parser.parse_args()
271 print(f
"Loading SSSource from {args.sssource_parquet}...")
272 sss = pd.read_parquet(args.sssource_parquet, engine=
"pyarrow", dtype_backend=
"pyarrow").reset_index(
276 print(f
"Loaded {num:,} SSSource rows")
289 print(f
"Loading DiaSource from {args.diasource_parquet}...")
290 dia = pd.read_parquet(
291 args.diasource_parquet, engine=
"pyarrow", dtype_backend=
"pyarrow", columns=dia_columns
292 ).reset_index(drop=
True)
293 print(f
"Loaded {len(dia):,} DiaSource rows")
296 assert np.all(dia[
"diaSourceId"] >= 0)
297 dia[
"diaSourceId"] = dia[
"diaSourceId"].astype(
"uint64[pyarrow]")
301 "unpacked_primary_provisional_designation",
314 print(f
"Loading MPC orbits from {args.mpcorb_parquet}...")
315 mpcorb = pd.read_parquet(
316 args.mpcorb_parquet, engine=
"pyarrow", dtype_backend=
"pyarrow", columns=mpcorb_columns
317 ).reset_index(drop=
True)
318 print(f
"Loaded {len(mpcorb):,} MPC orbit rows")
321 print(
"Computing SSObject data...")
325 print(f
"Saving {len(obj):,} SSObject rows to {args.output}...")
326 util.struct_to_parquet(obj, args.output)
328 print(f
"Success! Created SSObject with {len(obj):,} objects")
329 print(f
"Row size: {obj.dtype.itemsize:,} bytes, Total size: {obj.nbytes:,} bytes")
331 except Exception
as e:
332 print(f
"Error: {e}", file=sys.stderr)