# Python dtypes for the DiaSource columns carried through the
# SSSource/DiaSource join — presumably parallel (one entry per column, in
# order) to the DIA_COLUMNS list used for the merge; TODO confirm, the
# DIA_COLUMNS definition is not visible in this chunk.
DIA_DTYPES = [int, float, float, float, float, str, float, float]
129 Compute solar system object properties by joining and processing
130 SSSource, DiaSource, and MPC orbit data.
132 This function takes a pre-grouped SSSource table, joins it with
133 DiaSource data, computes per-object quantities, and calculates
134 additional orbital parameters like Tisserand J and Minimum Orbit
135 Intersection Distance (MOID) with Earth for matching objects.
139 sss : pandas.DataFrame
140 SSSource table, pre-grouped by 'ssObjectId'. Must be sorted by
141 'ssObjectId' for correct grouping. Contains columns like
142 'ssObjectId', 'diaSourceId', etc.
143 dia : pandas.DataFrame
144 DiaSource table with columns prefixed as 'dia_' in the join.
145 Must include 'dia_diaSourceId', 'dia_psfFlux', 'dia_psfFluxErr',
147 mpcorb : pandas.DataFrame
148 MPC orbit data with columns like
149 'unpacked_primary_provisional_designation', 'q', 'e', 'i',
155 Array of ssObject records with dtype schema.ssObjectDtype,
156 containing computed properties for each unique ssObjectId,
157 including magnitudes, orbital elements, Tisserand J, and
163 If 'sss' is not pre-grouped by 'ssObjectId', or if DiaSources
164 are missing after join.
168 - The function assumes 'sss' is large and avoids internal
169 sorting/copying for efficiency.
170 - Tisserand J and MOID are computed only for objects matching
171 designations in 'mpcorb'.
172 - MOID computation uses a MOIDSolver for each matched object.
176 assert util.values_grouped(sss[
"ssObjectId"]), (
177 "SSSource table must be pre-grouped by ssObjectId. "
178 "An easy way to do this is to sort by ssObjectId before calling compute_ssobject(). "
179 "The grouping is required for correct per-object computations, and since SSSource is "
180 "typically large and we want to avoid copies, it's not done internally."
185 dia_tmp = dia[DIA_COLUMNS].add_prefix(
"dia_")
191 dia_tmp[
"dia_diaSourceId"] = dia_tmp[
"dia_diaSourceId"].astype(
"int64[pyarrow]")
192 sss = sss.merge(dia_tmp, left_on=
"diaSourceId", right_on=
"dia_diaSourceId", how=
"inner")
193 assert num == len(sss), f
"{num - len(sss)} DiaSources found missing."
194 del sss[
"dia_diaSourceId"]
198 sss[
"dia_psfMag"] =
nJy_to_mag(sss[
"dia_psfFlux"])
202 totalNumObjects = np.unique(sss[
"ssObjectId"]).size
203 obj = np.zeros(totalNumObjects, dtype=schema.SSObjectDtype)
206 util.group_by([sss],
"ssObjectId", compute_ssobject_entry, out=obj)
213 if mpcorb
is not None:
218 oidx, midx = util.argjoin(
219 obj[
"designation"].astype(
"U"),
220 mpcorb[
"unpacked_primary_provisional_designation"].to_numpy().astype(
"U"),
223 mpcorb[
"unpacked_primary_provisional_designation"].take(midx)
224 == obj[
"designation"][oidx].astype(
"U")
226 q, e, i, node, argperi = util.unpack(mpcorb[
"q e i node argperi".split()].take(midx))
228 obj[
"tisserand_J"][oidx] = util.tisserand_jupiter(a, e, i)
232 for i, el_obj
in enumerate(zip(a, e, i, node, argperi)):
233 (moid, deltaV, eclon, trueEarth, trueObject) = solver.compute(earth_orbit_J2000(), el_obj)
235 row[
"MOIDEarth"] = moid
236 row[
"MOIDEarthDeltaV"] = deltaV
237 row[
"MOIDEarthEclipticLongitude"] = eclon
238 row[
"MOIDEarthTrueAnomaly"] = trueEarth
239 row[
"MOIDEarthTrueAnomalyObject"] = trueObject
246 CLI entry point for building SSObject table from SSSource,
247 DiaSource, and MPC orbit data.
249 parser = argparse.ArgumentParser(
250 description=
"Build SSObject table from SSSource, DiaSource, and MPC orbit Parquet files",
251 formatter_class=argparse.RawDescriptionHelpFormatter,
254 ssp-build-ssobject sssource.parquet dia_sources.parquet mpc_orbits.parquet --output ssobject.parquet
258 parser.add_argument(
"sssource_parquet", help=
"Path to SSSource Parquet file")
259 parser.add_argument(
"diasource_parquet", help=
"Path to DiaSource Parquet file")
260 parser.add_argument(
"mpcorb_parquet", help=
"Path to MPC orbits Parquet file")
261 parser.add_argument(
"--output",
"-o", required=
True, help=
"Path to output SSObject Parquet file")
265 help=
"Re-raise exceptions instead of exiting gracefully (for debugging)",
268 args = parser.parse_args()
272 print(f
"Loading SSSource from {args.sssource_parquet}...")
273 sss = pd.read_parquet(args.sssource_parquet, engine=
"pyarrow", dtype_backend=
"pyarrow").reset_index(
277 print(f
"Loaded {num:,} SSSource rows")
290 print(f
"Loading DiaSource from {args.diasource_parquet}...")
291 dia = pd.read_parquet(
292 args.diasource_parquet, engine=
"pyarrow", dtype_backend=
"pyarrow", columns=dia_columns
293 ).reset_index(drop=
True)
294 print(f
"Loaded {len(dia):,} DiaSource rows")
297 assert np.all(dia[
"diaSourceId"] >= 0)
298 dia[
"diaSourceId"] = dia[
"diaSourceId"].astype(
"uint64[pyarrow]")
302 "unpacked_primary_provisional_designation",
315 print(f
"Loading MPC orbits from {args.mpcorb_parquet}...")
316 mpcorb = pd.read_parquet(
317 args.mpcorb_parquet, engine=
"pyarrow", dtype_backend=
"pyarrow", columns=mpcorb_columns
318 ).reset_index(drop=
True)
319 print(f
"Loaded {len(mpcorb):,} MPC orbit rows")
322 print(
"Computing SSObject data...")
326 print(f
"Saving {len(obj):,} SSObject rows to {args.output}...")
327 util.struct_to_parquet(obj, args.output)
329 print(f
"Success! Created SSObject with {len(obj):,} objects")
330 print(f
"Row size: {obj.dtype.itemsize:,} bytes, Total size: {obj.nbytes:,} bytes")
332 except Exception
as e:
333 print(f
"Error: {e}", file=sys.stderr)