# --- Sanity checks: DiaSource <-> MPC association cross-match ---------------
# NOTE(review): reconstructed from a line-mangled extract; `dia`, `assoc`,
# `astropy`, `pd`, and `Time` are defined/imported earlier in the file.

# Keep only the columns we need, re-ordered row-for-row to match `assoc`.
dia = dia[["ra", "dec", "midpointMjdTai"]].iloc[assoc["dia_index"].values]

# Angular separation between the observed (dia) and MPC-reported positions.
obs = astropy.coordinates.SkyCoord(ra=dia["ra"].values, dec=dia["dec"].values, unit="deg")
mpc = astropy.coordinates.SkyCoord(ra=assoc["mpc_ra"].values, dec=assoc["mpc_dec"].values, unit="deg")
sep = obs.separation(mpc)

# FIX(review): "diffeerence" -> "difference" in the two report strings.
print("Separation difference range (arcsec): ", sep.min().arcsec, sep.max().arcsec)
assert sep.min().arcsec >= 0  # separations are non-negative; kept as a sanity guard
assert sep.max().arcsec <= 0.005

# Compare epochs: convert TAI midpoints to UTC and difference against the
# MPC-reported observation times.
t_utc = Time(dia["midpointMjdTai"].to_numpy(), format="mjd", scale="tai").utc
midpoint_utc = pd.to_datetime(t_utc.to_datetime())
mpc_time = pd.to_datetime(assoc["mpc_obstime"])
delta_sec = (mpc_time - midpoint_utc).dt.total_seconds()

print("Time difference range (sec): ", delta_sec.min(), delta_sec.max())
# presumably the 0.51 s tolerance reflects MPC time quantization — TODO confirm
assert abs(delta_sec).max() < 0.51

print(f"All OK, {len(assoc):,} observations.")
def mpc_packed_to_uint64(mpc_packed):
    """
    Convert a pandas string[pyarrow] column of ASCII strings (<= 8 bytes)
    to little-endian uint64 by left-padding with ASCII spaces to 8 chars.

    NOTE(review): the original ``def`` line was lost in extraction; the
    name/signature here are reconstructed — confirm against the source.
    """
    values = pa.array(mpc_packed, type=pa.string())
    # Left-pad with spaces so every entry is exactly 8 ASCII characters.
    padded = pc.ascii_lpad(values, 8, " ")
    # Reinterpret the strings as raw bytes.
    as_bytes = pc.cast(padded, pa.binary())
    # Defensive truncation: no value may exceed 8 bytes.
    fixed = pc.binary_slice(as_bytes, 0, 8)
    # Flatten chunked results so a single contiguous data buffer exists.
    if isinstance(fixed, pa.ChunkedArray):
        fixed = fixed.combine_chunks()
    # Binary-array buffers are [validity, offsets, data]; grab the data buffer.
    buf = fixed.buffers()[2]
    # All entries are exactly 8 bytes, so the data buffer is a packed run of
    # little-endian uint64 values.
    return np.frombuffer(buf, dtype="<u8")
def group_by(arrs, key, func, out=None, check_grouped=True):
    """
    Group multiple NumPy arrays by arrs[0][key], assuming the key column
    is already grouped (equal keys are contiguous), but group blocks may
    appear in any order.

    For each group, calls func(row_view, *subarrs) when ``out`` is given,
    otherwise func(*subarrs).

    Parameters
    ----------
    arrs : list/tuple of ndarray
        Equal-length arrays. arrs[0] contains the grouping key.
    key : str
        Column name in arrs[0] to group by.
    func : callable
        Called either as func(row, *subarrs) or func(*subarrs).
    out : ndarray or None
        Optional preallocated structured output array.
    check_grouped : bool
        If True, verify that key values are grouped contiguously.

    Returns
    -------
    ``out`` when a preallocated output array is given, otherwise a dict
    mapping each unique key to func's return value.

    NOTE(review): reconstructed from a mangled extract — the contiguity-check
    body, the ``out`` invocation of func, and the return statements were lost;
    confirm against the original source.
    """
    keys = np.asarray(arrs[0][key])

    if check_grouped:
        # Keys are "grouped" iff each distinct value occupies one contiguous
        # run. Track values whose run has ended; a reappearance is an error.
        seen = set()
        for i in range(1, len(keys)):
            k = keys[i]
            if k != keys[i - 1]:
                seen.add(keys[i - 1])
                if k in seen:
                    raise ValueError(
                        f"Key '{key}' is not properly grouped. "
                        f"Value {k} reappears at index {i} "
                        f"after a different key was encountered."
                    )

    # Because equal keys are contiguous, first-occurrence index + count
    # delimits each group's [start, end) block, regardless of block order.
    unique_keys, idx_start, counts = np.unique(keys, return_index=True, return_counts=True)
    idx_end = idx_start + counts
    n_groups = len(unique_keys)

    if out is not None:
        if len(out) < n_groups:
            raise ValueError(f"Out array too small: need {n_groups}, have {len(out)}")
        for out_idx, (start, end) in enumerate(zip(idx_start, idx_end)):
            subarrs = tuple(a[start:end] for a in arrs)
            # func fills the preallocated output row in place.
            func(out[out_idx], *subarrs)
            if out_idx % 100 == 0:
                # Coarse progress reporting for long-running jobs.
                print(f"[{datetime.now().isoformat()}] count={out_idx}")
        return out

    results = {}
    for keyval, start, end in zip(unique_keys, idx_start, idx_end):
        subarrs = tuple(a[start:end] for a in arrs)
        results[keyval] = func(*subarrs)
    return results
def obscode_to_earthlocation(obscode):
    """
    Convert an MPC observatory code (e.g. 'X05') to an EarthLocation,
    using MPC.get_observatory_codes() columns:
    Code, Longitude, cos, sin, Name.

    NOTE(review): reconstructed from a mangled extract — the ``def`` line,
    the row-count check condition, and the z component assignment were lost;
    confirm name/signature against the source.

    Raises
    ------
    ValueError
        If the code is unknown/ambiguous, or has no ground position
        (masked parallax constants, e.g. a spacecraft).
    """
    tbl = MPC.get_observatory_codes()
    row = tbl[tbl["Code"] == obscode]
    if len(row) != 1:
        raise ValueError(f"Unknown or ambiguous obscode {obscode!r}")
    # FIX(review): take the single Row so the values below are scalars rather
    # than length-1 Columns (avoids an array-valued EarthLocation).
    row = row[0]

    # Spacecraft entries have masked parallax constants.
    if ma.is_masked(row["Longitude"]) or ma.is_masked(row["cos"]) or ma.is_masked(row["sin"]):
        raise ValueError(f"Obscode {obscode!r} has no ground position (spacecraft?)")

    # MPC publishes east longitude (deg) and the parallax constants
    # rho*cos(phi'), rho*sin(phi') in units of Earth radii.
    lon = (row["Longitude"] * u.deg).to(u.rad).value
    rho_cosphi = float(row["cos"])
    rho_sinphi = float(row["sin"])

    # Geocentric Cartesian position in Earth radii.
    x_er = rho_cosphi * np.cos(lon)
    y_er = rho_cosphi * np.sin(lon)
    z_er = rho_sinphi  # NOTE(review): reconstructed; original line lost

    x = (x_er * R_earth).to(u.m)
    y = (y_er * R_earth).to(u.m)
    z = (z_er * R_earth).to(u.m)

    return EarthLocation.from_geocentric(x, y, z)
def obscode_barycentric_posvel(obscode, obstime):
    """
    Barycentric (ICRS) position and velocity of an observatory given an
    MPC obscode, using JPL DE440 for the Earth ephemeris.

    Returns
    -------
    r_bary : Quantity, shape (3, ...)
        Barycentric position in AU.
    v_bary : Quantity, shape (3, ...)
        Barycentric velocity in AU/day.

    NOTE(review): reconstructed from a mangled extract — the ``def`` line and
    the EarthLocation lookup were lost; confirm the helper name and signature
    against the source.
    """
    # presumably resolved via the obscode->EarthLocation helper — TODO confirm
    loc = obscode_to_earthlocation(obscode)

    # Geocentric (GCRS) position/velocity of the site at the given epoch(s).
    obsgeoloc, obsgeovel = loc.get_gcrs_posvel(obstime)

    # Earth's barycentric state from the DE440 ephemeris.
    with solar_system_ephemeris.set("de440"):
        earth_pos, earth_vel = get_body_barycentric_posvel("earth", obstime)

    # Work in SI units throughout, converting once at the end.
    r_earth_si = earth_pos.xyz.to(u.m)
    v_earth_si = earth_vel.xyz.to(u.m / u.s)

    # The getattr fallback handles either a CartesianRepresentation (has
    # .xyz) or a bare Quantity — presumably an astropy-version difference.
    r_site_geo_si = getattr(obsgeoloc, "xyz", obsgeoloc).to(u.m)
    v_site_geo_si = getattr(obsgeovel, "xyz", obsgeovel).to(u.m / u.s)

    # Barycentric site state = Earth barycentric state + geocentric offset.
    r_site_bary_si = r_earth_si + r_site_geo_si
    v_site_bary_si = v_earth_si + v_site_geo_si

    r_site_bary = r_site_bary_si.to(u.au)
    v_site_bary = v_site_bary_si.to(u.au / u.day)

    return r_site_bary, v_site_bary
def struct_to_parquet(
    arr,
    path,
    chunk_size: Optional[int] = None,
    row_group_size: Optional[int] = None,
):
    """
    Write a large NumPy structured array to a Parquet file using PyArrow.

    Designed for dtypes like dia_dtype / orbit_dtype with up to ~1e8 rows.

    NOTE(review): reconstructed from a mangled extract — the leading
    parameters and several statements were lost; the function name comes from
    its own error message. Confirm signature against the source.
    """
    if arr.dtype.names is None:
        raise TypeError(
            "struct_to_parquet expects a structured NumPy array (dtype.names is None).")

    n_rows = len(arr)  # NOTE(review): reconstructed; original line lost

    # Small arrays go out in a single chunk; very large arrays stream in
    # 1M-row chunks. FIX(review): guard against n_rows == 0, which would
    # otherwise make range()'s step zero and raise ValueError.
    if chunk_size is None:
        if n_rows <= 10_000_000:
            chunk_size = max(n_rows, 1)
        else:
            chunk_size = 1_000_000

    def _numpy_to_arrow_array(col: np.ndarray, name: str) -> pa.Array:
        """
        Convert a 1D NumPy column view to a PyArrow Array.

        - Object / unicode / bytes -> Arrow string(), with padding stripped
          for fixed-width 'S'/'U' dtypes.
        - Numeric types -> Arrow infers type from NumPy.
        """
        kind = col.dtype.kind
        if kind == "O":
            # Object columns: coerce elements to Arrow strings.
            return pa.array(col, type=pa.string())
        if kind == "S":
            # Fixed-width bytes: decode, then strip NUL padding.
            decoded = np.char.decode(col, "utf-8", errors="ignore")
            stripped = np.char.rstrip(decoded, "\x00")
            return pa.array(stripped, type=pa.string())
        if kind == "U":
            # Fixed-width unicode: strip NUL padding.
            stripped = np.char.rstrip(col, "\x00")
            return pa.array(stripped, type=pa.string())
        # Numeric / bool / datetime: let Arrow infer the type from NumPy.
        return pa.array(col)

    def _chunk_to_table(chunk: np.ndarray) -> pa.Table:
        """Build a pyarrow Table from one structured-array slice."""
        arrays = []
        fields = []
        for name in chunk.dtype.names:
            col = chunk[name]
            arr_arrow = _numpy_to_arrow_array(col, name)
            arrays.append(arr_arrow)
            fields.append(pa.field(name, arr_arrow.type))
        schema = pa.schema(fields)
        return pa.Table.from_arrays(arrays, schema=schema)

    writer: Optional[pq.ParquetWriter] = None
    try:
        for start in range(0, n_rows, chunk_size):
            end = min(start + chunk_size, n_rows)
            chunk = arr[start:end]
            table = _chunk_to_table(chunk)
            if writer is None:
                # Schema comes from the first chunk; all chunks share a dtype.
                writer = pq.ParquetWriter(path, table.schema)
            writer.write_table(table, row_group_size=row_group_size)
    finally:
        # FIX(review): close in a finally block so the file handle is
        # released even if chunk conversion or a write fails midway.
        if writer is not None:
            writer.close()
def argjoin(a, v):
    """
    Perform an efficient inner join between two 1-D NumPy arrays, returning
    the index pairs that match by value.

    Parameters
    ----------
    a : np.ndarray
        The left-hand array to join on. Must be 1-dimensional.
    v : np.ndarray
        The right-hand array to join on. Must be 1-dimensional.

    Returns
    -------
    aidx : np.ndarray
        Indices into `a` selecting the rows that participate in the join.
    vidx : np.ndarray
        Indices into `v` selecting the corresponding matching rows.

    ``a[aidx] == v[vidx]`` is guaranteed to be true for all elements.

    Notes
    -----
    This function implements a pure NumPy equivalent of an SQL-style
    INNER JOIN on the key columns `a` and `v`:

    1. Sort `a` to produce a permutation `i` so that `a[i]` is sorted.
    2. Use `np.searchsorted(a[i], v)` to find, for each element of `v`,
       the candidate matching location in the sorted array.
    3. Map these positions back to the coordinates of the original array `a`
       using the permutation `i`.
    4. Filter out non-matches (values in `v` not present in `a`).
       The remaining pairs form the inner join.

    Complexity:
        Sorting:   O(len(a) log len(a))
        Searching: O(len(v) log len(a))
    This is optimal for join-like operations on unsorted arrays in NumPy.

    Examples
    --------
    >>> a = np.array(["b", "a", "c", "b"])
    >>> v = np.array(["a", "b", "x", "b"])
    >>> aidx, vidx = argjoin(a, v)
    >>> a[aidx]
    array(['a', 'b', 'b'])
    >>> v[vidx]
    array(['a', 'b', 'b'])
    """
    # FIX(review): with an empty `a`, np.clip(idx, 0, -1) yields -1 and
    # ai[idx] raises IndexError on the empty array; short-circuit instead.
    if len(a) == 0 or len(v) == 0:
        return np.array([], dtype=np.intp), np.array([], dtype=np.intp)

    # Permutation that sorts `a`; NOTE(review): these setup lines were lost
    # in extraction and are reconstructed from the docstring's algorithm.
    i = np.argsort(a, kind="stable")
    ai = a[i]

    # Candidate position of each element of `v` within the sorted `a`.
    idx = np.searchsorted(ai, v)

    # Values beyond ai's last element return len(ai); clip so indexing is
    # safe — the equality mask below rejects those non-matches anyway.
    idx = np.clip(idx, 0, len(ai) - 1)

    # Map sorted-order positions back to original coordinates of `a`.
    aidx_candidate = i[idx]

    # Keep only true value matches (values of `v` actually present in `a`).
    mask = ai[idx] == v
    vidx = np.flatnonzero(mask)
    aidx = aidx_candidate[vidx]

    # Internal sanity check on the join invariant.
    assert np.all(a[aidx] == v[vidx])
    return aidx, vidx