# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Tests for ParquetFormatter.

Tests in this module are disabled unless pandas and pyarrow are importable.
"""

import os
import unittest

try:
    import pyarrow as pa
except ImportError:
    pa = None
try:
    import astropy.table as atable
    from astropy import units
except ImportError:
    atable = None
try:
    import numpy as np
except ImportError:
    np = None
try:
    import pandas as pd
except ImportError:
    pd = None
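
# Each optional dependency is reduced to a module-or-None sentinel; the
# skipUnless decorators on the test classes below key off these sentinels
# to disable tests when a dependency is missing.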
from lsst.daf.butler import (
    Butler,
    Config,
    DatasetRef,
    DatasetType,
    FileDataset,
    StorageClassConfig,
    StorageClassFactory,
)
from lsst.daf.butler.delegates.arrowastropy import ArrowAstropyDelegate
from lsst.daf.butler.delegates.arrownumpy import ArrowNumpyDelegate
from lsst.daf.butler.delegates.arrowtable import ArrowTableDelegate
from lsst.daf.butler.delegates.dataframe import DataFrameDelegate
from lsst.daf.butler.formatters.parquet import (
    ArrowAstropySchema,
    ArrowNumpySchema,
    DataFrameSchema,
    ParquetFormatter,
    _append_numpy_multidim_metadata,
    _astropy_to_numpy_dict,
    _numpy_dict_to_numpy,
    _numpy_dtype_to_arrow_types,
    _numpy_to_numpy_dict,
    arrow_to_astropy,
    arrow_to_numpy,
    arrow_to_numpy_dict,
    arrow_to_pandas,
    astropy_to_arrow,
    numpy_dict_to_arrow,
    numpy_to_arrow,
    pandas_to_arrow,
)
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def _makeSimpleNumpyTable(include_multidim=False, include_bigendian=False):
    """Make a simple numpy table with random data.

    Parameters
    ----------
    include_multidim : `bool`
        Include multi-dimensional columns.
    include_bigendian : `bool`
        Include big-endian columns.

    Returns
    -------
    numpyTable : `numpy.ndarray`
        The test table.
    """
    nrow = 5

    dtype = [
        ("index", "i4"),
        ("a", "f8"),
        ("b", "f8"),
        ("c", "f8"),
        ("ddd", "f8"),
        ("f", "i8"),
        ("strcol", "U10"),
        ("bytecol", "a10"),
    ]

    if include_multidim:
        dtype.extend(
            [
                ("d1", "f4", (5,)),
                ("d2", "i8", (5, 10)),
                ("d3", "f8", (5, 10)),
            ]
        )

    if include_bigendian:
        dtype.extend([("a_bigendian", ">f8"), ("f_bigendian", ">i8")])

    data = np.zeros(nrow, dtype=dtype)
    data["index"][:] = np.arange(nrow)
    data["a"] = np.random.randn(nrow)
    data["b"] = np.random.randn(nrow)
    data["c"] = np.random.randn(nrow)
    data["ddd"] = np.random.randn(nrow)
    data["f"] = np.arange(nrow) * 10
    data["strcol"][:] = "teststring"
    data["bytecol"][:] = "teststring"

    if include_multidim:
        data["d1"] = np.random.randn(data["d1"].size).reshape(data["d1"].shape)
        data["d2"] = np.arange(data["d2"].size).reshape(data["d2"].shape)
        data["d3"] = np.asfortranarray(np.random.randn(data["d3"].size).reshape(data["d3"].shape))
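        # d3 is deliberately Fortran-ordered, presumably to exercise
        # conversion of arrays that are not C-contiguous in memory.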

    if include_bigendian:
        data["a_bigendian"][:] = data["a"]
        data["f_bigendian"][:] = data["f"]

    return data


def _makeSingleIndexDataFrame(include_masked=False):
    """Make a single index data frame for testing.

    Parameters
    ----------
    include_masked : `bool`
        Include masked columns.

    Returns
    -------
    dataFrame : `~pandas.DataFrame`
        The test dataframe.
    allColumns : `list` [`str`]
        List of all the columns (including index columns).
    """
    data = _makeSimpleNumpyTable()
    df = pd.DataFrame(data)
    df = df.set_index("index")

    if include_masked:
        nrow = len(df)

        df["m1"] = pd.array(np.arange(nrow), dtype=pd.Int64Dtype())
        df["m2"] = pd.array(np.arange(nrow), dtype=np.float32)
        df["mstrcol"] = pd.array(np.array(["text"] * nrow))
        df.loc[1, ["m1", "m2", "mstrcol"]] = None

    allColumns = df.columns.append(pd.Index(df.index.names))

    return df, allColumns


def _makeMultiIndexDataFrame():
    """Make a multi-index data frame for testing.

    Returns
    -------
    dataFrame : `~pandas.DataFrame`
        The test dataframe.
    """
    columns = pd.MultiIndex.from_tuples(
        [
            ("g", "a"),
            ("g", "b"),
            ("g", "c"),
            ("r", "a"),
            ("r", "b"),
            ("r", "c"),
        ],
        names=["filter", "column"],
    )
    df = pd.DataFrame(np.random.randn(5, 6), index=np.arange(5, dtype=int), columns=columns)
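    # The resulting frame has a two-level column MultiIndex named
    # ("filter", "column"): filters "g" and "r" crossed with columns
    # "a", "b" and "c".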

    return df


def _makeSimpleAstropyTable(include_multidim=False, include_masked=False, include_bigendian=False):
    """Make an astropy table for testing.

    Parameters
    ----------
    include_multidim : `bool`
        Include multi-dimensional columns.
    include_masked : `bool`
        Include masked columns.
    include_bigendian : `bool`
        Include big-endian columns.

    Returns
    -------
    astropyTable : `astropy.table.Table`
        The test table.
    """
    data = _makeSimpleNumpyTable(include_multidim=include_multidim, include_bigendian=include_bigendian)
    # Add a couple of units.
    table = atable.Table(data)
    table["a"].unit = units.degree
    table["b"].unit = units.meter

    # Add some masked columns.
    if include_masked:
        nrow = len(table)
        mask = np.zeros(nrow, dtype=bool)
        mask[1] = True
        table["m1"] = np.ma.masked_array(data=np.arange(nrow, dtype="i8"), mask=mask)
        table["m2"] = np.ma.masked_array(data=np.arange(nrow, dtype="f4"), mask=mask)
        table["mstrcol"] = np.ma.masked_array(data=np.array(["text"] * nrow), mask=mask)
        table["mbytecol"] = np.ma.masked_array(data=np.array([b"bytes"] * nrow), mask=mask)

    return table


def _makeSimpleArrowTable(include_multidim=False, include_masked=False):
    """Make an arrow table for testing.

    Parameters
    ----------
    include_multidim : `bool`
        Include multi-dimensional columns.
    include_masked : `bool`
        Include masked columns.

    Returns
    -------
    arrowTable : `pyarrow.Table`
        The test table.
    """
    data = _makeSimpleAstropyTable(include_multidim=include_multidim, include_masked=include_masked)
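    # Conversion goes through astropy, so the returned arrow table is
    # expected to carry over the unit metadata added above (the exact
    # encoding in the arrow schema is an implementation detail of
    # astropy_to_arrow).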
    return astropy_to_arrow(data)


@unittest.skipUnless(pd is not None, "Cannot test ParquetFormatterDataFrame without pandas.")
@unittest.skipUnless(pa is not None, "Cannot test ParquetFormatterDataFrame without pyarrow.")
class ParquetFormatterDataFrameTestCase(unittest.TestCase):
    """Tests for ParquetFormatter, DataFrame, using local file datastore."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        config = Config(self.configFile)
        self.butler = Butler(Butler.makeRepo(self.root, config=config), writeable=True, run="test_run")
        # No dimensions in dataset type so we don't have to worry about
        # inserting dimension data or defining data IDs.
        self.datasetType = DatasetType(
            "data", dimensions=(), storageClass="DataFrame", universe=self.butler.registry.dimensions
        )
        self.butler.registry.registerDatasetType(self.datasetType)

    def tearDown(self):
        removeTestTempDir(self.root)

    def testSingleIndexDataFrame(self):
        df1, allColumns = _makeSingleIndexDataFrame(include_masked=True)

        self.butler.put(df1, self.datasetType, dataId={})
        # Read the whole DataFrame.
        df2 = self.butler.get(self.datasetType, dataId={})
        self.assertTrue(df1.equals(df2))
        # Read just the column descriptions.
        columns2 = self.butler.get(self.datasetType.componentTypeName("columns"), dataId={})
        self.assertTrue(allColumns.equals(columns2))
        # Read the rowcount.
        rowcount = self.butler.get(self.datasetType.componentTypeName("rowcount"), dataId={})
        self.assertEqual(rowcount, len(df1))
        # Read the schema.
        schema = self.butler.get(self.datasetType.componentTypeName("schema"), dataId={})
        self.assertEqual(schema, DataFrameSchema(df1))
        # Read just some columns a few different ways.
        df3 = self.butler.get(self.datasetType, dataId={}, parameters={"columns": ["a", "c"]})
        self.assertTrue(df1.loc[:, ["a", "c"]].equals(df3))
        df4 = self.butler.get(self.datasetType, dataId={}, parameters={"columns": "a"})
        self.assertTrue(df1.loc[:, ["a"]].equals(df4))
        df5 = self.butler.get(self.datasetType, dataId={}, parameters={"columns": ["index", "a"]})
        self.assertTrue(df1.loc[:, ["a"]].equals(df5))
        df6 = self.butler.get(self.datasetType, dataId={}, parameters={"columns": "ddd"})
        self.assertTrue(df1.loc[:, ["ddd"]].equals(df6))
        df7 = self.butler.get(self.datasetType, dataId={}, parameters={"columns": ["a", "a"]})
        self.assertTrue(df1.loc[:, ["a"]].equals(df7))
        # Passing an unrecognized column should be a ValueError.
        with self.assertRaises(ValueError):
            self.butler.get(self.datasetType, dataId={}, parameters={"columns": ["e"]})

    def testMultiIndexDataFrame(self):
        df1 = _makeMultiIndexDataFrame()

        self.butler.put(df1, self.datasetType, dataId={})
        # Read the whole DataFrame.
        df2 = self.butler.get(self.datasetType, dataId={})
        self.assertTrue(df1.equals(df2))
        # Read just the column descriptions.
        columns2 = self.butler.get(self.datasetType.componentTypeName("columns"), dataId={})
        self.assertTrue(df1.columns.equals(columns2))
        self.assertEqual(columns2.names, df1.columns.names)
        # Read the rowcount.
        rowcount = self.butler.get(self.datasetType.componentTypeName("rowcount"), dataId={})
        self.assertEqual(rowcount, len(df1))
        # Read the schema.
        schema = self.butler.get(self.datasetType.componentTypeName("schema"), dataId={})
        self.assertEqual(schema, DataFrameSchema(df1))
        # Read just some columns a few different ways.
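        # For a MultiIndex, the "columns" parameter accepts a dict mapping
        # level name(s) to selected values (levels left out are
        # unconstrained) or an explicit list of column tuples, as the
        # calls below exercise.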
        df3 = self.butler.get(self.datasetType, dataId={}, parameters={"columns": {"filter": "g"}})
        self.assertTrue(df1.loc[:, ["g"]].equals(df3))
        df4 = self.butler.get(
            self.datasetType, dataId={}, parameters={"columns": {"filter": ["r"], "column": "a"}}
        )
        self.assertTrue(df1.loc[:, [("r", "a")]].equals(df4))
        column_list = [("g", "a"), ("r", "c")]
        df5 = self.butler.get(self.datasetType, dataId={}, parameters={"columns": column_list})
        self.assertTrue(df1.loc[:, column_list].equals(df5))
        column_dict = {"filter": "r", "column": ["a", "b"]}
        df6 = self.butler.get(self.datasetType, dataId={}, parameters={"columns": column_dict})
        self.assertTrue(df1.loc[:, [("r", "a"), ("r", "b")]].equals(df6))
        # Passing an unrecognized column should be a ValueError.
        with self.assertRaises(ValueError):
            self.butler.get(self.datasetType, dataId={}, parameters={"columns": ["d"]})

    def testSingleIndexDataFrameEmptyString(self):
        """Test persisting a single index dataframe with empty strings."""
        df1, _ = _makeSingleIndexDataFrame()

        # Set one of the strings to None.
        df1.at[1, "strcol"] = None

        self.butler.put(df1, self.datasetType, dataId={})
        # Read the whole DataFrame.
        df2 = self.butler.get(self.datasetType, dataId={})
        self.assertTrue(df1.equals(df2))

    def testSingleIndexDataFrameAllEmptyStrings(self):
        """Test persisting a single index dataframe with an empty string
        column.
        """
        df1, _ = _makeSingleIndexDataFrame()

        # Set all of the strings to None.
        df1.loc[0:, "strcol"] = None

        self.butler.put(df1, self.datasetType, dataId={})
        # Read the whole DataFrame.
        df2 = self.butler.get(self.datasetType, dataId={})
        self.assertTrue(df1.equals(df2))

    def testLegacyDataFrame(self):
        """Test writing a dataframe to parquet via pandas (without additional
        metadata) and ensure that we can read it back with all the new
        functionality.
        """
        df1, allColumns = _makeSingleIndexDataFrame()

        fname = os.path.join(self.root, "test_dataframe.parq")
        df1.to_parquet(fname)

        legacy_type = DatasetType(
            "legacy_dataframe",
            dimensions=(),
            storageClass="DataFrame",
            universe=self.butler.registry.dimensions,
        )
        self.butler.registry.registerDatasetType(legacy_type)

        data_id = {}
        ref = DatasetRef(legacy_type, data_id, id=None)
        dataset = FileDataset(path=fname, refs=[ref], formatter=ParquetFormatter)

        self.butler.ingest(dataset, transfer="copy")

        self.butler.put(df1, self.datasetType, dataId={})

        df2a = self.butler.get(self.datasetType, dataId={})
        df2b = self.butler.get("legacy_dataframe", dataId={})
        self.assertTrue(df2a.equals(df2b))

        df3a = self.butler.get(self.datasetType, dataId={}, parameters={"columns": ["a"]})
        df3b = self.butler.get("legacy_dataframe", dataId={}, parameters={"columns": ["a"]})
        self.assertTrue(df3a.equals(df3b))

        columns2a = self.butler.get(self.datasetType.componentTypeName("columns"), dataId={})
        columns2b = self.butler.get("legacy_dataframe.columns", dataId={})
        self.assertTrue(columns2a.equals(columns2b))

        rowcount2a = self.butler.get(self.datasetType.componentTypeName("rowcount"), dataId={})
        rowcount2b = self.butler.get("legacy_dataframe.rowcount", dataId={})
        self.assertEqual(rowcount2a, rowcount2b)

        schema2a = self.butler.get(self.datasetType.componentTypeName("schema"), dataId={})
        schema2b = self.butler.get("legacy_dataframe.schema", dataId={})
        self.assertEqual(schema2a, schema2b)

    def testDataFrameSchema(self):
        tab1 = _makeSimpleArrowTable()

        schema = DataFrameSchema.from_arrow(tab1.schema)

        self.assertIsInstance(schema.schema, pd.DataFrame)
        self.assertEqual(repr(schema), repr(schema._schema))
        self.assertNotEqual(schema, "not_a_schema")
        self.assertEqual(schema, schema)

        df2 = _makeMultiIndexDataFrame()
        schema2 = DataFrameSchema(df2)

        self.assertNotEqual(schema, schema2)

    @unittest.skipUnless(atable is not None, "Cannot test reading as astropy without astropy.")
    def testWriteSingleIndexDataFrameReadAsAstropyTable(self):
        df1, allColumns = _makeSingleIndexDataFrame()

        self.butler.put(df1, self.datasetType, dataId={})

        tab2 = self.butler.get(self.datasetType, dataId={}, storageClass="ArrowAstropy")

        tab2_df = tab2.to_pandas(index="index")
        self.assertTrue(df1.equals(tab2_df))

        # Check reading the columns.
        columns = list(tab2.columns.keys())
        columns2 = self.butler.get(
            self.datasetType.componentTypeName("columns"), dataId={}, storageClass="ArrowColumnList"
        )
        # We check the set because pandas reorders the columns.
        self.assertEqual(set(columns2), set(columns))

        # Check reading the schema.
        schema = ArrowAstropySchema(tab2)
        schema2 = self.butler.get(
            self.datasetType.componentTypeName("schema"), dataId={}, storageClass="ArrowAstropySchema"
        )

        # The string types are objectified by pandas, and the order
        # will be changed because of pandas indexing.
        self.assertEqual(len(schema2.schema.columns), len(schema.schema.columns))
        for name in schema.schema.columns:
            self.assertIn(name, schema2.schema.columns)
            if schema2.schema[name].dtype != np.dtype("O"):
                self.assertEqual(schema2.schema[name].dtype, schema.schema[name].dtype)

    @unittest.skipUnless(atable is not None, "Cannot test reading as astropy without astropy.")
    def testWriteSingleIndexDataFrameWithMaskedColsReadAsAstropyTable(self):
        # We need to special-case the write-as-pandas read-as-astropy code
        # with masks because pandas has multiple ways to use masked columns.
        # (The string column mask handling in particular is frustratingly
        # inconsistent.)
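        # Concretely, _makeSingleIndexDataFrame(include_masked=True) uses
        # three different representations: "m1" is a nullable Int64
        # extension column (missing values are pd.NA), "m2" is a plain
        # float32 column (missing values become NaN), and "mstrcol" is an
        # object column holding None.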
        df1, allColumns = _makeSingleIndexDataFrame(include_masked=True)

        self.butler.put(df1, self.datasetType, dataId={})

        tab2 = self.butler.get(self.datasetType, dataId={}, storageClass="ArrowAstropy")
        tab2_df = tab2.to_pandas(index="index")

        self.assertTrue(df1.columns.equals(tab2_df.columns))
        for name in tab2_df.columns:
            col1 = df1[name]
            col2 = tab2_df[name]

            if col1.hasnans:
                notNull = col1.notnull()
                self.assertTrue(notNull.equals(col2.notnull()))
                # Need to check value-by-value because column may
                # be made of objects, depending on what pandas decides.
                for index in notNull.values.nonzero()[0]:
                    self.assertEqual(col1[index], col2[index])
            else:
                self.assertTrue(col1.equals(col2))

    @unittest.skipUnless(atable is not None, "Cannot test reading as astropy without astropy.")
    def testWriteMultiIndexDataFrameReadAsAstropyTable(self):
        df1 = _makeMultiIndexDataFrame()

        self.butler.put(df1, self.datasetType, dataId={})

        _ = self.butler.get(self.datasetType, dataId={}, storageClass="ArrowAstropy")

        # This is an odd duck, it doesn't really round-trip.
        # This test simply checks that it's readable, but definitely not
        # recommended.
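        # (The likely failure mode, though not asserted here: the two-level
        # column labels are flattened to strings on conversion and the
        # pandas index is not reconstructed, so the astropy view has
        # different column names and no MultiIndex semantics.)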

    @unittest.skipUnless(pa is not None, "Cannot test reading as arrow without pyarrow.")
    def testWriteSingleIndexDataFrameReadAsArrowTable(self):
        df1, allColumns = _makeSingleIndexDataFrame()

        self.butler.put(df1, self.datasetType, dataId={})

        tab2 = self.butler.get(self.datasetType, dataId={}, storageClass="ArrowTable")

        tab2_df = arrow_to_pandas(tab2)
        self.assertTrue(df1.equals(tab2_df))

        # Check reading the columns.
        columns = list(tab2.schema.names)
        columns2 = self.butler.get(
            self.datasetType.componentTypeName("columns"), dataId={}, storageClass="ArrowColumnList"
        )
        # We check the set because pandas reorders the columns.
        self.assertEqual(set(columns), set(columns2))

        # Check reading the schema.
        schema = tab2.schema
        schema2 = self.butler.get(
            self.datasetType.componentTypeName("schema"), dataId={}, storageClass="ArrowSchema"
        )

        # These will not have the same metadata, nor will the string column
        # information be maintained.
        self.assertEqual(len(schema.names), len(schema2.names))
        for name in schema.names:
            if schema.field(name).type not in (pa.string(), pa.binary()):
                self.assertEqual(schema.field(name).type, schema2.field(name).type)

    @unittest.skipUnless(pa is not None, "Cannot test reading as arrow without pyarrow.")
    def testWriteMultiIndexDataFrameReadAsArrowTable(self):
        df1 = _makeMultiIndexDataFrame()

        self.butler.put(df1, self.datasetType, dataId={})

        tab2 = self.butler.get(self.datasetType, dataId={}, storageClass="ArrowTable")

        tab2_df = arrow_to_pandas(tab2)
        self.assertTrue(df1.equals(tab2_df))

    @unittest.skipUnless(np is not None, "Cannot test reading as numpy without numpy.")
    def testWriteSingleIndexDataFrameReadAsNumpyTable(self):
        df1, allColumns = _makeSingleIndexDataFrame()

        self.butler.put(df1, self.datasetType, dataId={})

        tab2 = self.butler.get(self.datasetType, dataId={}, storageClass="ArrowNumpy")

        tab2_df = pd.DataFrame.from_records(tab2, index=["index"])
        self.assertTrue(df1.equals(tab2_df))

        # Check reading the columns.
        columns = list(tab2.dtype.names)
        columns2 = self.butler.get(
            self.datasetType.componentTypeName("columns"), dataId={}, storageClass="ArrowColumnList"
        )
        # We check the set because pandas reorders the columns.
        self.assertEqual(set(columns2), set(columns))

        # Check reading the schema.
        schema = ArrowNumpySchema(tab2.dtype)
        schema2 = self.butler.get(
            self.datasetType.componentTypeName("schema"), dataId={}, storageClass="ArrowNumpySchema"
        )

        # The string types will be objectified by pandas, and the order
        # will be changed because of pandas indexing.
        self.assertEqual(len(schema.schema.names), len(schema2.schema.names))
        for name in schema.schema.names:
            self.assertIn(name, schema2.schema.names)
            self.assertEqual(schema2.schema[name].type, schema.schema[name].type)

    @unittest.skipUnless(np is not None, "Cannot test reading as numpy without numpy.")
    def testWriteMultiIndexDataFrameReadAsNumpyTable(self):
        df1 = _makeMultiIndexDataFrame()

        self.butler.put(df1, self.datasetType, dataId={})

        _ = self.butler.get(self.datasetType, dataId={}, storageClass="ArrowNumpy")

        # This is an odd duck, it doesn't really round-trip.
        # This test simply checks that it's readable, but definitely not
        # recommended.

    @unittest.skipUnless(np is not None, "Cannot test reading as numpy dict without numpy.")
    def testWriteSingleIndexDataFrameReadAsNumpyDict(self):
        df1, allColumns = _makeSingleIndexDataFrame()

        self.butler.put(df1, self.datasetType, dataId={})

        tab2 = self.butler.get(self.datasetType, dataId={}, storageClass="ArrowNumpyDict")

        tab2_df = pd.DataFrame.from_records(tab2, index=["index"])
        # The column order is not maintained.
        self.assertEqual(set(df1.columns), set(tab2_df.columns))
        for col in df1.columns:
            self.assertTrue(np.all(df1[col].values == tab2_df[col].values))

    @unittest.skipUnless(np is not None, "Cannot test reading as numpy dict without numpy.")
    def testWriteMultiIndexDataFrameReadAsNumpyDict(self):
        df1 = _makeMultiIndexDataFrame()

        self.butler.put(df1, self.datasetType, dataId={})

        _ = self.butler.get(self.datasetType, dataId={}, storageClass="ArrowNumpyDict")

        # This is an odd duck, it doesn't really round-trip.
        # This test simply checks that it's readable, but definitely not
        # recommended.


@unittest.skipUnless(pd is not None, "Cannot test InMemoryDataFrameDelegate without pandas.")
class InMemoryDataFrameDelegateTestCase(ParquetFormatterDataFrameTestCase):
    """Tests for InMemoryDatastore, using DataFrameDelegate."""

    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")

    def testWriteMultiIndexDataFrameReadAsAstropyTable(self):
        df1 = _makeMultiIndexDataFrame()

        self.butler.put(df1, self.datasetType, dataId={})

        with self.assertRaises(ValueError):
            _ = self.butler.get(self.datasetType, dataId={}, storageClass="ArrowAstropy")

    def testLegacyDataFrame(self):
        # This test does not work with an inMemoryDatastore.
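        # (The legacy test ingests an externally written parquet file with
        # transfer="copy", which presupposes a file-based datastore.)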
        pass

    def testBadInput(self):
        df1, _ = _makeSingleIndexDataFrame()
        delegate = DataFrameDelegate("DataFrame")

        with self.assertRaises(ValueError):
            delegate.handleParameters(inMemoryDataset="not_a_dataframe")

        with self.assertRaises(AttributeError):
            delegate.getComponent(composite=df1, componentName="nothing")

    def testStorageClass(self):
        df1, allColumns = _makeSingleIndexDataFrame()

        factory = StorageClassFactory()
        factory.addFromConfig(StorageClassConfig())

        storageClass = factory.findStorageClass(type(df1), compare_types=False)
        # Force the name lookup to do name matching.
        storageClass._pytype = None
        self.assertEqual(storageClass.name, "DataFrame")

        storageClass = factory.findStorageClass(type(df1), compare_types=True)
        # Force the name lookup to do name matching.
        storageClass._pytype = None
        self.assertEqual(storageClass.name, "DataFrame")


@unittest.skipUnless(atable is not None, "Cannot test ParquetFormatterArrowAstropy without astropy.")
@unittest.skipUnless(pa is not None, "Cannot test ParquetFormatterArrowAstropy without pyarrow.")
class ParquetFormatterArrowAstropyTestCase(unittest.TestCase):
    """Tests for ParquetFormatter, ArrowAstropy, using local file datastore."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        config = Config(self.configFile)
        self.butler = Butler(Butler.makeRepo(self.root, config=config), writeable=True, run="test_run")
        # No dimensions in dataset type so we don't have to worry about
        # inserting dimension data or defining data IDs.
        self.datasetType = DatasetType(
            "data", dimensions=(), storageClass="ArrowAstropy", universe=self.butler.registry.dimensions
        )
        self.butler.registry.registerDatasetType(self.datasetType)

    def tearDown(self):
        removeTestTempDir(self.root)

    def testAstropyTable(self):
        tab1 = _makeSimpleAstropyTable(include_multidim=True, include_masked=True)

        self.butler.put(tab1, self.datasetType, dataId={})
        # Read the whole Table.
        tab2 = self.butler.get(self.datasetType, dataId={})
        self._checkAstropyTableEquality(tab1, tab2)
        # Read the columns.
        columns2 = self.butler.get(self.datasetType.componentTypeName("columns"), dataId={})
        self.assertEqual(len(columns2), len(tab1.dtype.names))
        for i, name in enumerate(tab1.dtype.names):
            self.assertEqual(columns2[i], name)
        # Read the rowcount.
        rowcount = self.butler.get(self.datasetType.componentTypeName("rowcount"), dataId={})
        self.assertEqual(rowcount, len(tab1))
        # Read the schema.
        schema = self.butler.get(self.datasetType.componentTypeName("schema"), dataId={})
        self.assertEqual(schema, ArrowAstropySchema(tab1))
        # Read just some columns a few different ways.
        tab3 = self.butler.get(self.datasetType, dataId={}, parameters={"columns": ["a", "c"]})
        self._checkAstropyTableEquality(tab1[("a", "c")], tab3)
        tab4 = self.butler.get(self.datasetType, dataId={}, parameters={"columns": "a"})
        self._checkAstropyTableEquality(tab1[("a",)], tab4)
        tab5 = self.butler.get(self.datasetType, dataId={}, parameters={"columns": ["index", "a"]})
        self._checkAstropyTableEquality(tab1[("index", "a")], tab5)
        tab6 = self.butler.get(self.datasetType, dataId={}, parameters={"columns": "ddd"})
        self._checkAstropyTableEquality(tab1[("ddd",)], tab6)
        tab7 = self.butler.get(self.datasetType, dataId={}, parameters={"columns": ["a", "a"]})
        self._checkAstropyTableEquality(tab1[("a",)], tab7)
        # Passing an unrecognized column should be a ValueError.
        with self.assertRaises(ValueError):
            self.butler.get(self.datasetType, dataId={}, parameters={"columns": ["e"]})

    def testAstropyTableBigEndian(self):
        tab1 = _makeSimpleAstropyTable(include_bigendian=True)

        self.butler.put(tab1, self.datasetType, dataId={})
        # Read the whole Table.
        tab2 = self.butler.get(self.datasetType, dataId={})
        self._checkAstropyTableEquality(tab1, tab2, has_bigendian=True)

    def testAstropyTableWithMetadata(self):
        tab1 = _makeSimpleAstropyTable(include_multidim=True)

        meta = {
            "meta_a": 5,
            "meta_b": 10.0,
            "meta_c": [1, 2, 3],
            "meta_d": True,
            "meta_e": "string",
        }

        tab1.meta.update(meta)

        self.butler.put(tab1, self.datasetType, dataId={})
        # Read the whole Table.
        tab2 = self.butler.get(self.datasetType, dataId={})
        # This will check that the metadata is equivalent as well.
        self._checkAstropyTableEquality(tab1, tab2)

    def testArrowAstropySchema(self):
        tab1 = _makeSimpleAstropyTable()
        tab1_arrow = astropy_to_arrow(tab1)
        schema = ArrowAstropySchema.from_arrow(tab1_arrow.schema)

        self.assertIsInstance(schema.schema, atable.Table)
        self.assertEqual(repr(schema), repr(schema._schema))
        self.assertNotEqual(schema, "not_a_schema")
        self.assertEqual(schema, schema)

        # Test various inequalities.
        tab2 = tab1.copy()
        tab2.rename_column("index", "index2")
        schema2 = ArrowAstropySchema(tab2)
        self.assertNotEqual(schema2, schema)

        tab2 = tab1.copy()
        tab2["index"].unit = units.micron
        schema2 = ArrowAstropySchema(tab2)
        self.assertNotEqual(schema2, schema)

        tab2 = tab1.copy()
        tab2["index"].description = "Index column"
        schema2 = ArrowAstropySchema(tab2)
        self.assertNotEqual(schema2, schema)

        tab2 = tab1.copy()
        tab2["index"].format = "%05d"
        schema2 = ArrowAstropySchema(tab2)
        self.assertNotEqual(schema2, schema)

    def testAstropyParquet(self):
        tab1 = _makeSimpleAstropyTable()

        fname = os.path.join(self.root, "test_astropy.parq")
        tab1.write(fname)

        astropy_type = DatasetType(
            "astropy_parquet",
            dimensions=(),
            storageClass="ArrowAstropy",
            universe=self.butler.registry.dimensions,
        )
        self.butler.registry.registerDatasetType(astropy_type)

        data_id = {}
        ref = DatasetRef(astropy_type, data_id, id=None)
        dataset = FileDataset(path=fname, refs=[ref], formatter=ParquetFormatter)

        self.butler.ingest(dataset, transfer="copy")

        self.butler.put(tab1, self.datasetType, dataId={})

        tab2a = self.butler.get(self.datasetType, dataId={})
        tab2b = self.butler.get("astropy_parquet", dataId={})
        self._checkAstropyTableEquality(tab2a, tab2b)

        columns2a = self.butler.get(self.datasetType.componentTypeName("columns"), dataId={})
        columns2b = self.butler.get("astropy_parquet.columns", dataId={})
        self.assertEqual(len(columns2b), len(columns2a))
        for i, name in enumerate(columns2a):
            self.assertEqual(columns2b[i], name)

        rowcount2a = self.butler.get(self.datasetType.componentTypeName("rowcount"), dataId={})
        rowcount2b = self.butler.get("astropy_parquet.rowcount", dataId={})
        self.assertEqual(rowcount2a, rowcount2b)

        schema2a = self.butler.get(self.datasetType.componentTypeName("schema"), dataId={})
        schema2b = self.butler.get("astropy_parquet.schema", dataId={})
        self.assertEqual(schema2a, schema2b)

    @unittest.skipUnless(pa is not None, "Cannot test reading as arrow without pyarrow.")
    def testWriteAstropyReadAsArrowTable(self):
        # The astropy <-> arrow conversion works fine with masked columns.
        tab1 = _makeSimpleAstropyTable(include_masked=True)

        self.butler.put(tab1, self.datasetType, dataId={})

        tab2 = self.butler.get(self.datasetType, dataId={}, storageClass="ArrowTable")

        tab2_astropy = arrow_to_astropy(tab2)
        self._checkAstropyTableEquality(tab1, tab2_astropy)

        # Check reading the columns.
        columns = tab2.schema.names
        columns2 = self.butler.get(
            self.datasetType.componentTypeName("columns"), dataId={}, storageClass="ArrowColumnList"
        )
        self.assertEqual(columns2, columns)

        # Check reading the schema.
        schema = tab2.schema
        schema2 = self.butler.get(
            self.datasetType.componentTypeName("schema"), dataId={}, storageClass="ArrowSchema"
        )

        self.assertEqual(schema, schema2)

    @unittest.skipUnless(pd is not None, "Cannot test reading as a dataframe without pandas.")
    def testWriteAstropyReadAsDataFrame(self):
        tab1 = _makeSimpleAstropyTable()

        self.butler.put(tab1, self.datasetType, dataId={})

        tab2 = self.butler.get(self.datasetType, dataId={}, storageClass="DataFrame")

        # This is tricky because it loses the units and gains a bonus pandas
        # _index_ column, so we just test the dataframe form.

        tab1_df = tab1.to_pandas()
        self.assertTrue(tab1_df.equals(tab2))

        # Check reading the columns.
        columns = tab2.columns
        columns2 = self.butler.get(
            self.datasetType.componentTypeName("columns"), dataId={}, storageClass="DataFrameIndex"
        )
        self.assertTrue(columns.equals(columns2))

        # Check reading the schema.
        schema = DataFrameSchema(tab2)
        schema2 = self.butler.get(
            self.datasetType.componentTypeName("schema"), dataId={}, storageClass="DataFrameSchema"
        )

        self.assertEqual(schema2, schema)

    @unittest.skipUnless(pd is not None, "Cannot test reading as a dataframe without pandas.")
    def testWriteAstropyWithMaskedColsReadAsDataFrame(self):
        # We need to special-case the write-as-astropy read-as-pandas code
        # with masks because pandas has multiple ways to use masked columns.
        # (When writing an astropy table with masked columns we get an object
        # column back, but each unmasked element has the correct type.)
        tab1 = _makeSimpleAstropyTable(include_masked=True)

        self.butler.put(tab1, self.datasetType, dataId={})

        tab2 = self.butler.get(self.datasetType, dataId={}, storageClass="DataFrame")

        tab1_df = tab1.to_pandas()

        self.assertTrue(tab1_df.columns.equals(tab2.columns))
        for name in tab2.columns:
            col1 = tab1_df[name]
            col2 = tab2[name]

            if col1.hasnans:
                notNull = col1.notnull()
                self.assertTrue(notNull.equals(col2.notnull()))
                # Need to check value-by-value because column may
                # be made of objects, depending on what pandas decides.
                for index in notNull.values.nonzero()[0]:
                    self.assertEqual(col1[index], col2[index])
            else:
                self.assertTrue(col1.equals(col2))

    @unittest.skipUnless(np is not None, "Cannot test reading as numpy without numpy.")
    def testWriteAstropyReadAsNumpyTable(self):
        tab1 = _makeSimpleAstropyTable()
        self.butler.put(tab1, self.datasetType, dataId={})

        tab2 = self.butler.get(self.datasetType, dataId={}, storageClass="ArrowNumpy")

        # This is tricky because it loses the units.
        tab2_astropy = atable.Table(tab2)

        self._checkAstropyTableEquality(tab1, tab2_astropy, skip_units=True)

        # Check reading the columns.
        columns = list(tab2.dtype.names)
        columns2 = self.butler.get(
            self.datasetType.componentTypeName("columns"), dataId={}, storageClass="ArrowColumnList"
        )
        self.assertEqual(columns2, columns)

        # Check reading the schema.
        schema = ArrowNumpySchema(tab2.dtype)
        schema2 = self.butler.get(
            self.datasetType.componentTypeName("schema"), dataId={}, storageClass="ArrowNumpySchema"
        )

        self.assertEqual(schema2, schema)

    @unittest.skipUnless(np is not None, "Cannot test reading as numpy without numpy.")
    def testWriteAstropyReadAsNumpyDict(self):
        tab1 = _makeSimpleAstropyTable()
        self.butler.put(tab1, self.datasetType, dataId={})

        tab2 = self.butler.get(self.datasetType, dataId={}, storageClass="ArrowNumpyDict")

        # This is tricky because it loses the units.
        tab2_astropy = atable.Table(tab2)

        self._checkAstropyTableEquality(tab1, tab2_astropy, skip_units=True)

    def _checkAstropyTableEquality(self, table1, table2, skip_units=False, has_bigendian=False):
        """Check if two astropy tables have the same columns/values.

        Parameters
        ----------
        table1 : `astropy.table.Table`
            First table to compare.
        table2 : `astropy.table.Table`
            Second table to compare.
        skip_units : `bool`
            Skip checking the column units/descriptions/formats.
        has_bigendian : `bool`
            One of the tables contains big-endian columns.
        """
        if not has_bigendian:
            self.assertEqual(table1.dtype, table2.dtype)
        else:
            for name in table1.dtype.names:
                # Only check that the types match, with both forced
                # to big-endian.
                self.assertEqual(table1.dtype[name].newbyteorder(">"), table2.dtype[name].newbyteorder(">"))

        self.assertEqual(table1.meta, table2.meta)
        if not skip_units:
            for name in table1.columns:
                self.assertEqual(table1[name].unit, table2[name].unit)
                self.assertEqual(table1[name].description, table2[name].description)
                self.assertEqual(table1[name].format, table2[name].format)
        self.assertTrue(np.all(table1 == table2))


@unittest.skipUnless(atable is not None, "Cannot test InMemoryArrowAstropyDelegate without astropy.")
class InMemoryArrowAstropyDelegateTestCase(ParquetFormatterArrowAstropyTestCase):
    """Tests for InMemoryDatastore, using ArrowAstropyDelegate."""

    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")

    def testAstropyParquet(self):
        # This test does not work with an inMemoryDatastore.
        pass

    def testBadInput(self):
        tab1 = _makeSimpleAstropyTable()
        delegate = ArrowAstropyDelegate("ArrowAstropy")

        with self.assertRaises(ValueError):
            delegate.handleParameters(inMemoryDataset="not_an_astropy_table")

        with self.assertRaises(NotImplementedError):
            delegate.handleParameters(inMemoryDataset=tab1, parameters={"columns": [("a", "b")]})

        with self.assertRaises(AttributeError):
            delegate.getComponent(composite=tab1, componentName="nothing")


@unittest.skipUnless(np is not None, "Cannot test ParquetFormatterArrowNumpy without numpy.")
@unittest.skipUnless(pa is not None, "Cannot test ParquetFormatterArrowNumpy without pyarrow.")
class ParquetFormatterArrowNumpyTestCase(unittest.TestCase):
    """Tests for ParquetFormatter, ArrowNumpy, using local file datastore."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        config = Config(self.configFile)
        self.butler = Butler(Butler.makeRepo(self.root, config=config), writeable=True, run="test_run")
        # No dimensions in dataset type so we don't have to worry about
        # inserting dimension data or defining data IDs.
        self.datasetType = DatasetType(
            "data", dimensions=(), storageClass="ArrowNumpy", universe=self.butler.registry.dimensions
        )
        self.butler.registry.registerDatasetType(self.datasetType)

    def tearDown(self):
        removeTestTempDir(self.root)

    def testNumpyTable(self):
        tab1 = _makeSimpleNumpyTable(include_multidim=True)

        self.butler.put(tab1, self.datasetType, dataId={})
        # Read the whole Table.
        tab2 = self.butler.get(self.datasetType, dataId={})
        self._checkNumpyTableEquality(tab1, tab2)
        # Read the columns.
        columns2 = self.butler.get(self.datasetType.componentTypeName("columns"), dataId={})
        self.assertEqual(len(columns2), len(tab1.dtype.names))
        for i, name in enumerate(tab1.dtype.names):
            self.assertEqual(columns2[i], name)
        # Read the rowcount.
        rowcount = self.butler.get(self.datasetType.componentTypeName("rowcount"), dataId={})
        self.assertEqual(rowcount, len(tab1))
        # Read the schema.
        schema = self.butler.get(self.datasetType.componentTypeName("schema"), dataId={})
        self.assertEqual(schema, ArrowNumpySchema(tab1.dtype))
        # Read just some columns a few different ways.
        tab3 = self.butler.get(self.datasetType, dataId={}, parameters={"columns": ["a", "c"]})
        self._checkNumpyTableEquality(tab1[["a", "c"]], tab3)
        tab4 = self.butler.get(self.datasetType, dataId={}, parameters={"columns": "a"})
        self._checkNumpyTableEquality(tab1[["a"]], tab4)
        tab5 = self.butler.get(self.datasetType, dataId={}, parameters={"columns": ["index", "a"]})
        self._checkNumpyTableEquality(tab1[["index", "a"]], tab5)
        tab6 = self.butler.get(self.datasetType, dataId={}, parameters={"columns": "ddd"})
        self._checkNumpyTableEquality(tab1[["ddd"]], tab6)
        tab7 = self.butler.get(self.datasetType, dataId={}, parameters={"columns": ["a", "a"]})
        self._checkNumpyTableEquality(tab1[["a"]], tab7)
        # Passing an unrecognized column should be a ValueError.
        with self.assertRaises(ValueError):
            self.butler.get(self.datasetType, dataId={}, parameters={"columns": ["e"]})

    def testNumpyTableBigEndian(self):
        tab1 = _makeSimpleNumpyTable(include_bigendian=True)

        self.butler.put(tab1, self.datasetType, dataId={})
        # Read the whole Table.
        tab2 = self.butler.get(self.datasetType, dataId={})
        self._checkNumpyTableEquality(tab1, tab2, has_bigendian=True)

    def testArrowNumpySchema(self):
        tab1 = _makeSimpleNumpyTable(include_multidim=True)
        tab1_arrow = numpy_to_arrow(tab1)
        schema = ArrowNumpySchema.from_arrow(tab1_arrow.schema)

        self.assertIsInstance(schema.schema, np.dtype)
        self.assertEqual(repr(schema), repr(schema._dtype))
        self.assertNotEqual(schema, "not_a_schema")
        self.assertEqual(schema, schema)

        # Test inequality.
        tab2 = tab1.copy()
        names = list(tab2.dtype.names)
        names[0] = "index2"
        tab2.dtype.names = names
        schema2 = ArrowNumpySchema(tab2.dtype)
        self.assertNotEqual(schema2, schema)

    @unittest.skipUnless(pa is not None, "Cannot test arrow conversions without pyarrow.")
    def testNumpyDictConversions(self):
        tab1 = _makeSimpleNumpyTable(include_multidim=True)

        # Verify that everything round-trips, including the schema.
        tab1_arrow = numpy_to_arrow(tab1)
        tab1_dict = arrow_to_numpy_dict(tab1_arrow)
        tab1_dict_arrow = numpy_dict_to_arrow(tab1_dict)

        self.assertEqual(tab1_arrow.schema, tab1_dict_arrow.schema)
        self.assertEqual(tab1_arrow, tab1_dict_arrow)

    @unittest.skipUnless(pa is not None, "Cannot test reading as arrow without pyarrow.")
    def testWriteNumpyTableReadAsArrowTable(self):
        tab1 = _makeSimpleNumpyTable(include_multidim=True)

        self.butler.put(tab1, self.datasetType, dataId={})

        tab2 = self.butler.get(self.datasetType, dataId={}, storageClass="ArrowTable")

        tab2_numpy = arrow_to_numpy(tab2)

        self._checkNumpyTableEquality(tab1, tab2_numpy)

        # Check reading the columns.
        columns = tab2.schema.names
        columns2 = self.butler.get(
            self.datasetType.componentTypeName("columns"), dataId={}, storageClass="ArrowColumnList"
        )
        self.assertEqual(columns2, columns)

        # Check reading the schema.
        schema = tab2.schema
        schema2 = self.butler.get(
            self.datasetType.componentTypeName("schema"), dataId={}, storageClass="ArrowSchema"
        )
        self.assertEqual(schema2, schema)

    @unittest.skipUnless(pd is not None, "Cannot test reading as a dataframe without pandas.")
    def testWriteNumpyTableReadAsDataFrame(self):
        tab1 = _makeSimpleNumpyTable()

        self.butler.put(tab1, self.datasetType, dataId={})

        tab2 = self.butler.get(self.datasetType, dataId={}, storageClass="DataFrame")

        # Converting this back to numpy gets confused with the index column
        # and changes the datatype of the string column.

        tab1_df = pd.DataFrame(tab1)

        self.assertTrue(tab1_df.equals(tab2))

        # Check reading the columns.
        columns = tab2.columns
        columns2 = self.butler.get(
            self.datasetType.componentTypeName("columns"), dataId={}, storageClass="DataFrameIndex"
        )
        self.assertTrue(columns.equals(columns2))

        # Check reading the schema.
        schema = DataFrameSchema(tab2)
        schema2 = self.butler.get(
            self.datasetType.componentTypeName("schema"), dataId={}, storageClass="DataFrameSchema"
        )

        self.assertEqual(schema2, schema)

    @unittest.skipUnless(atable is not None, "Cannot test reading as astropy without astropy.")
    def testWriteNumpyTableReadAsAstropyTable(self):
        tab1 = _makeSimpleNumpyTable(include_multidim=True)

        self.butler.put(tab1, self.datasetType, dataId={})

        tab2 = self.butler.get(self.datasetType, dataId={}, storageClass="ArrowAstropy")
        tab2_numpy = tab2.as_array()

        self._checkNumpyTableEquality(tab1, tab2_numpy)

        # Check reading the columns.
        columns = list(tab2.columns.keys())
        columns2 = self.butler.get(
            self.datasetType.componentTypeName("columns"), dataId={}, storageClass="ArrowColumnList"
        )
        self.assertEqual(columns2, columns)

        # Check reading the schema.
        schema = ArrowAstropySchema(tab2)
        schema2 = self.butler.get(
            self.datasetType.componentTypeName("schema"), dataId={}, storageClass="ArrowAstropySchema"
        )

        self.assertEqual(schema2, schema)

    def testWriteNumpyTableReadAsNumpyDict(self):
        tab1 = _makeSimpleNumpyTable(include_multidim=True)

        self.butler.put(tab1, self.datasetType, dataId={})

        tab2 = self.butler.get(self.datasetType, dataId={}, storageClass="ArrowNumpyDict")
        tab2_numpy = _numpy_dict_to_numpy(tab2)

        self._checkNumpyTableEquality(tab1, tab2_numpy)

    def _checkNumpyTableEquality(self, table1, table2, has_bigendian=False):
        """Check if two numpy tables have the same columns/values.

        Parameters
        ----------
        table1 : `numpy.ndarray`
            First table to compare.
        table2 : `numpy.ndarray`
            Second table to compare.
        has_bigendian : `bool`
            One of the tables contains big-endian columns.
        """
        self.assertEqual(table1.dtype.names, table2.dtype.names)
        for name in table1.dtype.names:
            if not has_bigendian:
                self.assertEqual(table1.dtype[name], table2.dtype[name])
            else:
                # Only check that the types match, with both forced
                # to big-endian.
                self.assertEqual(table1.dtype[name].newbyteorder(">"), table2.dtype[name].newbyteorder(">"))
        self.assertTrue(np.all(table1 == table2))


@unittest.skipUnless(np is not None, "Cannot test InMemoryArrowNumpyDelegate without numpy.")
class InMemoryArrowNumpyDelegateTestCase(ParquetFormatterArrowNumpyTestCase):
    """Tests for InMemoryDatastore, using ArrowNumpyDelegate."""

    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")

    def testBadInput(self):
        tab1 = _makeSimpleNumpyTable()
        delegate = ArrowNumpyDelegate("ArrowNumpy")

        with self.assertRaises(ValueError):
            delegate.handleParameters(inMemoryDataset="not_a_numpy_table")

        with self.assertRaises(NotImplementedError):
            delegate.handleParameters(inMemoryDataset=tab1, parameters={"columns": [("a", "b")]})

        with self.assertRaises(AttributeError):
            delegate.getComponent(composite=tab1, componentName="nothing")

    def testStorageClass(self):
        tab1 = _makeSimpleNumpyTable()

        factory = StorageClassFactory()
        factory.addFromConfig(StorageClassConfig())

        storageClass = factory.findStorageClass(type(tab1), compare_types=False)
        # Force the name lookup to do name matching.
        storageClass._pytype = None
        self.assertEqual(storageClass.name, "ArrowNumpy")

        storageClass = factory.findStorageClass(type(tab1), compare_types=True)
        # Force the name lookup to do name matching.
        storageClass._pytype = None
        self.assertEqual(storageClass.name, "ArrowNumpy")


@unittest.skipUnless(pa is not None, "Cannot test ParquetFormatterArrowTable without pyarrow.")
class ParquetFormatterArrowTableTestCase(unittest.TestCase):
    """Tests for ParquetFormatter, ArrowTable, using local file datastore."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        config = Config(self.configFile)
        self.butler = Butler(Butler.makeRepo(self.root, config=config), writeable=True, run="test_run")
        # No dimensions in dataset type so we don't have to worry about
        # inserting dimension data or defining data IDs.
        self.datasetType = DatasetType(
            "data", dimensions=(), storageClass="ArrowTable", universe=self.butler.registry.dimensions
        )
        self.butler.registry.registerDatasetType(self.datasetType)

    def tearDown(self):
        removeTestTempDir(self.root)

    def testArrowTable(self):
        tab1 = _makeSimpleArrowTable(include_multidim=True, include_masked=True)

        self.butler.put(tab1, self.datasetType, dataId={})
        # Read the whole Table.
        tab2 = self.butler.get(self.datasetType, dataId={})
        self.assertEqual(tab2, tab1)
        # Read the columns.
        columns2 = self.butler.get(self.datasetType.componentTypeName("columns"), dataId={})
        self.assertEqual(len(columns2), len(tab1.schema.names))
        for i, name in enumerate(tab1.schema.names):
            self.assertEqual(columns2[i], name)
        # Read the rowcount.
        rowcount = self.butler.get(self.datasetType.componentTypeName("rowcount"), dataId={})
        self.assertEqual(rowcount, len(tab1))
        # Read the schema.
        schema = self.butler.get(self.datasetType.componentTypeName("schema"), dataId={})
        self.assertEqual(schema, tab1.schema)
        # Read just some columns a few different ways.
        tab3 = self.butler.get(self.datasetType, dataId={}, parameters={"columns": ["a", "c"]})
        self.assertEqual(tab3, tab1.select(("a", "c")))
        tab4 = self.butler.get(self.datasetType, dataId={}, parameters={"columns": "a"})
        self.assertEqual(tab4, tab1.select(("a",)))
        tab5 = self.butler.get(self.datasetType, dataId={}, parameters={"columns": ["index", "a"]})
        self.assertEqual(tab5, tab1.select(("index", "a")))
        tab6 = self.butler.get(self.datasetType, dataId={}, parameters={"columns": "ddd"})
        self.assertEqual(tab6, tab1.select(("ddd",)))
        tab7 = self.butler.get(self.datasetType, dataId={}, parameters={"columns": ["a", "a"]})
        self.assertEqual(tab7, tab1.select(("a",)))
        # Passing an unrecognized column should be a ValueError.
        with self.assertRaises(ValueError):
            self.butler.get(self.datasetType, dataId={}, parameters={"columns": ["e"]})

    def testEmptyArrowTable(self):
        data = _makeSimpleNumpyTable()
        type_list = _numpy_dtype_to_arrow_types(data.dtype)

        schema = pa.schema(type_list)
        arrays = [[]] * len(schema.names)

        tab1 = pa.Table.from_arrays(arrays, schema=schema)

        self.butler.put(tab1, self.datasetType, dataId={})
        tab2 = self.butler.get(self.datasetType, dataId={})
        self.assertEqual(tab2, tab1)

        tab1_numpy = arrow_to_numpy(tab1)
        self.assertEqual(len(tab1_numpy), 0)
        tab1_numpy_arrow = numpy_to_arrow(tab1_numpy)
        self.assertEqual(tab1_numpy_arrow, tab1)

        tab1_pandas = arrow_to_pandas(tab1)
        self.assertEqual(len(tab1_pandas), 0)
        tab1_pandas_arrow = pandas_to_arrow(tab1_pandas)
        # Unfortunately, string/byte columns get mangled when translated
        # through empty pandas dataframes.
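        # (With zero rows pandas has no values from which to re-infer the
        # original column types, so the comparison below is restricted to
        # a subset of columns known to survive the round trip; the
        # fixed-width string/bytes columns in particular do not.)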
        self.assertEqual(
            tab1_pandas_arrow.select(("index", "a", "b", "c", "ddd")),
            tab1.select(("index", "a", "b", "c", "ddd")),
        )

        tab1_astropy = arrow_to_astropy(tab1)
        self.assertEqual(len(tab1_astropy), 0)
        tab1_astropy_arrow = astropy_to_arrow(tab1_astropy)
        self.assertEqual(tab1_astropy_arrow, tab1)

    def testEmptyArrowTableMultidim(self):
        data = _makeSimpleNumpyTable(include_multidim=True)
        type_list = _numpy_dtype_to_arrow_types(data.dtype)

        md = {}
        for name in data.dtype.names:
            _append_numpy_multidim_metadata(md, name, data.dtype[name])

        schema = pa.schema(type_list, metadata=md)
        arrays = [[]] * len(schema.names)

        tab1 = pa.Table.from_arrays(arrays, schema=schema)

        self.butler.put(tab1, self.datasetType, dataId={})
        tab2 = self.butler.get(self.datasetType, dataId={})
        self.assertEqual(tab2, tab1)

        tab1_numpy = arrow_to_numpy(tab1)
        self.assertEqual(len(tab1_numpy), 0)
        tab1_numpy_arrow = numpy_to_arrow(tab1_numpy)
        self.assertEqual(tab1_numpy_arrow, tab1)

        tab1_astropy = arrow_to_astropy(tab1)
        self.assertEqual(len(tab1_astropy), 0)
        tab1_astropy_arrow = astropy_to_arrow(tab1_astropy)
        self.assertEqual(tab1_astropy_arrow, tab1)

    @unittest.skipUnless(pd is not None, "Cannot test reading as a dataframe without pandas.")
    def testWriteArrowTableReadAsSingleIndexDataFrame(self):
        df1, allColumns = _makeSingleIndexDataFrame()

        self.butler.put(df1, self.datasetType, dataId={})

        # Read back out as a dataframe.
        df2 = self.butler.get(self.datasetType, dataId={}, storageClass="DataFrame")
        self.assertTrue(df1.equals(df2))

        # Read back out as an arrow table, convert to dataframe.
        tab3 = self.butler.get(self.datasetType, dataId={})
        df3 = arrow_to_pandas(tab3)
        self.assertTrue(df1.equals(df3))

        # Check reading the columns.
        columns = df2.reset_index().columns
        columns2 = self.butler.get(
            self.datasetType.componentTypeName("columns"), dataId={}, storageClass="DataFrameIndex"
        )
        # We check the set because pandas reorders the columns.
        self.assertEqual(set(columns2.to_list()), set(columns.to_list()))

        # Check reading the schema.
        schema = DataFrameSchema(df1)
        schema2 = self.butler.get(
            self.datasetType.componentTypeName("schema"), dataId={}, storageClass="DataFrameSchema"
        )
        self.assertEqual(schema2, schema)

    @unittest.skipUnless(pd is not None, "Cannot test reading as a dataframe without pandas.")
    def testWriteArrowTableReadAsMultiIndexDataFrame(self):
        df1 = _makeMultiIndexDataFrame()

        self.butler.put(df1, self.datasetType, dataId={})

        # Read back out as a dataframe.
        df2 = self.butler.get(self.datasetType, dataId={}, storageClass="DataFrame")
        self.assertTrue(df1.equals(df2))

        # Read back out as an arrow table, convert to dataframe.
        atab3 = self.butler.get(self.datasetType, dataId={})
        df3 = arrow_to_pandas(atab3)
        self.assertTrue(df1.equals(df3))

        # Check reading the columns.
        columns = df2.columns
        columns2 = self.butler.get(
            self.datasetType.componentTypeName("columns"), dataId={}, storageClass="DataFrameIndex"
        )
        self.assertTrue(columns2.equals(columns))

        # Check reading the schema.
        schema = DataFrameSchema(df1)
        schema2 = self.butler.get(
            self.datasetType.componentTypeName("schema"), dataId={}, storageClass="DataFrameSchema"
        )
        self.assertEqual(schema2, schema)

    @unittest.skipUnless(atable is not None, "Cannot test reading as astropy without astropy.")
    def testWriteArrowTableReadAsAstropyTable(self):
        tab1 = _makeSimpleAstropyTable(include_multidim=True, include_masked=True)

        self.butler.put(tab1, self.datasetType, dataId={})

        # Read back out as an astropy table.
        tab2 = self.butler.get(self.datasetType, dataId={}, storageClass="ArrowAstropy")
        self._checkAstropyTableEquality(tab1, tab2)

        # Read back out as an arrow table, convert to astropy table.
        atab3 = self.butler.get(self.datasetType, dataId={})
        tab3 = arrow_to_astropy(atab3)
        self._checkAstropyTableEquality(tab1, tab3)

        # Check reading the columns.
        columns = list(tab2.columns.keys())
        columns2 = self.butler.get(
            self.datasetType.componentTypeName("columns"), dataId={}, storageClass="ArrowColumnList"
        )
        self.assertEqual(columns2, columns)

        # Check reading the schema.
        schema = ArrowAstropySchema(tab1)
        schema2 = self.butler.get(
            self.datasetType.componentTypeName("schema"), dataId={}, storageClass="ArrowAstropySchema"
        )
        self.assertEqual(schema2, schema)

    @unittest.skipUnless(np is not None, "Cannot test reading as numpy without numpy.")
    def testWriteArrowTableReadAsNumpyTable(self):
        tab1 = _makeSimpleNumpyTable(include_multidim=True)

        self.butler.put(tab1, self.datasetType, dataId={})

        # Read back out as a numpy table.
        tab2 = self.butler.get(self.datasetType, dataId={}, storageClass="ArrowNumpy")
        self._checkNumpyTableEquality(tab1, tab2)

        # Read back out as an arrow table, convert to numpy table.
        atab3 = self.butler.get(self.datasetType, dataId={})
        tab3 = arrow_to_numpy(atab3)
        self._checkNumpyTableEquality(tab1, tab3)

        # Check reading the columns.
        columns = list(tab2.dtype.names)
        columns2 = self.butler.get(
            self.datasetType.componentTypeName("columns"), dataId={}, storageClass="ArrowColumnList"
        )
        self.assertEqual(columns2, columns)

        # Check reading the schema.
        schema = ArrowNumpySchema(tab1.dtype)
        schema2 = self.butler.get(
            self.datasetType.componentTypeName("schema"), dataId={}, storageClass="ArrowNumpySchema"
        )
        self.assertEqual(schema2, schema)

    @unittest.skipUnless(np is not None, "Cannot test reading as numpy without numpy.")
    def testWriteArrowTableReadAsNumpyDict(self):
        tab1 = _makeSimpleNumpyTable(include_multidim=True)

        self.butler.put(tab1, self.datasetType, dataId={})

        tab2 = self.butler.get(self.datasetType, dataId={}, storageClass="ArrowNumpyDict")
        tab2_numpy = _numpy_dict_to_numpy(tab2)
        self._checkNumpyTableEquality(tab1, tab2_numpy)

    def _checkAstropyTableEquality(self, table1, table2):
        """Check if two astropy tables have the same columns/values.

        Parameters
        ----------
        table1 : `astropy.table.Table`
            First table to compare.
        table2 : `astropy.table.Table`
            Second table to compare.
        """
        self.assertEqual(table1.dtype, table2.dtype)
        for name in table1.columns:
            self.assertEqual(table1[name].unit, table2[name].unit)
            self.assertEqual(table1[name].description, table2[name].description)
            self.assertEqual(table1[name].format, table2[name].format)
        self.assertTrue(np.all(table1 == table2))

    def _checkNumpyTableEquality(self, table1, table2):
        """Check if two numpy tables have the same columns/values.

        Parameters
        ----------
        table1 : `numpy.ndarray`
            First table to compare.
        table2 : `numpy.ndarray`
            Second table to compare.
        """
        self.assertEqual(table1.dtype.names, table2.dtype.names)
        for name in table1.dtype.names:
            self.assertEqual(table1.dtype[name], table2.dtype[name])
        self.assertTrue(np.all(table1 == table2))


@unittest.skipUnless(pa is not None, "Cannot test InMemoryArrowTableDelegate without pyarrow.")
class InMemoryArrowTableDelegateTestCase(ParquetFormatterArrowTableTestCase):
    """Tests for InMemoryDatastore, using ArrowTableDelegate."""

    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")

    def testBadInput(self):
        tab1 = _makeSimpleArrowTable()
        delegate = ArrowTableDelegate("ArrowTable")

        with self.assertRaises(ValueError):
            delegate.handleParameters(inMemoryDataset="not_an_arrow_table")

        with self.assertRaises(NotImplementedError):
            delegate.handleParameters(inMemoryDataset=tab1, parameters={"columns": [("a", "b")]})

        with self.assertRaises(AttributeError):
            delegate.getComponent(composite=tab1, componentName="nothing")
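
    # For contrast, a plain list of column names is the supported form of
    # the "columns" parameter (a sketch, assuming ``tab`` is an arrow
    # table):
    #
    #     delegate.handleParameters(inMemoryDataset=tab, parameters={"columns": ["a"]})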

    def testStorageClass(self):
        tab1 = _makeSimpleArrowTable()

        factory = StorageClassFactory()
        factory.addFromConfig(StorageClassConfig())

        storageClass = factory.findStorageClass(type(tab1), compare_types=False)
        # Force the lookup to fall back to name matching.
        storageClass._pytype = None
        self.assertEqual(storageClass.name, "ArrowTable")

        storageClass = factory.findStorageClass(type(tab1), compare_types=True)
        # Again force the lookup to fall back to name matching.
        storageClass._pytype = None
        self.assertEqual(storageClass.name, "ArrowTable")


@unittest.skipUnless(np is not None, "Cannot test ParquetFormatterArrowNumpyDict without numpy.")
@unittest.skipUnless(pa is not None, "Cannot test ParquetFormatterArrowNumpyDict without pyarrow.")
class ParquetFormatterArrowNumpyDictTestCase(unittest.TestCase):
    """Tests for ParquetFormatter, ArrowNumpyDict, using local file datastore."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        config = Config(self.configFile)
        self.butler = Butler(Butler.makeRepo(self.root, config=config), writeable=True, run="test_run")
        # No dimensions in the dataset type, so we don't have to worry about
        # inserting dimension data or defining data IDs.
        self.datasetType = DatasetType(
            "data", dimensions=(), storageClass="ArrowNumpyDict", universe=self.butler.registry.dimensions
        )
        self.butler.registry.registerDatasetType(self.datasetType)

    def tearDown(self):
        removeTestTempDir(self.root)

    def testNumpyDict(self):
        tab1 = _makeSimpleNumpyTable(include_multidim=True)
        dict1 = _numpy_to_numpy_dict(tab1)

        self.butler.put(dict1, self.datasetType, dataId={})

        # Read the whole table.
        dict2 = self.butler.get(self.datasetType, dataId={})
        self._checkNumpyDictEquality(dict1, dict2)

        # Read the columns.
        columns2 = self.butler.get(self.datasetType.componentTypeName("columns"), dataId={})
        self.assertEqual(len(columns2), len(dict1.keys()))
        for name in dict1.keys():
            self.assertIn(name, columns2)

        # Read the rowcount.
        rowcount = self.butler.get(self.datasetType.componentTypeName("rowcount"), dataId={})
        self.assertEqual(rowcount, len(dict1["a"]))

        # Read the schema.
        schema = self.butler.get(self.datasetType.componentTypeName("schema"), dataId={})
        self.assertEqual(schema, ArrowNumpySchema(tab1.dtype))

        # Read just some columns a few different ways.
        tab3 = self.butler.get(self.datasetType, dataId={}, parameters={"columns": ["a", "c"]})
        subdict = {key: dict1[key] for key in ["a", "c"]}
        self._checkNumpyDictEquality(subdict, tab3)

        tab4 = self.butler.get(self.datasetType, dataId={}, parameters={"columns": "a"})
        subdict = {key: dict1[key] for key in ["a"]}
        self._checkNumpyDictEquality(subdict, tab4)

        tab5 = self.butler.get(self.datasetType, dataId={}, parameters={"columns": ["index", "a"]})
        subdict = {key: dict1[key] for key in ["index", "a"]}
        self._checkNumpyDictEquality(subdict, tab5)

        tab6 = self.butler.get(self.datasetType, dataId={}, parameters={"columns": "ddd"})
        subdict = {key: dict1[key] for key in ["ddd"]}
        self._checkNumpyDictEquality(subdict, tab6)

        # Duplicate column requests should be de-duplicated.
        tab7 = self.butler.get(self.datasetType, dataId={}, parameters={"columns": ["a", "a"]})
        subdict = {key: dict1[key] for key in ["a"]}
        self._checkNumpyDictEquality(subdict, tab7)

        # Passing an unrecognized column should raise a ValueError.
        with self.assertRaises(ValueError):
            self.butler.get(self.datasetType, dataId={}, parameters={"columns": ["e"]})
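
    # Note that the "columns" parameter accepts either a single column name
    # or a list of names, as exercised above:
    #
    #     butler.get(datasetType, dataId={}, parameters={"columns": "a"})
    #     butler.get(datasetType, dataId={}, parameters={"columns": ["a"]})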

    @unittest.skipUnless(pa is not None, "Cannot test reading as arrow without pyarrow.")
    def testWriteNumpyDictReadAsArrowTable(self):
        tab1 = _makeSimpleNumpyTable(include_multidim=True)
        dict1 = _numpy_to_numpy_dict(tab1)

        self.butler.put(dict1, self.datasetType, dataId={})

        tab2 = self.butler.get(self.datasetType, dataId={}, storageClass="ArrowTable")

        tab2_dict = arrow_to_numpy_dict(tab2)

        self._checkNumpyDictEquality(dict1, tab2_dict)

    @unittest.skipUnless(pd is not None, "Cannot test reading as a dataframe without pandas.")
    def testWriteNumpyDictReadAsDataFrame(self):
        tab1 = _makeSimpleNumpyTable()
        dict1 = _numpy_to_numpy_dict(tab1)

        self.butler.put(dict1, self.datasetType, dataId={})

        tab2 = self.butler.get(self.datasetType, dataId={}, storageClass="DataFrame")

        # The column order of the dict is not guaranteed to be preserved, so
        # we need to check column by column. We also need to do this in
        # dataframe form because pandas changes the datatype of the string
        # column.
        tab1_df = pd.DataFrame(tab1)

        self.assertEqual(set(tab1_df.columns), set(tab2.columns))
        for col in tab1_df.columns:
            self.assertTrue(np.all(tab1_df[col].values == tab2[col].values))
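
    # For example, ``tab1["strcol"]`` has dtype ``<U10``, while
    # ``pd.DataFrame(tab1)["strcol"]`` holds Python objects; a whole-table
    # dtype comparison would therefore fail even though the values match.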

    @unittest.skipUnless(atable is not None, "Cannot test reading as astropy without astropy.")
    def testWriteNumpyDictReadAsAstropyTable(self):
        tab1 = _makeSimpleNumpyTable(include_multidim=True)
        dict1 = _numpy_to_numpy_dict(tab1)

        self.butler.put(dict1, self.datasetType, dataId={})

        tab2 = self.butler.get(self.datasetType, dataId={}, storageClass="ArrowAstropy")
        tab2_dict = _astropy_to_numpy_dict(tab2)

        self._checkNumpyDictEquality(dict1, tab2_dict)

    def testWriteNumpyDictReadAsNumpyTable(self):
        tab1 = _makeSimpleNumpyTable(include_multidim=True)
        dict1 = _numpy_to_numpy_dict(tab1)

        self.butler.put(dict1, self.datasetType, dataId={})

        tab2 = self.butler.get(self.datasetType, dataId={}, storageClass="ArrowNumpy")
        tab2_dict = _numpy_to_numpy_dict(tab2)

        self._checkNumpyDictEquality(dict1, tab2_dict)

    def testWriteNumpyDictBad(self):
        # A scalar value instead of an array should be rejected.
        dict1 = {"a": 4, "b": np.zeros(1)}
        with self.assertRaises(RuntimeError):
            self.butler.put(dict1, self.datasetType, dataId={})

        # Arrays of mismatched length should be rejected.
        dict2 = {"a": np.zeros(4), "b": np.zeros(5)}
        with self.assertRaises(RuntimeError):
            self.butler.put(dict2, self.datasetType, dataId={})

        # A plain list instead of a numpy array should be rejected.
        dict3 = {"a": [0] * 5, "b": np.zeros(5)}
        with self.assertRaises(RuntimeError):
            self.butler.put(dict3, self.datasetType, dataId={})
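
    # For contrast, a well-formed input is a dict of equal-length numpy
    # arrays (a sketch):
    #
    #     good = {"a": np.zeros(5), "b": np.arange(5.0)}
    #     self.butler.put(good, self.datasetType, dataId={})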

    def _checkNumpyDictEquality(self, dict1, dict2):
        """Check that two numpy dicts have the same columns and values.

        Parameters
        ----------
        dict1 : `dict` [`str`, `np.ndarray`]
            First dict to compare.
        dict2 : `dict` [`str`, `np.ndarray`]
            Second dict to compare.
        """
        self.assertEqual(set(dict1.keys()), set(dict2.keys()))
        for name in dict1.keys():
            self.assertEqual(dict1[name].dtype, dict2[name].dtype)
            self.assertTrue(np.all(dict1[name] == dict2[name]))
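

def _exampleNumpyDictRoundTrip(numpy_table):
    """Illustrative sketch, not used by the tests: convert a structured
    numpy table to a dict of columns, through Arrow, and back again using
    this module's own helpers.
    """
    numpy_dict = _numpy_to_numpy_dict(numpy_table)
    arrow_table = numpy_dict_to_arrow(numpy_dict)
    return arrow_to_numpy_dict(arrow_table)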


@unittest.skipUnless(np is not None, "Cannot test InMemoryNumpyDictDelegate without numpy.")
@unittest.skipUnless(pa is not None, "Cannot test InMemoryNumpyDictDelegate without pyarrow.")
class InMemoryNumpyDictDelegateTestCase(ParquetFormatterArrowNumpyDictTestCase):
    """Tests for InMemoryDatastore, using ArrowNumpyDictDelegate."""

    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")

    def testWriteNumpyDictBad(self):
        # The sub-type checking is not done by the in-memory datastore, so
        # the file-datastore test is overridden with a no-op here.
        pass


if __name__ == "__main__":
    unittest.main()