Coverage for python/lsst/sims/maf/db/database.py : 75%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
""" Meta class for databases, to build a registry of database classes. """ else: if len(modname.split('.')) > 1: modname = '.'.join(modname.split('.')[:-1]) + '.' else: modname = modname + '.' raise Exception('Redefining databases %s! (there are >1 database classes with the same name)' %(databasename)) return cls.registry[databasename] for databasename in sorted(cls.registry): if not doc: print(databasename) if doc: print('---- ', databasename, ' ----') print(inspect.getdoc(cls.registry[databasename]))
"""Base class for database access. Implements some basic query functionality and demonstrates API.
Parameters ---------- database : str Name of the database (or full path + filename for sqlite db). driver : str, opt Dialect+driver for sqlalchemy. Default 'sqlite'. (other examples, 'mssql+pymssql'). host : str, opt Hostname for database. Default None (for sqlite). port : int, opt Port for database. Default None. defaultTable : str, opt Default table in the database to query for metric data. longstrings : bool, opt Flag to convert strings in database to long (1024) or short (256) characters in numpy recarray. Default False (convert to 256 character strings). verbose : bool, opt Flag for additional output. Default False. """
longstrings=False, verbose=False): # If it's a sqlite file, check that the filename exists. # This gives a more understandable error message than trying to connect to non-existent file later.
# Connect to database using DBObject init. host=host, port=port, verbose=verbose, connection=None)
'NUMERIC': (float,), 'SMALLINT': (int,), 'TINYINT': (int,), 'VARCHAR': (np.str, 256), 'TEXT': (np.str, 256), 'CLOB': (np.str, 256), 'NVARCHAR': (np.str, 256), 'NCLOB': (np.str, 256), 'NTEXT': (np.str, 256), 'CHAR': (np.str, 1), 'INT': (int,), 'REAL': (float,), 'DOUBLE': (float,), 'STRING': (np.str, 256), 'DOUBLE_PRECISION': (float,), 'DECIMAL': (float,), 'DATETIME': (np.str, 50)} typeOverRide = {'VARCHAR':(np.str, 1024), 'NVARCHAR':(np.str, 1024), 'TEXT':(np.str, 1024), 'CLOB':(np.str, 1024), 'STRING':(np.str, 1024)} self.dbTypeMap.update(typeOverRide)
# Get a dict (keyed by the table names) of all the columns in each table and view. # Create all the sqlalchemy table objects. This lets us see the schema and query it with types. # If there is only one table and we haven't said otherwise, set defaultTable automatically.
"""Fetch 'colnames' from 'tableName'.
This is basically a thin wrapper around query_columns, but uses the default table. It's mostly still here for backward compatibility.
Parameters ---------- colnames : list The columns to fetch from the table. sqlconstraint : str or None, opt The sql constraint to apply to the data (minus "WHERE"). Default None. Examples: to fetch data for the r band filter only, set sqlconstraint to 'filter = "r"'. groupBy : str or None, opt The column to group the returned data by. Default (when using summaryTable) is the MJD, otherwise will be None. tableName : str or None, opt The table to query. The default (None) will use the summary table, set by self.defaultTable.
Returns ------- np.recarray A structured array containing the data queried from the database. """ tableName = self.defaultTable
# For a basic Database object, there is no default column to group by. So reset to None. groupBy = None
raise ValueError('Table %s not recognized; not in list of database tables.' % (tableName))
groupBy=groupBy)
"""Get config (metadata) info on source of data for metric calculation. """ # Demo API (for interface with driver). configSummary = {} configDetails = {} return configSummary, configDetails
"""Simple wrapper around execute_arbitrary for backwards compatibility.
Parameters ----------- sqlQuery : str SQL query. dtype: opt, numpy dtype. Numpy recarray dtype. If None, then an attempt to determine the dtype will be made. This attempt will fail if there are commas in the data you query.
Returns ------- numpy.recarray """
groupBy=None, numLimit=None, chunksize=1000000): """Query a table in the database and return data from colnames in recarray.
Parameters ---------- tablename : str Name of table to query. colnames : list of str or None, opt Columns from the table to query for. If None, all columns are selected. sqlconstraint : str or None, opt Constraint to apply to the query. Default None. groupBy : str or None, opt Name of column to group by. Default None. numLimit : int or None, opt Number of records to return. Default no limit. chunksize : int, opt Query database and convert to recarray in series of chunks of chunksize.
Returns ------- numpy.recarray """ # Build the sqlalchemy query from a single table, with various columns/constraints/etc. # Does NOT use a mapping between column names and database names - assumes the database names # are what the user will specify.
# Build the query. groupBy=groupBy, numLimit=numLimit)
# Determine dtype for numpy recarray.
# Execute query on database.
# Fetch all results and convert to numpy recarray. results = exec_query.fetchall() data = self._convert_results(results, dtype) else: # Loop through results, converting in steps of chunksize. else:
raise ValueError('Tablename %s not in list of available tables (%s).' % (tablename, self.tables.keys())) colnames = self.columnNames[tablename] else: raise ValueError("Requested column %s not available in table %s" % (col, tablename)) raise ValueError("GroupBy column %s is not available in table %s" % (groupBy, tablename)) # Put together sqlalchemy query object. else:
data = np.recarray((0,), dtype=dtype) else: # Have to do the tuple(xx) for py2 string objects. With py3 is okay to just pass results. |