//
// Copyright (C) 2012 by Richard Brooks
// The file is part of the exstreamspeed library
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see .
//
#include
#include
/*
* Example program to explore the basics of the core database API.
* The program does the following:
* 1) Define and populate a sales database that records unit and dollar
* sales by date, product and customer. Maintains aggregate unit sales by date.
* 2) Query database to compute the top 5 most profitable products
* for sales against a specific customer and range of dates.
* 3) Query database to compute total unit sales by date for a range of dates
*
* genschema() explores how to define a database definition. Uses es_mbd and
* es_mbc interfaces.
*
* genproduct() explores how to update a database root node using a string key field
* using the es_mb, es_mbb, es_mbi and es_sd interfaces:
*
* gensales() explores how to update a database node hierarchy. Shows how to
* construct new child nodes and how to update a database field incrementally
* using binding by-reference.
* Uses the es_mb, es_mbb, es_mbi, es_mbn and es_date interfaces.
*
* queryprofit() explores how to run complex queries against a database using
* a database aggregator, membership filter and sorted iterator.
* Uses es_mbm, es_mbd, es_mb, es_mba, es_mbb, es_mbs and es_sd interfaces.
*
* queryunitsales() explores how to run simpler queries against a database using
* a filtered iterator. Uses es_mb, es_mbm, es_mbb, es_mbi and es_date interfaces.
*/
// constants driving size of database construction
#define NUMPRODUCT 1000
#define NUMCUSTOMER 250
#define NUMDATES 600
#define NUMSALESPERDATE 10000
// buffer sizes
#define SDCHUNKSIZE 4096
#define EBUFSIZE 1024
#define QPLANSIZE 4096
es::mbd genschema()
{
// construct new empty schema
es::mbd def;
// construct root class Product( ProductId, ProductCost ) that maintains
// product production cost by product-id
es::mbc c_product = def.newclass( "Product", 1000, es::mbd::eNONE );
c_product.addfld( "ProductId", es::mbd::eI16 );
c_product.addfld( "ProductCost", es::mbd::eF64 );
c_product.addfldidx( "ProductId" );
// construct root class DateSales( DateId, TotalUnitSales ) that maintains
// aggregate unit sales sales by date-id
es::mbc c_datesales = def.newclass( "DateSales", 200, es::mbd::eNONE );
c_datesales.addfld( "DateId", es::mbd::eI32 );
c_datesales.addfld( "TotalUnitSales", es::mbd::eI32 );
c_datesales.addfldidx( "DateId" );
// construct child class Sales(ProductId,CustomerId,UnitSales,DollarSales) that
// maintains the break-down of unit and dollar sales by product and customer
// corresponding to a particular date
es::mbc c_sales = def.newclass( "Sales", 2000, es::mbd::eSHARED, c_datesales );
c_sales.addfld( "ProductId", es::mbd::eI16 );
c_sales.addfld( "CustomerId", es::mbd::eI16 );
c_sales.addfld( "UnitSales", es::mbd::eI32 );
c_sales.addfld( "DollarSales", es::mbd::eF64 );
c_sales.addfldidx( "ProductId" );
c_sales.addfldidx( "CustomerId" );
return def;
}
void genproduct( es::mb& db, es::sd& s_prod )
{
//
// Populate Product root node and string dictionary
//
// retrieve Product class deinition from database
es::mbc c_product = db.getclass( "Product" );
// retrieve Product root node from database
es::mbn n_product = db.getnode( c_product );
// construct field-binding for Product class definition
es::mbb b_product( c_product );
// construct iterator from Product field-binding
es::mbi i_product( b_product );
// add a binding for ProductId
uint16_t f_productid;
b_product.bind_v( "ProductId", &f_productid );
// add a binding for ProductCost
double f_productcost;
b_product.bind_v( "ProductCost", &f_productcost );
// generate NUMPRODUCT products
char prodname[256];
unsigned i;
for( i=0; i != NUMPRODUCT; ++i ) {
// generate product name
sprintf( prodname, "Product_%03d", i );
// add to string dictionary - ProductId is assigned the string-id
unsigned idx;
s_prod.add( &idx, prodname );
f_productid = idx;
// derive production cost
f_productcost = (double)(f_productid % 100);
// update Product node with ProductId and ProductCost
i_product.findupdate( n_product );
}
}
void gensales( es::mb& db )
{
//
// Populate DateSales and Sales
//
std::cout << "Generating "
<< NUMDATES << " dates x " << NUMSALESPERDATE << " sales "
<< "=> " << (NUMDATES*(1+NUMSALESPERDATE))
<< " random-access updates" << std::endl << std::endl;
// retrieve DateSales and Sales class definitions
es::mbc c_datesales = db.getclass( "DateSales" );
es::mbc c_sales = db.getclass( "Sales" );
// retrieve DateSales root node from database
es::mbn n_datesales = db.getnode( c_datesales );
// construct field-bindings from DateSales and Sales class definition
es::mbb b_datesales( c_datesales );
es::mbb b_sales( c_sales );
// construct iterators from DateSales and Sales field-bindings
es::mbi i_datesales( b_datesales );
es::mbi i_sales( b_sales );
// add field-bindings for DateId
uint32_t f_dateid;
b_datesales.bind_v( "DateId", &f_dateid );
// add binding by-reference for TotalUnitSales as we're going to update this
// derived aggregate field incrementally
uint32_t *f_totalunitsales;
b_datesales.bind_r( "TotalUnitSales", &f_totalunitsales );
// add field-binding by-reference for Sales child node
es::mbnref f_sales;
b_datesales.bind_r( "Sales", f_sales );
// add field bindings for ProductId, CustomerId, UnitSales and DollarSales
uint16_t f_productid, f_customerid;
uint32_t f_unitsales;
double f_dollarsales;
b_sales.bind_v( "ProductId", &f_productid );
b_sales.bind_v( "CustomerId", &f_customerid );
b_sales.bind_v( "UnitSales", &f_unitsales );
b_sales.bind_v( "DollarSales", &f_dollarsales );
// loop over NUMDATES calendar days starting from 4/14/2007
f_dateid = es::date( 2007, 4, 14 ).getid();
unsigned i;
for( i=0; i != NUMDATES; ++i, ++f_dateid ) {
// update row in DateSales
i_datesales.findupdate( n_datesales );
// construct Sales child node if missing
if ( f_sales.isnodenull() ) {
f_sales.setnode( db.newnode( c_sales ) );
}
// each date has the same set of NUMSALESPERDATE combinations of ProductId
// and CustomerId, but different UnitSales and DollarSales
unsigned i;
uint32_t unitsale = f_dateid%77;
f_productid = 17;
f_customerid = 43;
for( i=0; i!= NUMSALESPERDATE; ++i ) {
// construct ProductId, CustomerId, UnitSales, DollarSales
if ( ++f_productid == NUMPRODUCT ) {
f_productid = 1;
}
if ( ++f_customerid == NUMCUSTOMER ) {
f_customerid = 1;
}
f_unitsales = unitsale+i%49;
f_dollarsales = f_unitsales*(13.0+i%17);
// update aggregate unit sales for this DateId
*f_totalunitsales += f_unitsales;
// update this Sales child
i_sales.findupdate( f_sales.getnode() );
}
}
}
void queryprofit( es::mb &db, es::sd& s_product,
const es::date& lowdateid, const es::date& highdateid,
int customerid )
{
//
// Define and execute a database aggregator to run query
//
std::cout << "Compute top 5 most profitable ProductIds for "
<< "CustomerId=" << customerid
<< " from " << lowdateid.getcomp() << " to " << highdateid.getcomp()
<< std::endl << std::endl;
// construct membership filter for DateId
es::mbm m_dateid( es::mbm::eIN );
m_dateid.addrange( lowdateid.getid(), highdateid.getid() );
// construct membership filter for CustomerId
es::mbm m_customerid( es::mbm::eIN );
m_customerid.add( customerid );
// define result database definition and class to store query results
es::mbd rdef;
es::mbc c_res = rdef.newclass( "Result", 200, es::mbd::ePRIVATE );
// construct query aggregator against input database definition
es::mba agg( db );
// add ProductId as key of query result
agg.addkey( "ProductId" );
// add membership filters for DateId and CustomerId to aggregator
agg.addmembership( "DateId", m_dateid );
agg.addmembership( "CustomerId", m_customerid );
// define derived value Profit
agg.addformula( "Profit", "DollarSales - UnitSales * ProductCost", es::mbd::eF64 );
// add Profit as an aggregate result field
agg.addresult( "Profit", es::mba::eSUM );
// prepare and display the query plan for the aggregator
std::string queryplan;
agg.prepare( rdef, "Result", queryplan );
std::cout << "Aggregator Query Plan:" << std::endl << queryplan << std::endl;
// construct an empty result database with which to fill query results
es::mb rdb( rdef );
// execute query against input and result databases
agg.exec( db, rdb );
//
// print top 5 most profitable products
//
// construct binding for ProductId and Profit fields in Result class
es::mbb b_res( c_res );
es::mbn n_res = rdb.getnode( c_res );
uint16_t f_productid;
double f_profit;
b_res.bind_v( "ProductId", &f_productid );
b_res.bind_v( "Profit", &f_profit );
// construct sorted iterator. Use ES_ORDER_BOTTOM since we only want the
// first 5 entries and don't want to sort the entire node
es::mbs s_res( b_res, es::mbs::eBOTTOM );
s_res.add( "Profit" );
// extract first five entries
std::fixed( std::cout );
std::cout << "Rank,Product,Profit" << std::endl;
int i;
bool r = s_res.beginfetch( n_res );
for( i=0; i!=5 && r ; ++i ) {
// get name of product from string dictionary by id
const char *prodname = s_product.getstr( f_productid );
// print report line
std::cout << i+1 << "," << prodname << "," << f_profit << std::endl;
// get next most profitable product
r = s_res.incfetch();
}
std::cout << std::endl;
}
void queryprofitq( es::mb &db, es::sd& s_product,
const es::date& lowdateid, const es::date& highdateid,
int customerid )
{
//
// Define and execute a database query expression to run query
//
std::cout << "Compute top 5 most profitable ProductIds for "
<< "CustomerId=" << customerid
<< " from " << lowdateid.getcomp() << " to " << highdateid.getcomp()
<< std::endl << std::endl;
// construct query aggregator against input database definition
es::mbq qry;
// define membership query string for date filter
qry << "mbm( id=DateF memtype=in "
<< "addrange( low=" << (long)lowdateid.getid() << " high= "
<< (long)highdateid.getid() << "))\n";
// define membership query string for customer filter
qry << "mbm( id=CustomerF memtype=in add( value=" << (long)customerid << "))\n";
// define result mbc
qry << "mbc( id=Result batch=200 index=private)\n";
/* define aggregation query */
qry << "mba( refmbc=Result "
<< "addmembership( field=DateId refmbm=DateF ) "
<< "addmembership( field=CustomerId refmbm=CustomerF ) "
<< "addformula( field=Profit type=f64 "
<< " def=\"DollarSales - UnitSales * ProductCost\") "
<< "addkey( field=ProductId ) "
<< "addresult( field=Profit aggtype=sum ))";
// prepare and print query plan
es::mbd idef = db.getdefinition();
std::string queryplan;
qry.prepare( idef, queryplan );
std::cout << "Aggregator Query Plan Derived From Query Language Expression:"
<< std::endl << queryplan << std::endl;
/* execute query */
es::mb rdb = qry.exec( db );
//
// print top 5 most profitable products
//
// construct binding for ProductId and Profit fields in Result class
es::mbc c_res( rdb.getclass( "Result" ) );
es::mbb b_res( c_res );
es::mbn n_res = rdb.getnode( c_res );
uint16_t f_productid;
double f_profit;
b_res.bind_v( "ProductId", &f_productid );
b_res.bind_v( "Profit", &f_profit );
// construct sorted iterator. Use ES_ORDER_BOTTOM since we only want the
// first 5 entries and don't want to sort the entire node
es::mbs s_res( b_res, es::mbs::eBOTTOM );
s_res.add( "Profit" );
// extract first five entries
std::fixed( std::cout );
std::cout << "Rank,Product,Profit" << std::endl;
int i;
bool r = s_res.beginfetch( n_res );
for( i=0; i!=5 && r ; ++i ) {
// get name of product from string dictionary by id
const char *prodname = s_product.getstr( f_productid );
// print report line
std::cout << i+1 << "," << prodname << "," << f_profit << std::endl;
// get next most profitable product
r = s_res.incfetch();
}
std::cout << std::endl;
}
void queryunitsales( es::mb& db, const es::date& lowdateid, const es::date& highdateid )
{
std::cout <<"Compute TotalUnitSales by date from "
<< lowdateid.getcomp() << " to " << highdateid.getcomp()
<< std::endl << std::endl;
// retrieve DateSales class definition
es::mbc c_datesales = db.getclass( "DateSales" );
// retrieve DateSales root node from database
es::mbn n_datesales = db.getnode( c_datesales );
// construct field-bindings and iterator for DateSales
es::mbb b_datesales( c_datesales );
es::mbi i_datesales( b_datesales );
uint32_t f_dateid, f_totalunitsales;
b_datesales.bind_v( "DateId", &f_dateid );
b_datesales.bind_v( "TotalUnitSales", &f_totalunitsales );
// construct membership filter for DateId
es::mbm m_dateid( es::mbm::eIN );
m_dateid.addrange( lowdateid.getid(), highdateid.getid() );
// add membership filter to DateSales field-binding
b_datesales.bind( "DateId", m_dateid );
// iterator for DateSales node
std::cout << "Date,TotalUnitSales" << std::endl;
for( bool r = i_datesales.beginfetch( n_datesales );r; r = i_datesales.incfetch() ) {
std::cout << es::date( f_dateid ) << "," << f_totalunitsales << std::endl;
}
}
int main( int argc, char **argv )
{
try {
// initalize date calculator
es::date::init( 2007, 2010 );
// construct database definition
es::mbd def = genschema();
// construct empty database from definition
es::mb db( def );
// populate Product root node and string dictionary of product names
es::sd s_product( SDCHUNKSIZE, true );
genproduct( db, s_product );
// populate DateSales root and child nodes
gensales( db );
// run query to determine the 5 most profitable products for sales
// to a particular customer and range of dates (in this case the whole of 2008)
es::date lowdate( 2008, 1, 1 );
es::date highdate( 2008, 12, 31 );
queryprofit( db, s_product, lowdate, highdate, 20 );
// repeat aggregator query but using query expression language (mbq)
queryprofitq( db, s_product, lowdate, highdate, 20 );
// print total unit sales for dates in some range (in this case April 2007)
lowdate = es::date( 2007, 4, 1 );
highdate = es::date( 2007, 4, 30 );
queryunitsales( db, lowdate, highdate );
// recover resources for date calculator
es::date::destroy();
} catch ( es::errmsg &e ) {
std::cerr << "error code=" << e.geterrorcode()
<< " msg=" << e.geterrormsg() << std::endl;
} catch ( es::err &e ) {
std::cerr << "error code=" << e.geterrorcode() << std::endl;
}
return 0;
}