// // Copyright (C) 2012 by Richard Brooks // The file is part of the exstreamspeed library // // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program. If not, see . // #include #include /* * Example program to explore the basics of the core database API. * The program does the following: * 1) Define and populate a sales database that records unit and dollar * sales by date, product and customer. Maintains aggregate unit sales by date. * 2) Query database to compute the top 5 most profitable products * for sales against a specific customer and range of dates. * 3) Query database to compute total unit sales by date for a range of dates * * genschema() explores how to define a database definition. Uses es_mbd and * es_mbc interfaces. * * genproduct() explores how to update a database root node using a string key field * using the es_mb, es_mbb, es_mbi and es_sd interfaces: * * gensales() explores how to update a database node hierarchy. Shows how to * construct new child nodes and how to update a database field incrementally * using binding by-reference. * Uses the es_mb, es_mbb, es_mbi, es_mbn and es_date interfaces. * * queryprofit() explores how to run complex queries against a database using * a database aggregator, membership filter and sorted iterator. * Uses es_mbm, es_mbd, es_mb, es_mba, es_mbb, es_mbs and es_sd interfaces. * * queryunitsales() explores how to run simpler queries against a database using * a filtered iterator. Uses es_mb, es_mbm, es_mbb, es_mbi and es_date interfaces. */ // constants driving size of database construction #define NUMPRODUCT 1000 #define NUMCUSTOMER 250 #define NUMDATES 600 #define NUMSALESPERDATE 10000 // buffer sizes #define SDCHUNKSIZE 4096 #define EBUFSIZE 1024 #define QPLANSIZE 4096 es::mbd genschema() { // construct new empty schema es::mbd def; // construct root class Product( ProductId, ProductCost ) that maintains // product production cost by product-id es::mbc c_product = def.newclass( "Product", 1000, es::mbd::eNONE ); c_product.addfld( "ProductId", es::mbd::eI16 ); c_product.addfld( "ProductCost", es::mbd::eF64 ); c_product.addfldidx( "ProductId" ); // construct root class DateSales( DateId, TotalUnitSales ) that maintains // aggregate unit sales sales by date-id es::mbc c_datesales = def.newclass( "DateSales", 200, es::mbd::eNONE ); c_datesales.addfld( "DateId", es::mbd::eI32 ); c_datesales.addfld( "TotalUnitSales", es::mbd::eI32 ); c_datesales.addfldidx( "DateId" ); // construct child class Sales(ProductId,CustomerId,UnitSales,DollarSales) that // maintains the break-down of unit and dollar sales by product and customer // corresponding to a particular date es::mbc c_sales = def.newclass( "Sales", 2000, es::mbd::eSHARED, c_datesales ); c_sales.addfld( "ProductId", es::mbd::eI16 ); c_sales.addfld( "CustomerId", es::mbd::eI16 ); c_sales.addfld( "UnitSales", es::mbd::eI32 ); c_sales.addfld( "DollarSales", es::mbd::eF64 ); c_sales.addfldidx( "ProductId" ); c_sales.addfldidx( "CustomerId" ); return def; } void genproduct( es::mb& db, es::sd& s_prod ) { // // Populate Product root node and string dictionary // // retrieve Product class deinition from database es::mbc c_product = db.getclass( "Product" ); // retrieve Product root node from database es::mbn n_product = db.getnode( c_product ); // construct field-binding for Product class definition es::mbb b_product( c_product ); // construct iterator from Product field-binding es::mbi i_product( b_product ); // add a binding for ProductId uint16_t f_productid; b_product.bind_v( "ProductId", &f_productid ); // add a binding for ProductCost double f_productcost; b_product.bind_v( "ProductCost", &f_productcost ); // generate NUMPRODUCT products char prodname[256]; unsigned i; for( i=0; i != NUMPRODUCT; ++i ) { // generate product name sprintf( prodname, "Product_%03d", i ); // add to string dictionary - ProductId is assigned the string-id unsigned idx; s_prod.add( &idx, prodname ); f_productid = idx; // derive production cost f_productcost = (double)(f_productid % 100); // update Product node with ProductId and ProductCost i_product.findupdate( n_product ); } } void gensales( es::mb& db ) { // // Populate DateSales and Sales // std::cout << "Generating " << NUMDATES << " dates x " << NUMSALESPERDATE << " sales " << "=> " << (NUMDATES*(1+NUMSALESPERDATE)) << " random-access updates" << std::endl << std::endl; // retrieve DateSales and Sales class definitions es::mbc c_datesales = db.getclass( "DateSales" ); es::mbc c_sales = db.getclass( "Sales" ); // retrieve DateSales root node from database es::mbn n_datesales = db.getnode( c_datesales ); // construct field-bindings from DateSales and Sales class definition es::mbb b_datesales( c_datesales ); es::mbb b_sales( c_sales ); // construct iterators from DateSales and Sales field-bindings es::mbi i_datesales( b_datesales ); es::mbi i_sales( b_sales ); // add field-bindings for DateId uint32_t f_dateid; b_datesales.bind_v( "DateId", &f_dateid ); // add binding by-reference for TotalUnitSales as we're going to update this // derived aggregate field incrementally uint32_t *f_totalunitsales; b_datesales.bind_r( "TotalUnitSales", &f_totalunitsales ); // add field-binding by-reference for Sales child node es::mbnref f_sales; b_datesales.bind_r( "Sales", f_sales ); // add field bindings for ProductId, CustomerId, UnitSales and DollarSales uint16_t f_productid, f_customerid; uint32_t f_unitsales; double f_dollarsales; b_sales.bind_v( "ProductId", &f_productid ); b_sales.bind_v( "CustomerId", &f_customerid ); b_sales.bind_v( "UnitSales", &f_unitsales ); b_sales.bind_v( "DollarSales", &f_dollarsales ); // loop over NUMDATES calendar days starting from 4/14/2007 f_dateid = es::date( 2007, 4, 14 ).getid(); unsigned i; for( i=0; i != NUMDATES; ++i, ++f_dateid ) { // update row in DateSales i_datesales.findupdate( n_datesales ); // construct Sales child node if missing if ( f_sales.isnodenull() ) { f_sales.setnode( db.newnode( c_sales ) ); } // each date has the same set of NUMSALESPERDATE combinations of ProductId // and CustomerId, but different UnitSales and DollarSales unsigned i; uint32_t unitsale = f_dateid%77; f_productid = 17; f_customerid = 43; for( i=0; i!= NUMSALESPERDATE; ++i ) { // construct ProductId, CustomerId, UnitSales, DollarSales if ( ++f_productid == NUMPRODUCT ) { f_productid = 1; } if ( ++f_customerid == NUMCUSTOMER ) { f_customerid = 1; } f_unitsales = unitsale+i%49; f_dollarsales = f_unitsales*(13.0+i%17); // update aggregate unit sales for this DateId *f_totalunitsales += f_unitsales; // update this Sales child i_sales.findupdate( f_sales.getnode() ); } } } void queryprofit( es::mb &db, es::sd& s_product, const es::date& lowdateid, const es::date& highdateid, int customerid ) { // // Define and execute a database aggregator to run query // std::cout << "Compute top 5 most profitable ProductIds for " << "CustomerId=" << customerid << " from " << lowdateid.getcomp() << " to " << highdateid.getcomp() << std::endl << std::endl; // construct membership filter for DateId es::mbm m_dateid( es::mbm::eIN ); m_dateid.addrange( lowdateid.getid(), highdateid.getid() ); // construct membership filter for CustomerId es::mbm m_customerid( es::mbm::eIN ); m_customerid.add( customerid ); // define result database definition and class to store query results es::mbd rdef; es::mbc c_res = rdef.newclass( "Result", 200, es::mbd::ePRIVATE ); // construct query aggregator against input database definition es::mba agg( db ); // add ProductId as key of query result agg.addkey( "ProductId" ); // add membership filters for DateId and CustomerId to aggregator agg.addmembership( "DateId", m_dateid ); agg.addmembership( "CustomerId", m_customerid ); // define derived value Profit agg.addformula( "Profit", "DollarSales - UnitSales * ProductCost", es::mbd::eF64 ); // add Profit as an aggregate result field agg.addresult( "Profit", es::mba::eSUM ); // prepare and display the query plan for the aggregator std::string queryplan; agg.prepare( rdef, "Result", queryplan ); std::cout << "Aggregator Query Plan:" << std::endl << queryplan << std::endl; // construct an empty result database with which to fill query results es::mb rdb( rdef ); // execute query against input and result databases agg.exec( db, rdb ); // // print top 5 most profitable products // // construct binding for ProductId and Profit fields in Result class es::mbb b_res( c_res ); es::mbn n_res = rdb.getnode( c_res ); uint16_t f_productid; double f_profit; b_res.bind_v( "ProductId", &f_productid ); b_res.bind_v( "Profit", &f_profit ); // construct sorted iterator. Use ES_ORDER_BOTTOM since we only want the // first 5 entries and don't want to sort the entire node es::mbs s_res( b_res, es::mbs::eBOTTOM ); s_res.add( "Profit" ); // extract first five entries std::fixed( std::cout ); std::cout << "Rank,Product,Profit" << std::endl; int i; bool r = s_res.beginfetch( n_res ); for( i=0; i!=5 && r ; ++i ) { // get name of product from string dictionary by id const char *prodname = s_product.getstr( f_productid ); // print report line std::cout << i+1 << "," << prodname << "," << f_profit << std::endl; // get next most profitable product r = s_res.incfetch(); } std::cout << std::endl; } void queryprofitq( es::mb &db, es::sd& s_product, const es::date& lowdateid, const es::date& highdateid, int customerid ) { // // Define and execute a database query expression to run query // std::cout << "Compute top 5 most profitable ProductIds for " << "CustomerId=" << customerid << " from " << lowdateid.getcomp() << " to " << highdateid.getcomp() << std::endl << std::endl; // construct query aggregator against input database definition es::mbq qry; // define membership query string for date filter qry << "mbm( id=DateF memtype=in " << "addrange( low=" << (long)lowdateid.getid() << " high= " << (long)highdateid.getid() << "))\n"; // define membership query string for customer filter qry << "mbm( id=CustomerF memtype=in add( value=" << (long)customerid << "))\n"; // define result mbc qry << "mbc( id=Result batch=200 index=private)\n"; /* define aggregation query */ qry << "mba( refmbc=Result " << "addmembership( field=DateId refmbm=DateF ) " << "addmembership( field=CustomerId refmbm=CustomerF ) " << "addformula( field=Profit type=f64 " << " def=\"DollarSales - UnitSales * ProductCost\") " << "addkey( field=ProductId ) " << "addresult( field=Profit aggtype=sum ))"; // prepare and print query plan es::mbd idef = db.getdefinition(); std::string queryplan; qry.prepare( idef, queryplan ); std::cout << "Aggregator Query Plan Derived From Query Language Expression:" << std::endl << queryplan << std::endl; /* execute query */ es::mb rdb = qry.exec( db ); // // print top 5 most profitable products // // construct binding for ProductId and Profit fields in Result class es::mbc c_res( rdb.getclass( "Result" ) ); es::mbb b_res( c_res ); es::mbn n_res = rdb.getnode( c_res ); uint16_t f_productid; double f_profit; b_res.bind_v( "ProductId", &f_productid ); b_res.bind_v( "Profit", &f_profit ); // construct sorted iterator. Use ES_ORDER_BOTTOM since we only want the // first 5 entries and don't want to sort the entire node es::mbs s_res( b_res, es::mbs::eBOTTOM ); s_res.add( "Profit" ); // extract first five entries std::fixed( std::cout ); std::cout << "Rank,Product,Profit" << std::endl; int i; bool r = s_res.beginfetch( n_res ); for( i=0; i!=5 && r ; ++i ) { // get name of product from string dictionary by id const char *prodname = s_product.getstr( f_productid ); // print report line std::cout << i+1 << "," << prodname << "," << f_profit << std::endl; // get next most profitable product r = s_res.incfetch(); } std::cout << std::endl; } void queryunitsales( es::mb& db, const es::date& lowdateid, const es::date& highdateid ) { std::cout <<"Compute TotalUnitSales by date from " << lowdateid.getcomp() << " to " << highdateid.getcomp() << std::endl << std::endl; // retrieve DateSales class definition es::mbc c_datesales = db.getclass( "DateSales" ); // retrieve DateSales root node from database es::mbn n_datesales = db.getnode( c_datesales ); // construct field-bindings and iterator for DateSales es::mbb b_datesales( c_datesales ); es::mbi i_datesales( b_datesales ); uint32_t f_dateid, f_totalunitsales; b_datesales.bind_v( "DateId", &f_dateid ); b_datesales.bind_v( "TotalUnitSales", &f_totalunitsales ); // construct membership filter for DateId es::mbm m_dateid( es::mbm::eIN ); m_dateid.addrange( lowdateid.getid(), highdateid.getid() ); // add membership filter to DateSales field-binding b_datesales.bind( "DateId", m_dateid ); // iterator for DateSales node std::cout << "Date,TotalUnitSales" << std::endl; for( bool r = i_datesales.beginfetch( n_datesales );r; r = i_datesales.incfetch() ) { std::cout << es::date( f_dateid ) << "," << f_totalunitsales << std::endl; } } int main( int argc, char **argv ) { try { // initalize date calculator es::date::init( 2007, 2010 ); // construct database definition es::mbd def = genschema(); // construct empty database from definition es::mb db( def ); // populate Product root node and string dictionary of product names es::sd s_product( SDCHUNKSIZE, true ); genproduct( db, s_product ); // populate DateSales root and child nodes gensales( db ); // run query to determine the 5 most profitable products for sales // to a particular customer and range of dates (in this case the whole of 2008) es::date lowdate( 2008, 1, 1 ); es::date highdate( 2008, 12, 31 ); queryprofit( db, s_product, lowdate, highdate, 20 ); // repeat aggregator query but using query expression language (mbq) queryprofitq( db, s_product, lowdate, highdate, 20 ); // print total unit sales for dates in some range (in this case April 2007) lowdate = es::date( 2007, 4, 1 ); highdate = es::date( 2007, 4, 30 ); queryunitsales( db, lowdate, highdate ); // recover resources for date calculator es::date::destroy(); } catch ( es::errmsg &e ) { std::cerr << "error code=" << e.geterrorcode() << " msg=" << e.geterrormsg() << std::endl; } catch ( es::err &e ) { std::cerr << "error code=" << e.geterrorcode() << std::endl; } return 0; }