2012年9月5日 星期三

Three ways to get data from Hypertable


Normally we get data from Hypertable via HQL. Interestingly, when I tried to read data through the ThriftBroker (with C++), I found three easy ways to retrieve it.

[Preparing]

First, create a table and load some data into it. I use daily stock quotes as the example (note that the C++ code below expects the table to live in the "quote" namespace):

(1) Download the daily data and convert the DOS text file into a Unix one
#Hypertable only accepts Unix-style ASCII text files (LF line endings, no CRLF)
      yum install dos2unix
      dos2unix -n 2888.tsv 2888unix.tsv
      head 6 2888unix2.tsv
#date   price:open      price:high      price:low       price:close     vol
20110627        11.75   11.95   11.7    11.95   23942
20110628        11.95   12      11.8    11.8    21213
20110629        11.9    12.15   11.85   12.1    39928
20110630        12.1    12.4    12.05   12.35   65627
20110701        12.45   12.6    12.35   12.35   55655

  /opt/hypertable/current/bin/ht shell
  hypertable> create table candle_daily2(price,vol);
  hypertable> LOAD DATA INFILE ROW_KEY_COLUMN=date "~/2888unix2.tsv" INTO TABLE candle_daily2;


Loading 10,357 bytes of input data...

0%   10   20   30   40   50   60   70   80   90   100%
|----|----|----|----|----|----|----|----|----|----|
***************************************************
Load complete.


[Coding]



#include "Common/Compat.h"
#include "Common/System.h"

#include <iostream>
#include <fstream>

#include <netinet/in.h>
#include "ThriftBroker/Client.h"
#include "ThriftBroker/gen-cpp/HqlService.h"
#include "ThriftBroker/ThriftHelper.h"
#include "ThriftBroker/SerializedCellsReader.h"

using namespace Hypertable;
using namespace Hypertable::ThriftGen;

// Runs test_hql, test_scan and test_getc in that order, writing to std::cout.
void run(Thrift::Client *client);
// Example 1: fetch cells with an HQL SELECT (hql_query_as_arrays).
void test_hql(Thrift::Client *client, std::ostream &out);
// Example 2: fetch cells with a scanner configured through a ScanSpec.
void test_scan(Thrift::Client *client, std::ostream &out);
// Example 3: fetch a single cell value with get_cell.
void test_getc(Thrift::Client *client, std::ostream &out);


int main() {
  Thrift::Client *client = new Thrift::Client("localhost", 38080);
  run(client);
}


/**
 * Executes the three retrieval examples one after another.
 *
 * Before each example a "running <name>" banner is written to std::cout.
 * If any example throws a ClientException, the exception is printed to
 * std::cout and the process terminates with exit status 1.
 *
 * @param client  Thrift client passed through to every example.
 */
void run(Thrift::Client *client) {
  typedef void (*Example)(Thrift::Client *, std::ostream &);

  struct Step {
    const char *banner;  // line printed before the example starts
    Example fn;          // the example itself
  };

  // Examples in the order they are executed.
  static const Step kSteps[] = {
    { "running test_hql",  test_hql  },
    { "running test_scan", test_scan },
    { "running test_getc", test_getc },
  };
  const unsigned int step_count = sizeof(kSteps) / sizeof(kSteps[0]);

  try {
    std::ostream &out = std::cout;
    for (unsigned int idx = 0; idx < step_count; ++idx) {
      out << kSteps[idx].banner << std::endl;
      kSteps[idx].fn(client, out);
    }
  }
  catch (ClientException &e) {
    std::cout << e << std::endl;
    exit(1);
  }
}

/**
 * Example 1: retrieve data with an HQL query.
 *
 * Opens the "quote" namespace and, if table "candle_daily2" exists in it,
 * runs a SELECT for rows starting with '20110627' via hql_query_as_arrays.
 * The first returned cell is printed as a whole, then every field of the
 * first (at most) two cells is printed on its own line. Each printed item
 * is followed by a "****" separator line.
 *
 * A short message is printed, and nothing is dumped, when the namespace is
 * missing, the table is missing, or the query returns no cells.
 *
 * @param client  Thrift client used for all broker calls.
 * @param out     Stream that receives the output.
 */
void test_hql(Thrift::Client *client, std::ostream &out) {
  if (!client->namespace_exists("quote"))
  {
    out << "no quote namespace exist" << std::endl;
    return;
  }

  Namespace ns = client->namespace_open("quote");
  if (client->table_exists(ns, "candle_daily2"))
  {
    HqlResultAsArrays result_as_arrays;
    client->hql_query_as_arrays(result_as_arrays, ns, "select * from candle_daily2 where row =^ '20110627'");

    if (result_as_arrays.cells.empty())
    {
      // Indexing cells[0] on an empty result would be out of bounds.
      out << "no cells returned" << std::endl;
    }
    else
    {
      out << result_as_arrays.cells[0] << std::endl << "****" << std::endl;

      // Dump at most the first two cells, but never more than were returned.
      const std::size_t max_cells = 2;
      const std::size_t available = result_as_arrays.cells.size();
      const std::size_t shown = available < max_cells ? available : max_cells;

      for (std::size_t i = 0; i < shown; i++)
      {
        for (std::size_t j = 0; j < result_as_arrays.cells[i].size(); j++)
          out << result_as_arrays.cells[i][j] << std::endl << "****" << std::endl;
      }
    }
  }
  else
  {
    // Mirror the missing-namespace message instead of skipping silently.
    out << "no candle_daily2 table exist" << std::endl;
  }

  client->namespace_close(ns);
}

/**
 * Example 2: retrieve data with a scanner.
 *
 * Builds a ScanSpec whose row_regexp is "^20110627", opens a scanner on
 * table "candle_daily2" in namespace "quote", and prints every cell the
 * scanner returns, one per line, until a fetch yields no cells.
 *
 * @param client  Thrift client used for all broker calls.
 * @param out     Stream that receives the output.
 */
void test_scan(Thrift::Client *client, std::ostream &out) {
  typedef std::vector<Hypertable::ThriftGen::Cell> CellBatch;

  // A default-constructed ScanSpec applies no filter; its fields act like
  // the conditions of a 'where' clause. Each field that is set also needs
  // its __isset flag switched on.
  ScanSpec spec;
  spec.row_regexp = "^20110627";
  spec.__isset.row_regexp = true;
  // Other filters that can be enabled the same way:
  //   spec.cell_limit = 1;              spec.__isset.cell_limit = true;
  //   spec.value_regexp = "^v[24].*";   spec.__isset.value_regexp = true;
  //   spec.columns.push_back("col");    spec.__isset.columns = true;

  Namespace ns = client->namespace_open("quote");
  Scanner scanner = client->open_scanner(ns, "candle_daily2", spec);

  CellBatch batch;
  for (;;) {
    client->scanner_get_cells(batch, scanner);
    if (batch.empty())
      break;  // an empty fetch ends the scan

    for (CellBatch::const_iterator it = batch.begin(); it != batch.end(); ++it) {
      out << *it << std::endl;
    }
  }

  client->scanner_close(scanner);
  client->namespace_close(ns);
}

/**
 * Example 3: retrieve a single cell value.
 *
 * Opens namespace "quote", reads the value stored in table "candle_daily2"
 * at row "20110627", column "price:open" with get_cell, prints it on its
 * own line and closes the namespace again.
 *
 * @param client  Thrift client used for all broker calls.
 * @param out     Stream that receives the output.
 */
void test_getc(Thrift::Client *client, std::ostream &out) {
  // Coordinates of the cell to fetch.
  const char *table_name = "candle_daily2";
  const char *row_key = "20110627";
  const char *column_name = "price:open";

  Namespace ns = client->namespace_open("quote");

  std::string cell_value;
  client->get_cell(cell_value, ns, table_name, row_key, column_name);
  out << cell_value << std::endl;

  client->namespace_close(ns);
}



[Result]

running test_hql
{CellAsArray: row='20110627' cf='price' cq='close' value='11.95' ts=1346808356688330004}
****
20110627
****
price
****
close
****
11.95
****
1346808356688330004
****
20110627
****
price
****
high
****
11.95
****
1346808356688330002
****
running test_scan
{Cell: {Key: row='20110627' cf='price' cq='close' ts=1346808356688330004 rev=1346808356688330004 flag=255} value='11.95'}
{Cell: {Key: row='20110627' cf='price' cq='high' ts=1346808356688330002 rev=1346808356688330002 flag=255} value='11.95'}
{Cell: {Key: row='20110627' cf='price' cq='low' ts=1346808356688330003 rev=1346808356688330003 flag=255} value='11.7'}
{Cell: {Key: row='20110627' cf='price' cq='open' ts=1346808356688330001 rev=1346808356688330001 flag=255} value='11.75'}
{Cell: {Key: row='20110627' cf='vol' cq='' ts=1346808356688330005 rev=1346808356688330005 flag=255} value='23942'}
running test_getc
11.75


沒有留言:

張貼留言

文章分類