1 Reply Latest reply on Apr 25, 2016 12:58 PM by Yoda-Oracle

    Code sample for dax_extract() function

    kayhand

      Hi all,

       

      I wanted to share a simple code sample for the dax_extract() function in the DAX API.

       

      I didn't have the facility for creating an OZIP compressed stream, so for this example I tried to create it by hand by assigning specific values to specific byte addresses (see create_ozip() function below).

      It seems that the first 16 bytes of an OZIP compressed stream consist of signature values; after that, the actual metadata describing the compressed data starts.

      Starting from the 17th byte (index 16 when counting from zero), the stream holds the number of values in the dictionary, the widths of the dictionary symbols, the symbol values themselves, and the number of code words in the data. Finally, the end of the stream consists of the actual compressed data.

       

      In this example, I used a really simple data set which has only 4 distinct values (8 elements in total), thus OZIP needs 2-bit compression. dax_extract() function takes the OZIP compressed stream as input and the program prints out the uncompressed values.

       

      I also have a question related to the dax_scan() operation:

       

      -- Can we do predicate checking directly on OZIP compressed data, or do we always need to feed it an uncompressed stream (i.e., the result of the dax_extract() function)?

       

      Best,

      Kayhan

       

      #include "dax.h"
      #include "dax_query.h"
      #include <stdint.h>
      #include <stdio.h>
      #include <stdlib.h>
      #include <string.h>
      #include <unistd.h>
      
      /*
       * Creates a hand-built sample OZIP compressed stream.
       *
       * The layout below is the author's reverse-engineered understanding of
       * the format (not taken from a specification):
       *   bytes  0..15  signature / header ("OZIP", 0x00 0x06, zero padding)
       *   bytes 16..17  number of dictionary symbols (4)
       *   bytes 18..19  symbol widths, one nibble per symbol (4, 2, 3, 8)
       *   bytes 20..36  symbol data: "Ford", "VW", "BMW", "Mercedes"
       *   bytes 37..40  number of code words (8)
       *   bytes 41..42  the code words, 2 bits each
       *
       * data_stream: out parameter; receives a 64-byte-aligned heap buffer
       *              holding the stream, zero padded up to the allocation
       *              size. The caller owns the buffer and releases it with
       *              free(). Set to NULL if the allocation fails.
       */
      void create_ozip(int8_t **data_stream){
        enum { OZIP_ALIGNMENT = 64 };

        static const uint8_t ozip_stream[] = {
          /* Signature / header: 'O' 'Z' 'I' 'P', 0x00 0x06, zero padding */
          0x4f, 0x5a, 0x49, 0x50, 0x00, 0x06, 0x00, 0x00,
          0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,

          /* Number of symbols in the dictionary (4) */
          0x00, 0x04,

          /* Symbol widths -- Ford : 4 , VW : 2 */
          0x42,
          /* Symbol widths -- BMW : 3 , Mercedes : 8 */
          0x38,

          /* Symbol data: Ford */
          0x46, 0x6f, 0x72, 0x64,
          /* Symbol data: VW */
          0x56, 0x57,
          /* Symbol data: BMW */
          0x42, 0x4d, 0x57,
          /* Symbol data: Mercedes */
          0x4d, 0x65, 0x72, 0x63, 0x65, 0x64, 0x65, 0x73,

          /* Number of code words (or values in data) (8) */
          0x00, 0x00, 0x00, 0x08,

          /* Compressed data: 0001 0101 */
          0x15,
          /* Compressed data: 0010 1110 */
          0x2e
        };

        /* Round the allocation up to a whole number of aligned units so the
           buffer is never smaller than the bytes written into it. */
        const size_t alloc_size =
            ((sizeof ozip_stream + OZIP_ALIGNMENT - 1) / OZIP_ALIGNMENT) * OZIP_ALIGNMENT;

        if (data_stream == NULL) {
          return;
        }

        printf("Creating a sample OZIP compressed data stream ...\n\n");

        *data_stream = memalign(OZIP_ALIGNMENT, alloc_size);
        if (*data_stream == NULL) {
          fprintf(stderr, "create_ozip: could not allocate %lu bytes\n",
                  (unsigned long) alloc_size);
          return;
        }

        /* Zero the padding so nothing past the stream is uninitialized. */
        memset(*data_stream, 0, alloc_size);
        memcpy(*data_stream, ozip_stream, sizeof ozip_stream);
      }
      
      
      /*
       * Demo driver: builds the sample OZIP stream, runs dax_extract() on it
       * and prints the extracted bytes together with the DAX return code.
       *
       * Returns EXIT_FAILURE if the stream, the DAX query API or any buffer
       * could not be set up; otherwise EXIT_SUCCESS. The dax_extract() return
       * code is only printed and does not influence the exit status.
       */
      int main(int argc, char * argv[])
      {
        /* RESULT_CAPACITY is a deliberately generous size for the extracted
           output of this 8-row sample.
           NOTE(review): confirm the output size dax_extract() really needs. */
        enum { DAX_ALIGNMENT = 64, RESULT_CAPACITY = 128 };

        int exit_code = EXIT_FAILURE;
        int rc;
        int8_t *compressed_stream = NULL;
        dax_query_api_1_0_t *my_dax_api = NULL;
        dax_enc_ctx_t *src_ctx_comp = NULL;
        dax_vec_t *comp_source = NULL;
        dax_vec_t *comp_result = NULL;

        (void) argc;
        (void) argv;

        /*Create OZIP stream*/
        create_ozip(&compressed_stream);
        if (compressed_stream == NULL) {
          fprintf(stderr, "Could not create the OZIP stream\n");
          goto cleanup;
        }

        my_dax_api = (dax_query_api_1_0_t *) dax_query_init(1, 0);
        if (my_dax_api == NULL) {
          fprintf(stderr, "dax_query_init failed\n");
          goto cleanup;
        }
        my_dax_api->dax_query_set_log_level(4);

        /* Encoding context for the 2-bit OZIP source; zeroed first so that
           no field is handed to DAX uninitialized. */
        src_ctx_comp = malloc(sizeof *src_ctx_comp);
        if (src_ctx_comp == NULL) {
          fprintf(stderr, "Could not allocate the encoding context\n");
          goto cleanup;
        }
        memset(src_ctx_comp, 0, sizeof *src_ctx_comp);
        src_ctx_comp->enc_flags = 0x800;
        src_ctx_comp->data_width = 2;
        src_ctx_comp->len_width = 0;
        src_ctx_comp->flag_width = 0;

        comp_source = memalign(DAX_ALIGNMENT, sizeof *comp_source);
        if (comp_source == NULL) {
          fprintf(stderr, "Could not allocate the source vector\n");
          goto cleanup;
        }
        memset(comp_source, 0, sizeof *comp_source);
        comp_source->data_stream = (uint8_t *) compressed_stream;
        comp_source->enc_ctx = src_ctx_comp;
        comp_source->nrows = 8;

        comp_result = memalign(DAX_ALIGNMENT, sizeof *comp_result);
        if (comp_result == NULL) {
          fprintf(stderr, "Could not allocate the result vector\n");
          goto cleanup;
        }
        memset(comp_result, 0, sizeof *comp_result);
        /* NOTE(review): the result reuses the source (OZIP) encoding context,
           as in the original sample -- confirm this is what DAX expects. */
        comp_result->enc_ctx = src_ctx_comp;
        comp_result->nrows = 8;

        /* The original allocated a single byte here. Zero-fill the larger
           buffer so the "%s" print below always finds a terminator as long
           as the output fits within RESULT_CAPACITY - 1 bytes. */
        comp_result->data_stream = memalign(DAX_ALIGNMENT, RESULT_CAPACITY);
        if (comp_result->data_stream == NULL) {
          fprintf(stderr, "Could not allocate the result buffer\n");
          goto cleanup;
        }
        memset(comp_result->data_stream, 0, RESULT_CAPACITY);

        /* Named rc rather than errno: errno is a macro once <errno.h> is
           visible, and a wider type avoids truncating the return code. */
        rc = my_dax_api->dax_extract(comp_result, comp_source);

        printf("%s\n\n", (const char *) comp_result->data_stream);
        printf("DAX EXTRACT Return Code: %s %d\n\n", dax_strerror(rc), rc);

        exit_code = EXIT_SUCCESS;

      cleanup:
        if (comp_result != NULL) {
          free(comp_result->data_stream);
        }
        free(comp_result);
        free(comp_source);
        free(src_ctx_comp);
        free(compressed_stream);
        return exit_code;
      }