0 Replies Latest reply: Dec 16, 2013 4:57 AM by user11140167 RSS

    High performance CSV export

    user11140167

      Hi,

       

      I have been looking for a way to export huge datasets to CSV format for import with MySql.

      As sqlplus SPOOL turned out to be slow and difficult to use, I ended up coding my own importer using the OCCI library.

       

      I successfully compiled my code under Red Hat Enterprise Linux 6, 64 bit, and Oracle Instant Client 12.1. But as it only uses C++ STL and OCCI, it should as well build under other environments (e.g. Windows).

       

      Here's the code:

      /**
       * ociexport.cpp - High performance ORACLE to CSV export (using OCCI and STL)
       *
       * This program takes the ORACLE connection information from the following
       * environment variables:
       *
       * ORACLE_USER, ORACLE_PASS, ORACLE_CONN
       *
       * Fields are exported in a CSV with delimiter ';'. Semicola within the fields
       * are escaped with '\;'. Newlines and tabs in the fields are replaced by blanks.
       * Empty fields and NULL values are exported as '\N' for easy MySQL import.
       * CLOBS are exported as well. BLOBS and BFILES are not supported atm.
       *
       * Don't forget to set the LD_LIBRARY_PATH to your Oracle Client libs when
       * building and running this program.
       *
       * @param $1 Select-Statement
       * @param $2 Output file (optional - if empty, output goes to stdout)
       *
       * @author Bert Klauninger
       * @version 0.1.0
       * @changelog
       *    2013-12-13 - Created
       *    2013-12-16 - Added CLOB streaming support
       */
      
      #include <cstdlib>
      #include <cstring>
      #include <fstream>
      #include <iostream>
      #include <string>
      #include <vector>
      
      #include "occi.h"
      
      using namespace oracle::occi;
      using namespace std;
      
      #define BUFFER_SIZE        1024                        // Maximal characters of a LOB to be exported
      
      
      /**
       * Escape all occurrences of ';' and convert newlines and tabs to blanks
       */
      string csv_escape(string src) {
          string result;
      
          const int imax = src.length();
          for (int i = 0; i < imax; ++i) {
              switch (src[i]) {
                  case '\n':
                  case '\r':
                  case '\t':
                      result += ' ';
                      break;
      
                  case ';':
                      result += '\\';
                      result += ';';
                      break;
      
                  default:
                      result += src[i];
              }
          }
          return result;
      }
      
      /**
       * Execute a query and write CSV to the given file.
       *    NB: Empty fields are exported as NULL values!
       */
      void select_into(Connection *con, string sql, string file) {
          if (! con) {
              return;
          }
      
          bool fo = ! file.empty();
          ofstream o;
          if (fo) o.open(file.c_str(), ofstream::out);
      
          Statement *s = con->createStatement(sql);
          ResultSet *r = s->executeQuery();
          vector<MetaData> m = r->getColumnListMetaData();
          const int cols = m.size();
      
          if (fo) cout << "Result has " << cols << " cols" << endl;
          int cnt = 0;
          while (r->next()) {
              string line;
              ++cnt;
      
              /* Stupid ORACLE starts numbering at 1, not 0 */
              for (int i = 1; i <= cols; ++i) {
                  string col;
      
                  /* ...but: Metadata vector starts at 0 */
                  int t = m[i - 1].getInt(MetaData::ATTR_DATA_TYPE);
      
                  if (t == OCCI_SQLT_CLOB) {
                      /* Get the CLOB object via stream */
                      Clob clob = r->getClob(i);
                      if (! clob.isNull()) {
                          clob.open(OCCI_LOB_READONLY);
                          int len = clob.length();
      
                          Stream *instream = clob.getStream();
                          char *buffer = new char[BUFFER_SIZE];
                          memset(buffer, 0, BUFFER_SIZE);
                          int r = 0;
                          do {
                              r = instream->readBuffer(buffer, len);
                              for (int i = 0; i < r; ++i) {
                                  col += (char) buffer[i];
                              }
                          } while (r != -1);
      
                          delete[] buffer;
                          clob.closeStream(instream);
                          clob.close();
                      }
      
                  } else if (! r->isNull(i)) {
                      /* Try to get field value as string */
                      col = r->getString(i);
                  }
      
                  if (col.empty()) {
                      line += "\\N";
                  } else {
                      line += csv_escape(col);
                  }
                  if (i < cols) {
                      line += ';';
                  }
              }
      
              if (fo) {
                  o << line << endl;
              } else {
                  cout << line << endl;
              }
          }
      
          s->closeResultSet(r);
          con->terminateStatement(s);
          if (fo) cout << cnt << " rows exported" << endl;
      }
      
      
      /*** MAIN ***/
      
      int main (int argc, char* argv[]) {
          if (! (argc == 2 || argc == 3)) {
              cerr << "Usage: " << argv[0] << " sql-statement [output-file.csv]" << endl << endl;
              cerr << "Output file uses column separator ';'. Semicola are escaped using '\\;'." << endl;
              cerr << "NULL values and empty fields are exported as '\\N'." << endl;
              cerr << "If no output file is specified, quieted output is sent to stdout." << endl;
              cerr << "Login credentials can be set via the following environment variabes:" << endl;
              cerr << "ORACLE_USER, ORACLE_PASS, ORACLE_CONN" << endl << endl;
              return 1;
          }
      
          const string user = getenv("ORACLE_USER");
          const string pass = getenv("ORACLE_PASS");
          const string osid = getenv("ORACLE_CONN");
          const string sql = argv[1];
          const string outfile = (argc == 3) ? argv[2] : "";
          bool fo = ! outfile.empty();
      
          Environment* env = Environment::createEnvironment(Environment::DEFAULT);
          int ret = 0;
      
          try {
              if (fo) cout << "Connecting as " << user << "@" << osid << endl;
              Connection* const con = env->createConnection(user, pass, osid);
              if (fo) {
                  cout << "Executing query " << sql << endl;
                  cout << "Writing results to " << outfile << endl;
              }
              select_into(con, sql, outfile);
              if (fo) cout << "Closing connection" << endl;
              env->terminateConnection(con);
      
          } catch (SQLException ea) {
              cerr << "Error: " << ea.what();
              ret = 1;
          }
      
          Environment::terminateEnvironment(env);
          return ret;
      }
      
      

       

      My Makefile:

      ociexport: ociexport.cpp

              LD_LIBRARY_PATH=/usr/lib/oracle/12.1/client64/lib \

                      gcc -Wall -g -I /usr/include/oracle/12.1/client64 \

                      -L /usr/lib/oracle/12.1/client64/lib \

                      -o bin/ociexport ociexport.cpp \

                      -lclntsh -lnnz12 -locci

       

      Program call:

      $ export LD_LIBRARY_PATH=/usr/lib/oracle/12.1/client64/lib

      $ export ORACLE_USER=your_user

      $ export ORACLE_PASS=your_pass

      $ export ORACLE_CONN=your_connection_string

      $ ./ociexport "SELECT * FROM applications" > export.csv


       

      Best regards

      Bert