This discussion is archived
9 Replies Latest reply: Nov 13, 2012 10:21 AM by Mark Kelly RSS

Limit number of cases in k-means

762592 Newbie
Currently Being Moderated
Dear sirs,

I am trying to run a k-means model.
My databse has 2.800.000 records but the result I obtain only contains 1.000.000.

Is there any limit on the number of records considered?

Thank you.

Best regards.
  • 1. Re: Limit number of cases in k-means
    604934 Oracle ACE Director
    Currently Being Moderated
    Please check whether you have enabled sampling option somewhere in model build phase
  • 2. Re: Limit number of cases in k-means
    762592 Newbie
    Currently Being Moderated
    Sample option is not enabled...
    Do you have another idea?

    Thank you so much.
  • 3. Re: Limit number of cases in k-means
    604934 Oracle ACE Director
    Currently Being Moderated
    Let me just rephrase the question to check whether I got th problem correctly. You have 28000000 records and apply a k-means clustering model on it. But as the resulting output you only see 1000000 records assigned to a cluster and other 27 million is missing

    Is it right?
  • 4. Re: Limit number of cases in k-means
    762592 Newbie
    Currently Being Moderated
    I have 2.8 million records. Running the model only one million records are assigned, while the other 1.8 million are not assigned...

    You can check the print screen of the results I obtain by following this link:
    http://feupload.fe.up.pt/get/qdykVtm9PxFyLig

    Sorry if I was not able to explain my issue.

    Thank you.
  • 5. Re: Limit number of cases in k-means
    Mark Kelly Oracle ACE
    Currently Being Moderated
    Hi,
    Yes, you are correct, clustering will only take in 1,000,000 rows.
    I am quite sure that it applies a sampling technique that make this sufficient.
    Thanks, Mark
  • 6. Re: Limit number of cases in k-means
    770523 Newbie
    Currently Being Moderated
    send me the your code,
    i will check whats wrong with yours program.
    Regards
  • 7. Re: Limit number of cases in k-means
    770523 Newbie
    Currently Being Moderated
    send me the your code,
    i will check whats wrong with yours program.
    Regards
  • 8. Re: Limit number of cases in k-means
    773171 Newbie
    Currently Being Moderated
    Hi,

    My code is as follows:

    CREATE PACKAGE "K_MEANS" AUTHID DEFINER AS

    PROCEDURE "SONAE_K_MEANS267445543_BA"(case_table IN VARCHAR2 DEFAULT '"SYSTEM"."DM_DATA_MINER"',
    additional_table_1 IN VARCHAR2 DEFAULT NULL,
    model_name IN VARCHAR2 DEFAULT 'DM_DATA_MINER52153_CL',
    feature_table IN VARCHAR2 DEFAULT NULL,
    mapping_table IN VARCHAR2 DEFAULT NULL,
    drop_model IN BOOLEAN DEFAULT FALSE);

    END;


    /
    CREATE PACKAGE BODY "K_MEANS" AS

    c_long_sql_statement_length CONSTANT INTEGER := 32767;

    SUBTYPE SQL_STATEMENT_TYPE IS VARCHAR2(32767);
    SUBTYPE LONG_SQL_STATEMENT_TYPE IS DBMS_SQL.VARCHAR2A;

    TYPE TABLE_ARRAY is TABLE OF VARCHAR2(62);
    TYPE LSTMT_REC_TYPE IS RECORD (
    lstmt dbms_sql.VARCHAR2A,
    lb BINARY_INTEGER DEFAULT 1,
    ub BINARY_INTEGER DEFAULT 0);
    TYPE LSTMT_REC_TYPE_ARRAY is TABLE OF LSTMT_REC_TYPE;
    TYPE QUERY_ARRAY is TABLE OF SQL_STATEMENT_TYPE;
    TYPE TARGET_VALUES_LIST IS TABLE OF VARCHAR2(32);
    TYPE VALUE_COUNT_LIST IS TABLE OF NUMBER;

    PROCEDURE dump_varchar2a(vc2a dbms_sql.VARCHAR2A) IS
    v_str varchar2(32767);
    BEGIN
    DBMS_OUTPUT.PUT_LINE('dump_varchar2a:');
    FOR i IN 1..vc2a.COUNT LOOP
    v_str := vc2a(i);
    DBMS_OUTPUT.PUT_LINE(v_str);
    END LOOP;
    END;

    PROCEDURE ls_append(
    r_lstmt IN OUT NOCOPY LSTMT_REC_TYPE,
    p_txt VARCHAR2)
    IS
    BEGIN
    r_lstmt.ub := r_lstmt.ub + 1;
    r_lstmt.lstmt(r_lstmt.ub) := p_txt;
    END ls_append;

    PROCEDURE ls_append(
    r_lstmt IN OUT NOCOPY LSTMT_REC_TYPE,
    p_txt LSTMT_REC_TYPE) IS
    BEGIN
    FOR i IN p_txt.lb..p_txt.ub LOOP
    r_lstmt.ub := r_lstmt.ub + 1;
    r_lstmt.lstmt(r_lstmt.ub) := p_txt.lstmt(i);
    END LOOP;
    END ls_append;

    FUNCTION query_valid(
    p_query VARCHAR2) RETURN BOOLEAN
    IS
    v_is_valid BOOLEAN;
    BEGIN
    BEGIN
    EXECUTE IMMEDIATE p_query;
    v_is_valid := TRUE;
    EXCEPTION WHEN OTHERS THEN
    v_is_valid := FALSE;
    END;
    RETURN v_is_valid;
    END query_valid;

    FUNCTION table_exist(
    p_table_name VARCHAR2) RETURN BOOLEAN IS
    BEGIN
    RETURN query_valid('SELECT * FROM ' || dbms_assert.simple_sql_name(p_table_name));
    END table_exist;

    FUNCTION model_exist(
    p_model_name VARCHAR2) RETURN BOOLEAN
    IS
    v_model_cnt NUMBER;
    v_model_exists BOOLEAN := FALSE;
    BEGIN
    SELECT COUNT(*) INTO v_model_cnt FROM DM_USER_MODELS WHERE NAME = UPPER(p_model_name);
    IF v_model_cnt > 0 THEN
    v_model_exists := TRUE;
    END IF;
    --DBMS_OUTPUT.PUT_LINE('model exist: '||v_model_exists);
    RETURN v_model_exists;
    EXCEPTION WHEN OTHERS THEN
    RETURN FALSE;
    END model_exist;

    PROCEDURE drop_table(
    p_table_name VARCHAR2)
    IS
    v_stmt SQL_STATEMENT_TYPE;
    BEGIN
    v_stmt := 'DROP TABLE '||dbms_assert.simple_sql_name(p_table_name)||' PURGE';
    EXECUTE IMMEDIATE v_stmt;
    EXCEPTION WHEN OTHERS THEN
    NULL;
    --DBMS_OUTPUT.PUT_LINE('Failed drop_table: '||p_table_name);
    END drop_table;

    PROCEDURE drop_view(
    p_view_name VARCHAR2)
    IS
    v_stmt SQL_STATEMENT_TYPE;
    BEGIN
    v_stmt := 'DROP VIEW '||dbms_assert.simple_sql_name(p_view_name);
    EXECUTE IMMEDIATE v_stmt;
    EXCEPTION WHEN OTHERS THEN
    NULL;
    --DBMS_OUTPUT.PUT_LINE('Failed drop_view: '||p_view_name);
    END drop_view;

    PROCEDURE drop_model(
    p_model_name VARCHAR2)
    IS
    v_diagnostics_table VARCHAR2(30);
    BEGIN
    DBMS_DATA_MINING.DROP_MODEL(p_model_name);
    SELECT SETTING_VALUE INTO v_diagnostics_table
    FROM TABLE(DBMS_DATA_MINING.GET_MODEL_SETTINGS(p_model_name))
    WHERE SETTING_NAME = 'GLMS_DIAGNOSTICS_TABLE_NAME';
    IF (v_diagnostics_table IS NOT NULL) THEN
    drop_table(v_diagnostics_table);
    END IF;
    EXCEPTION WHEN OTHERS THEN
    NULL;
    --DBMS_OUTPUT.PUT_LINE('Failed drop_model: '||p_model_name);
    END drop_model;

    FUNCTION create_new_temp_table_name(prefix IN VARCHAR2, len IN NUMBER)
    RETURN VARCHAR2 IS
    v_table_name VARCHAR2(30);
    v_seed NUMBER;
    BEGIN
    dbms_random.seed(SYS_GUID());
    v_table_name := 'DM$T' || SUBSTR(prefix, 0, 4) || dbms_random.string(NULL, len-8);
    --DBMS_OUTPUT.PUT_LINE('create_new_temp_table_name: '||v_table_name);
    RETURN v_table_name;
    END create_new_temp_table_name;

    FUNCTION create_new_temp_table_name(prefix IN VARCHAR2)
    RETURN VARCHAR2 IS
    BEGIN
    RETURN create_new_temp_table_name(prefix, 30);
    END create_new_temp_table_name;

    FUNCTION ADD_TEMP_TABLE(tempTables IN OUT NOCOPY TABLE_ARRAY, temp_table IN VARCHAR2) RETURN VARCHAR2 IS
    BEGIN
    tempTables.EXTEND;
    tempTables(tempTables.COUNT) := temp_table;
    return temp_table;
    END;

    PROCEDURE DROP_TEMP_TABLES(tempTables IN OUT NOCOPY TABLE_ARRAY) IS
    v_temp VARCHAR2(30);
    BEGIN
    FOR i IN 1..tempTables.COUNT LOOP
    v_temp := tempTables(i);
    drop_table(v_temp);
    drop_view(v_temp);
    tempTables.DELETE(i);
    END LOOP;
    END;

    PROCEDURE CHECK_RESULTS(drop_output IN BOOLEAN,
    result_name IN VARCHAR2) IS
    BEGIN
    -- drop all results if drop = true, otherwise make sure all results don't exist already (raise exception)
    IF result_name IS NOT NULL THEN
    IF drop_output THEN
    drop_table(result_name);
    drop_view(result_name);
    ELSIF (table_exist(result_name)) THEN
    RAISE_APPLICATION_ERROR(-20000, 'Result table exists: '||result_name);
    END IF;
    END IF;
    END;

    PROCEDURE CHECK_MODEL(drop_output IN BOOLEAN,
    model_name IN VARCHAR2) IS
    BEGIN
    -- drop all results if drop = true, otherwise make sure all results don't exist already (raise exception)
    IF model_name IS NOT NULL THEN
    IF drop_output THEN
    drop_model(model_name);
    ELSIF (model_exist(model_name)) THEN
    RAISE_APPLICATION_ERROR(-20001, 'Model exists: '||model_name);
    END IF;
    END IF;
    END;

    PROCEDURE create_table_from_query(query IN OUT NOCOPY LSTMT_REC_TYPE)
    IS
    v_cursor NUMBER;
    v_feedback INTEGER;
    BEGIN
    v_cursor := DBMS_SQL.OPEN_CURSOR;

    DBMS_SQL.PARSE(
    c => v_cursor,
    statement => query.lstmt,
    lb => query.lb,
    ub => query.ub,
    lfflg => FALSE,
    language_flag => dbms_sql.native);
    v_feedback := DBMS_SQL.EXECUTE(v_cursor);
    DBMS_SQL.CLOSE_CURSOR(v_cursor);

    EXCEPTION WHEN OTHERS THEN
    IF DBMS_SQL.IS_OPEN(v_cursor) THEN
    DBMS_SQL.CLOSE_CURSOR(v_cursor);
    END IF;
    RAISE;
    END;

    FUNCTION get_row_count(tableName IN VARCHAR2)
    RETURN INTEGER IS
    v_stmt VARCHAR(100);
    qcount INTEGER := 0;
    BEGIN
    v_stmt := 'SELECT COUNT(*) FROM '|| tableName;
    EXECUTE IMMEDIATE v_stmt INTO qcount;
    RETURN qcount;
    END get_row_count;

    PROCEDURE SET_EQUAL_DISTRIBUTION (
    counts IN OUT VALUE_COUNT_LIST )
    IS
    v_minvalue NUMBER := 0;
    BEGIN
    FOR i IN counts.FIRST..counts.LAST
    LOOP
    IF ( i = counts.FIRST )
    THEN
    v_minvalue := counts(i);
    ELSIF ( counts(i) > 0 AND v_minvalue > counts(i) )
    THEN
    v_minvalue := counts(i);
    END IF;
    END LOOP;

    FOR i IN counts.FIRST..counts.LAST
    LOOP
    counts(i) := v_minvalue;
    END LOOP;
    END SET_EQUAL_DISTRIBUTION;

    PROCEDURE GET_STRATIFIED_DISTRIBUTION (
    table_name VARCHAR2,
    attribute_name VARCHAR2,
    percentage NUMBER,
    attr_values IN OUT NOCOPY TARGET_VALUES_LIST,
    counts IN OUT NOCOPY VALUE_COUNT_LIST,
    counts_sampled IN OUT NOCOPY VALUE_COUNT_LIST )
    IS
    v_tmp_stmt VARCHAR2(4000);
    BEGIN
    v_tmp_stmt :=
    'SELECT /*+ noparallel(t)*/ ' || attribute_name ||
    ', count(*), ROUND ( ( count(*) * ' || percentage || ') / 100.0 ) FROM '|| table_name ||
    ' WHERE ' || attribute_name ||' IS NOT NULL GROUP BY ' || attribute_name;
    EXECUTE IMMEDIATE v_tmp_stmt
    BULK COLLECT INTO attr_values, counts, counts_sampled;
    END GET_STRATIFIED_DISTRIBUTION;

    FUNCTION GENERATE_STRATIFIED_SQL (
    v_2d_temp_view VARCHAR2,
    src_table_name VARCHAR2,
    attr_names TARGET_VALUES_LIST,
    attribute_name VARCHAR2,
    percentage NUMBER,
    op VARCHAR2,
    equal_distribution IN BOOLEAN DEFAULT FALSE) RETURN LSTMT_REC_TYPE
    IS
    v_tmp_lstmt LSTMT_REC_TYPE;
    attr_values_res TARGET_VALUES_LIST;
    counts_res VALUE_COUNT_LIST;
    counts_sampled_res VALUE_COUNT_LIST;
    tmp_str VARCHAR2(4000);
    sample_count PLS_INTEGER;

    BEGIN
    GET_STRATIFIED_DISTRIBUTION(src_table_name, attribute_name, percentage, attr_values_res, counts_res, counts_sampled_res);
    IF ( equal_distribution = TRUE )
    THEN
    SET_EQUAL_DISTRIBUTION(counts_sampled_res);
    END IF;

    v_tmp_lstmt.ub := 0; -- initialize
    ls_append(v_tmp_lstmt, 'CREATE TABLE ');
    ls_append(v_tmp_lstmt, v_2d_temp_view);
    ls_append(v_tmp_lstmt, ' AS ');
    ls_append(v_tmp_lstmt, '( SELECT ');

    FOR i IN attr_names.FIRST..attr_names.LAST
    LOOP
    IF ( i != attr_names.FIRST )
    THEN
    ls_append(v_tmp_lstmt,',');
    END IF;

    ls_append(v_tmp_lstmt, attr_names(i));
    END LOOP;

    ls_append(v_tmp_lstmt, ' FROM (SELECT /*+ no_merge */ t.*, row_number() over(partition by '||attribute_name||' order by ora_hash(ROWNUM)) RNUM FROM ' || src_table_name || ' t) WHERE RNUM = 1 OR ');

    FOR i IN attr_values_res.FIRST..attr_values_res.LAST
    LOOP
    IF ( i != attr_values_res.FIRST )
    THEN
    tmp_str := ' OR ';
    END IF;
    IF ( counts_res(i) <= 2 ) THEN
    sample_count := counts_res(i);
    ELSE
    sample_count := counts_sampled_res(i);
    END IF;
    tmp_str := tmp_str ||
    '( ' || attribute_name || ' = ''' || REPLACE(attr_values_res(i), '''', '''''') || '''' ||
    ' AND ORA_HASH(RNUM,(' || counts_res(i) || ' -1),12345) ' || op || sample_count || ') ';
    ls_append(v_tmp_lstmt, tmp_str );
    END LOOP;
    ls_append(v_tmp_lstmt, ') ');
    return v_tmp_lstmt;
    END GENERATE_STRATIFIED_SQL;


    PROCEDURE "SONAE_K_MEANS267445543_BA"(case_table IN VARCHAR2 DEFAULT '"SYSTEM"."DM_DATA_MINER"',
    additional_table_1 IN VARCHAR2 DEFAULT NULL,
    model_name IN VARCHAR2 DEFAULT 'DM_DATA_MINER52153_CL',
    feature_table IN VARCHAR2 DEFAULT NULL,
    mapping_table IN VARCHAR2 DEFAULT NULL,
    drop_model IN BOOLEAN DEFAULT FALSE)
    IS
    additional_data TABLE_ARRAY := TABLE_ARRAY(
    additional_table_1
    );
    v_tempTables TABLE_ARRAY := TABLE_ARRAY();
    v_2d_view VARCHAR2(30);
    v_2d_temp_view VARCHAR2(30);
    v_txn_views TABLE_ARRAY := TABLE_ARRAY();
    v_txn_temp_views TABLE_ARRAY := TABLE_ARRAY();
    v_case_data SQL_STATEMENT_TYPE := case_table;
    v_case_id VARCHAR2(30) := 'DMR$CASE_ID';
    v_tmp_lstmt LSTMT_REC_TYPE;
    v_build_data VARCHAR2(30);
    v_build_setting VARCHAR2(30);
    v_gen_caseId BOOLEAN := FALSE;
    v_txt_build VARCHAR2(30);
    v_txt_test VARCHAR2(30);
    v_content_index VARCHAR2(30);
    v_content_index_pref VARCHAR2(30);
    v_category_temp_table VARCHAR2(30);
    v_term_final_table VARCHAR2(30);
    v_term_final_table_index VARCHAR2(30);
    v_mapping_table_index VARCHAR2(30);
    pragma autonomous_transaction;
    BEGIN
    execute immediate 'Alter session set NLS_NUMERIC_CHARACTERS=".,"';

    CHECK_MODEL(drop_model, model_name);
    CHECK_RESULTS(drop_model, feature_table);
    CHECK_RESULTS(drop_model, mapping_table);

    IF (v_gen_caseId) THEN
    v_case_data := ADD_TEMP_TABLE(v_tempTables, create_new_temp_table_name('DM$T'));
    EXECUTE IMMEDIATE 'CREATE TABLE '||v_case_data||' as SELECT rownum as DMR$CASE_ID, t.* FROM ('||case_table||') t ';
    EXECUTE IMMEDIATE 'ALTER TABLE '||v_case_data||' add constraint '||create_new_temp_table_name('PK')||' primary key (DMR$CASE_ID)';
    END IF;

    ----- Start: Input Data Preparation -----
    v_2d_temp_view := ADD_TEMP_TABLE(v_tempTables, create_new_temp_table_name('DM$T'));
    ls_append(v_tmp_lstmt, 'CREATE VIEW ');
    ls_append(v_tmp_lstmt, v_2d_temp_view);
    ls_append(v_tmp_lstmt, ' AS ');
    ls_append(v_tmp_lstmt, ' ( ');
    ls_append(v_tmp_lstmt, 'SELECT "DM_DATA_MINER"."ID" as "DMR$CASE_ID", "DM_DATA_MINER"."NUM_MEDIO_TRX_MES" AS "NUM_MEDIO_TRX_MES",
    "DM_DATA_MINER"."SD_MEDIO_COMPRAS" AS "SD_MEDIO_COMPRAS",
    "DM_DATA_MINER"."TAXA_REB" AS "TAXA_REB",
    "DM_DATA_MINER"."TEMPO_ENTRE_TRANSACCOES" AS "TEMPO_ENTRE_TRANSACCOES",
    "DM_DATA_MINER"."VALOR_MEDIO_MENSAL_COMPRAS" AS "VALOR_MEDIO_MENSAL_COMPRAS",
    "DM_DATA_MINER"."VALOR_MENSAL_BEBIDAS" AS "VALOR_MENSAL_BEBIDAS",
    "DM_DATA_MINER"."VALOR_MENSAL_CHARCUTARIA" AS "VALOR_MENSAL_CHARCUTARIA",
    "DM_DATA_MINER"."VALOR_MENSAL_CULTURA_LAZER" AS "VALOR_MENSAL_CULTURA_LAZER",
    "DM_DATA_MINER"."VALOR_MENSAL_DPH" AS "VALOR_MENSAL_DPH",
    "DM_DATA_MINER"."VALOR_MENSAL_FRUTAS_LEGUMES" AS "VALOR_MENSAL_FRUTAS_LEGUMES",
    "DM_DATA_MINER"."VALOR_MENSAL_LACTIC_CONG" AS "VALOR_MENSAL_LACTIC_CONG",
    "DM_DATA_MINER"."VALOR_MENSAL_MERCEARIA" AS "VALOR_MENSAL_MERCEARIA",
    "DM_DATA_MINER"."VALOR_MENSAL_PADARIA" AS "VALOR_MENSAL_PADARIA",
    "DM_DATA_MINER"."VALOR_MENSAL_PEIXARIA" AS "VALOR_MENSAL_PEIXARIA",
    "DM_DATA_MINER"."VALOR_MENSAL_SAZONAIS" AS "VALOR_MENSAL_SAZONAIS",
    "DM_DATA_MINER"."VALOR_MENSAL_TALHO" AS "VALOR_MENSAL_TALHO",
    "DM_DATA_MINER"."VALOR_MENSAL_UTILID_DOMESTICAS" AS "VALOR_MENSAL_UTILID_DOMESTICAS" FROM (' || v_case_data || ') "DM_DATA_MINER" ');
    ls_append(v_tmp_lstmt, ' ) ');
    create_table_from_query(v_tmp_lstmt);
    v_2d_view := v_2d_temp_view;

    ----- End: Input Data Preparation -----



    ----- Start: Normalize Transformation -----
    v_tmp_lstmt.ub := 0; -- initialize
    v_2d_temp_view := ADD_TEMP_TABLE(v_tempTables, create_new_temp_table_name('DM$T'));
    ls_append(v_tmp_lstmt, 'CREATE VIEW ');
    ls_append(v_tmp_lstmt, v_2d_temp_view);
    ls_append(v_tmp_lstmt, ' AS ');
    ls_append(v_tmp_lstmt, ' ( ');
    ls_append(v_tmp_lstmt, 'SELECT
    "DMR$CASE_ID",
    LEAST(1, GREATEST(0, (ROUND(("NUM_MEDIO_TRX_MES" - 1.0) / (17.0 - 1.0),15) * (1.0 - 0.0) + 0.0))) "NUM_MEDIO_TRX_MES",
    LEAST(1, GREATEST(0, (ROUND(("SD_MEDIO_COMPRAS" - 0.0) / (5679.177070325683 - 0.0),15) * (1.0 - 0.0) + 0.0))) "SD_MEDIO_COMPRAS",
    LEAST(1, GREATEST(0, (ROUND(("TAXA_REB" - 0.0) / (1.0 - 0.0),15) * (1.0 - 0.0) + 0.0))) "TAXA_REB",
    LEAST(1, GREATEST(0, (ROUND(("TEMPO_ENTRE_TRANSACCOES" - 6.134259259259259E-4) / (85.17270833333333 - 6.134259259259259E-4),15) * (1.0 - 0.0) + 0.0))) "TEMPO_ENTRE_TRANSACCOES",
    LEAST(1, GREATEST(0, (ROUND(("VALOR_MEDIO_MENSAL_COMPRAS" - 0.13) / (9727.74256 - 0.13),15) * (1.0 - 0.0) + 0.0))) "VALOR_MEDIO_MENSAL_COMPRAS",
    LEAST(1, GREATEST(0, (ROUND(("VALOR_MENSAL_BEBIDAS" - 0.0) / (16402.9 - 0.0),15) * (1.0 - 0.0) + 0.0))) "VALOR_MENSAL_BEBIDAS",
    LEAST(1, GREATEST(0, (ROUND(("VALOR_MENSAL_CHARCUTARIA" - 0.0) / (198.07 - 0.0),15) * (1.0 - 0.0) + 0.0))) "VALOR_MENSAL_CHARCUTARIA",
    LEAST(1, GREATEST(0, (ROUND(("VALOR_MENSAL_CULTURA_LAZER" - 0.0) / (425.06 - 0.0),15) * (1.0 - 0.0) + 0.0))) "VALOR_MENSAL_CULTURA_LAZER",
    LEAST(1, GREATEST(0, (ROUND(("VALOR_MENSAL_DPH" - 0.0) / (973.83 - 0.0),15) * (1.0 - 0.0) + 0.0))) "VALOR_MENSAL_DPH",
    LEAST(1, GREATEST(0, (ROUND(("VALOR_MENSAL_FRUTAS_LEGUMES" - 0.0) / (279.66779 - 0.0),15) * (1.0 - 0.0) + 0.0))) "VALOR_MENSAL_FRUTAS_LEGUMES",
    LEAST(1, GREATEST(0, (ROUND(("VALOR_MENSAL_LACTIC_CONG" - 0.0) / (4373.75 - 0.0),15) * (1.0 - 0.0) + 0.0))) "VALOR_MENSAL_LACTIC_CONG",
    LEAST(1, GREATEST(0, (ROUND(("VALOR_MENSAL_MERCEARIA" - 0.0) / (1560.61 - 0.0),15) * (1.0 - 0.0) + 0.0))) "VALOR_MENSAL_MERCEARIA",
    LEAST(1, GREATEST(0, (ROUND(("VALOR_MENSAL_PADARIA" - 0.0) / (228.53 - 0.0),15) * (1.0 - 0.0) + 0.0))) "VALOR_MENSAL_PADARIA",
    LEAST(1, GREATEST(0, (ROUND(("VALOR_MENSAL_PEIXARIA" - 0.0) / (1795.33179 - 0.0),15) * (1.0 - 0.0) + 0.0))) "VALOR_MENSAL_PEIXARIA",
    LEAST(1, GREATEST(0, (ROUND(("VALOR_MENSAL_SAZONAIS" - 0.0) / (666.42 - 0.0),15) * (1.0 - 0.0) + 0.0))) "VALOR_MENSAL_SAZONAIS",
    LEAST(1, GREATEST(0, (ROUND(("VALOR_MENSAL_TALHO" - 0.0) / (564.85537 - 0.0),15) * (1.0 - 0.0) + 0.0))) "VALOR_MENSAL_TALHO",
    LEAST(1, GREATEST(0, (ROUND(("VALOR_MENSAL_UTILID_DOMESTICAS" - 0.0) / (130.76 - 0.0),15) * (1.0 - 0.0) + 0.0))) "VALOR_MENSAL_UTILID_DOMESTICAS"
    FROM ');
    ls_append(v_tmp_lstmt, v_2d_view);
    ls_append(v_tmp_lstmt, ' ) ');
    create_table_from_query(v_tmp_lstmt);
    v_2d_view := v_2d_temp_view;

    ----- End: Normalize Transformation -----



    ----- Start: Mining Data Preparation -----
    v_tmp_lstmt.ub := 0; -- initialize
    v_2d_temp_view := ADD_TEMP_TABLE(v_tempTables, create_new_temp_table_name('DM$T'));
    ls_append(v_tmp_lstmt, 'CREATE VIEW ');
    ls_append(v_tmp_lstmt, v_2d_temp_view);
    ls_append(v_tmp_lstmt, ' AS ');
    ls_append(v_tmp_lstmt, ' ( ');
    ls_append(v_tmp_lstmt,
    'SELECT caseTable."DMR$CASE_ID"
    , caseTable."NUM_MEDIO_TRX_MES"
    , caseTable."SD_MEDIO_COMPRAS"
    , caseTable."TAXA_REB"
    , caseTable."TEMPO_ENTRE_TRANSACCOES"
    , caseTable."VALOR_MEDIO_MENSAL_COMPRAS"
    , caseTable."VALOR_MENSAL_BEBIDAS"
    , caseTable."VALOR_MENSAL_CHARCUTARIA"
    , caseTable."VALOR_MENSAL_CULTURA_LAZER"
    , caseTable."VALOR_MENSAL_DPH"
    , caseTable."VALOR_MENSAL_FRUTAS_LEGUMES"
    , caseTable."VALOR_MENSAL_LACTIC_CONG"
    , caseTable."VALOR_MENSAL_MERCEARIA"
    , caseTable."VALOR_MENSAL_PADARIA"
    , caseTable."VALOR_MENSAL_PEIXARIA"
    , caseTable."VALOR_MENSAL_SAZONAIS"
    , caseTable."VALOR_MENSAL_TALHO"
    , caseTable."VALOR_MENSAL_UTILID_DOMESTICAS"
    FROM ('); ls_append(v_tmp_lstmt, v_2d_view); ls_append(v_tmp_lstmt, ') caseTable

    '
    );
    ls_append(v_tmp_lstmt, ' ) ');
    create_table_from_query(v_tmp_lstmt);
    v_2d_view := v_2d_temp_view;

    v_build_data := v_2d_view;

    ----- End: Mining Data Preparation -----



    v_build_setting := ADD_TEMP_TABLE(v_tempTables, create_new_temp_table_name('DM$T'));
    EXECUTE IMMEDIATE 'CREATE TABLE ' || v_build_setting || ' (setting_name VARCHAR2(30), setting_value VARCHAR2(128))';
    EXECUTE IMMEDIATE 'INSERT INTO ' || v_build_setting || ' VALUES (''KMNS_NUM_BINS'', ''10'')';
    EXECUTE IMMEDIATE 'INSERT INTO ' || v_build_setting || ' VALUES (''CLUS_NUM_CLUSTERS'', ''5'')';
    EXECUTE IMMEDIATE 'INSERT INTO ' || v_build_setting || ' VALUES (''ALGO_NAME'', ''ALGO_KMEANS'')';
    EXECUTE IMMEDIATE 'INSERT INTO ' || v_build_setting || ' VALUES (''KMNS_SPLIT_CRITERION'', ''KMNS_VARIANCE'')';
    EXECUTE IMMEDIATE 'INSERT INTO ' || v_build_setting || ' VALUES (''KMNS_DISTANCE'', ''KMNS_EUCLIDEAN'')';
    EXECUTE IMMEDIATE 'INSERT INTO ' || v_build_setting || ' VALUES (''KMNS_MIN_PCT_ATTR_SUPPORT'', ''0.1'')';
    EXECUTE IMMEDIATE 'INSERT INTO ' || v_build_setting || ' VALUES (''KMNS_BLOCK_GROWTH'', ''2'')';
    EXECUTE IMMEDIATE 'INSERT INTO ' || v_build_setting || ' VALUES (''KMNS_CONV_TOLERANCE'', ''0.01'')';
    EXECUTE IMMEDIATE 'INSERT INTO ' || v_build_setting || ' VALUES (''KMNS_ITERATIONS'', ''3'')';
    COMMIT;


    -- BUILD MODEL
    DBMS_DATA_MINING.CREATE_MODEL(
    model_name => model_name,
    mining_function => dbms_data_mining.clustering,
    data_table_name => v_build_data,
    case_id_column_name => v_case_id,
    settings_table_name => v_build_setting);

    DROP_TEMP_TABLES(v_tempTables);

    EXCEPTION WHEN OTHERS THEN
    DROP_TEMP_TABLES(v_tempTables);
    RAISE;
    END;

    END;


    Best regards.
  • 9. Re: Limit number of cases in k-means
    Mark Kelly Oracle ACE
    Currently Being Moderated
    1000000 is the max number of rows used in cluster build. See earlier response to this in the posting.

Legend

  • Correct Answers - 10 points
  • Helpful Answers - 5 points