SQLScript Source Code
Part One : How to create test table schema
CREATE SCHEMA ESHOP_PAL;
ALTER SESSION SET CURRENT_SCHEMA = ESHOP_PAL;
GRANT SELECT ON SCHEMA ESHOP_PAL TO _SYS_REPO WITH GRANT OPTION;
DROP TABLE CUSTOMER;--200,000
CREATE COLUMN TABLE CUSTOMER(
CUSTOMER_ID INT,
NAME VARCHAR(100),
INCOME INT,--LOW:0 MEDIAN:1 HIGH:2
AGE INT,--10, 20, 30, 40, 50, 60
GENDER VARCHAR(10),--MALE, FEMALE
MARRIED VARCHAR(10),--Y, N
CHILDREN_NUMBER INT,--0,1,2,3
HAS_HOUSE_LOAN VARCHAR(10),--Y, N
OCCUPATION VARCHAR(100),
EDUCATION VARCHAR(100),
EMAIL VARCHAR(100),
CITY VARCHAR(100),
CREATED_AT TIMESTAMP,
PRIMARY KEY(CUSTOMER_ID)
);
DROP TABLE ITEM;--50
CREATE COLUMN TABLE ITEM(
ITEM_ID INT,
ITEM_NAME VARCHAR(100),
CATEGORY_NAME VARCHAR(100),
PRIMARY KEY(ITEM_ID)
);
DROP TABLE CALENDAR;--1990-2020
CREATE COLUMN TABLE CALENDAR(
CALENDAR_ID INT,
YEAR INT,
MONTH INT,
PRIMARY KEY(CALENDAR_ID)
);
DROP TABLE ORDER_FACTS;--100,000,000
CREATE COLUMN TABLE ORDER_FACTS(
ORDER_ID INT,
CALENDER_ID INT,
CUSTOMER_ID INT,
ITEM_ID INT,
QUANTITY INT,
AMOUNT DOUBLE,
PRIMARY KEY(ORDER_ID)
);
Part Two : How to create procedures using PAL (Predictive Analysis Library)
ALTER SESSION SET CURRENT_SCHEMA = ESHOP_PAL;
1. Create Procedure for ABC Classification
DROP TYPE CONTROL_T;
CREATE TYPE CONTROL_T AS TABLE("Name" VARCHAR(100), "intArgs" INT,
"doubleArgs" DOUBLE,"strArgs" VARCHAR(100));
DROP TYPE ABC_DATA_T;
CREATE TYPE ABC_DATA_T AS TABLE("ITEM" INT,"VALUE" DOUBLE);
DROP TYPE ABC_RESULT_T;
CREATE TYPE ABC_RESULT_T AS TABLE("ABC" VARCHAR(10),"ITEM" INT);
DROP PROCEDURE palAbcAnalysis;
CREATE PROCEDURE palAbcAnalysis( IN target ABC_DATA_T, IN control CONTROL_T,
OUT results ABC_RESULT_T )
LANGUAGE LLANG
AS
BEGIN
export Void main(Table<Int32 "ITEM", Double "VALUE"> "target" targetTab,
Table<String "Name", Int32 "intArgs", Double
"doubleArgs",String "strArgs"> "control" controlTab,
Table<String "ABC", Int32 "ITEM"> "results" &
resultsTab) {
pal::abcAnalysis(targetTab, controlTab, resultsTab);
}
END;
DROP TABLE ABC_CONTROL_TBL;
CREATE COLUMN TABLE ABC_CONTROL_TBL ("Name" VARCHAR(100), "intArgs" INT,
"doubleArgs" DOUBLE,"strArgs" VARCHAR(100));
INSERT INTO ABC_CONTROL_TBL VALUES ('START_COLUMN',0,null,NULL);
INSERT INTO ABC_CONTROL_TBL VALUES ('END_COLUMN',1,null,null);
INSERT INTO ABC_CONTROL_TBL VALUES ('THREAD_NUMBER',2,null,null);
INSERT INTO ABC_CONTROL_TBL VALUES ('PERCENT_A',null,0.6,null);
INSERT INTO ABC_CONTROL_TBL VALUES ('PERCENT_B',null,0.3,null);
INSERT INTO ABC_CONTROL_TBL VALUES ('PERCENT_C',null,0.1,null);
DROP TABLE ABC_RESULT_TBL;
CREATE COLUMN TABLE ABC_RESULT_TBL("ABC" VARCHAR(10),"ITEM" INT,PRIMARY
KEY("ITEM"));
DROP TABLE ABC_INPUT;
CREATE COLUMN TABLE ABC_INPUT("ITEM" INT,"VALUE" DOUBLE);
insert into ABC_INPUT select CUSTOMER_ID as "ITEM", sum(AMOUNT) as "VALUE"
from ORDER_FACTS group by CUSTOMER_ID
with parameters
('request_flags'='ANALYZE_MODEL','request_flags'='USE_PARALLEL_AGGREGATION');
2. Create Procedure for Building Decision Tree
DROP TYPE DT_DATA_T;
CREATE TYPE DT_DATA_T AS TABLE(INCOME INT,AGE INT,GENDER VARCHAR(10),MARRIED
VARCHAR(10),CHILDREN_NUMBER INT,HAS_HOUSE_LOAN VARCHAR(10),CLASS VARCHAR(10));
DROP TYPE DT_MODEL_T;
CREATE TYPE DT_MODEL_T AS TABLE(MODEL CLOB);
DROP PROCEDURE createTreeModel;
CREATE PROCEDURE createTreeModel( IN data DT_DATA_T, IN control CONTROL_T,
OUT model DT_MODEL_T )
LANGUAGE LLANG
AS
BEGIN
export Void main(Table<Int32 "INCOME",Int32 "AGE",String "GENDER",String
"MARRIED",Int32 "CHILDREN_NUMBER",String "HAS_HOUSE_LOAN",String "CLASS">
"data" dataTab,
Table<String "Name", Int32 "intArgs", Double
"doubleArgs", String "strArgs"> "control" argsTab,
Table<String "MODEL"> "model" & modelTab) {
pal::createDT(dataTab, argsTab, modelTab);
}
END;
DROP TABLE DTMODEL_CONTROL_TBL;
CREATE COLUMN TABLE DTMODEL_CONTROL_TBL ("Name" VARCHAR(100), "intArgs" INT,
"doubleArgs" DOUBLE,"strArgs" VARCHAR(100));
INSERT INTO DTMODEL_CONTROL_TBL VALUES ('START_COLUMN',0,null,null);
INSERT INTO DTMODEL_CONTROL_TBL VALUES ('END_COLUMN',6,null,null);
INSERT INTO DTMODEL_CONTROL_TBL VALUES ('PERCENTAGE',null,0.5,null);
INSERT INTO DTMODEL_CONTROL_TBL VALUES ('THREAD_NUMBER',8,null,null);
drop view CUSTOMER_V;
create view CUSTOMER_V as
select INCOME,AGE,GENDER,MARRIED,CHILDREN_NUMBER,HAS_HOUSE_LOAN,ABC as CLASS
from
ABC_RESULT_TBL a
join CUSTOMER b
on a.ITEM = b.CUSTOMER_ID;
DROP TABLE DT_MODEL_TABLE;
CREATE column TABLE DT_MODEL_TABLE ("MODEL" CLOB);
3. Create Procedure for Predicting with C4.5 Tree
DROP TYPE DTPREDICT_RESULT_T;
CREATE TYPE DTPREDICT_RESULT_T AS TABLE("ID" INT, "CLASS" VARCHAR(500));
DROP TYPE PREDCIT_DATA_T;
CREATE TYPE PREDCIT_DATA_T AS TABLE(ID INT,INCOME INT,AGE INT,GENDER
VARCHAR(10),MARRIED VARCHAR(10),CHILDREN_NUMBER INT,HAS_HOUSE_LOAN
VARCHAR(10));
DROP TABLE DTPREDICT_CONTROL_TBL;
CREATE COLUMN TABLE DTPREDICT_CONTROL_TBL ("Name" VARCHAR(100), "intArgs" INT,
"doubleArgs" DOUBLE,"strArgs" VARCHAR(100));
INSERT INTO DTPREDICT_CONTROL_TBL VALUES ('START_COLUMN',1,null,null);
INSERT INTO DTPREDICT_CONTROL_TBL VALUES ('END_COLUMN',6,null,null);
INSERT INTO DTPREDICT_CONTROL_TBL VALUES ('THREAD_NUMBER',1,null,null);
INSERT INTO DTPREDICT_CONTROL_TBL VALUES ('ID_COLUMN',0,null,null);
DROP PROCEDURE predictWithDT;
CREATE PROCEDURE predictWithDT( IN predictive PREDCIT_DATA_T, IN control
CONTROL_T, IN model DT_MODEL_T, OUT results DTPREDICT_RESULT_T )
LANGUAGE LLANG
AS
BEGIN
export Void main(Table<Int32 "ID",Int32 "INCOME",Int32 "AGE",String
"GENDER",String "MARRIED",Int32 "CHILDREN_NUMBER",String "HAS_HOUSE_LOAN">
"predictive" predictTab,
Table<String "Name", Int32 "intArgs", Double "doubleArgs", String
"strArgs"> "control" argsTab,
Table<String "MODEL"> "model" modelTab,
Table<Int32 "ID", String "CLASS"> "results" & resultsTab){
pal::predictWithDT(predictTab, argsTab, modelTab, resultsTab);
}
END;
DROP TABLE PREDCIT_DATA_TABLE;
CREATE COLUMN TABLE PREDCIT_DATA_TABLE(ID INT,INCOME INT,AGE INT,GENDER
VARCHAR(10),MARRIED VARCHAR(10),CHILDREN_NUMBER INT,HAS_HOUSE_LOAN
VARCHAR(10));
Part Three : How to call procedures in client side
1. Create Procedure for ABC Classification
UPDATE ABC_CONTROL_TBL set "doubleArgs" = 0.6 where "Name" = 'PERCENT_A';
UPDATE ABC_CONTROL_TBL set "doubleArgs" = 0.3 where "Name" = 'PERCENT_B';
UPDATE ABC_CONTROL_TBL set "doubleArgs" = 0.1 where "Name" = 'PERCENT_C';
CALL palAbcAnalysis(ABC_INPUT, ABC_CONTROL_TBL, ABC_RESULT_TBL) with overview;
2. Create Procedure for Building Decision Tree
truncate table ESHOP_PAL.DT_MODEL_TABLE;
CALL ESHOP_PAL.createTreeModel(CUSTOMER_V, DTMODEL_CONTROL_TBL,
DT_MODEL_TABLE) with overview;
3. Create Procedure for Predicting with C4.5 Tree
truncate table PREDCIT_DATA_TABLE;
INSERT into PREDCIT_DATA_TABLE values(1, 1, 30, 'Male', 'Y', 1, 'Y');
call ESHOP_PAL.predictWithDT(ESHOP_PAL.PREDCIT_DATA_TABLE,
ESHOP_PAL.DTPREDICT_CONTROL_TBL,ESHOP_PAL.DT_MODEL_TABLE, null ) with
overview;
select * from ESHOP_PAL.PREDCIT_DATA_TABLE;