ClickHouse TPC-H测试方法
本文在ClickHouse 20.10.1版本测试通过。
注意:ClickHouse数据类型、表名、字段名等大小写敏感。
一、建表语句
建库:
- snippet.sql
CREATE DATABASE tpch1s;
以下建表适用于TPC-H 100s及以下数据量,100s以上数据量,需要将Int32更换为Int64。
- snippet.sql
DROP TABLE IF EXISTS NATION; DROP TABLE IF EXISTS REGION; DROP TABLE IF EXISTS PART; DROP TABLE IF EXISTS SUPPLIER; DROP TABLE IF EXISTS PARTSUPP; DROP TABLE IF EXISTS CUSTOMER; DROP TABLE IF EXISTS ORDERS; DROP TABLE IF EXISTS LINEITEM; CREATE TABLE NATION( N_NATIONKEY Int32 NOT NULL, N_NAME FixedString(25) NOT NULL, N_REGIONKEY Int32 NOT NULL, N_COMMENT String) ENGINE = MergeTree() ORDER BY (N_NATIONKEY) PRIMARY KEY (N_NATIONKEY); CREATE TABLE REGION( R_REGIONKEY Int32 NOT NULL, R_NAME FixedString(25) NOT NULL, R_COMMENT String) ENGINE = MergeTree() ORDER BY (R_REGIONKEY) PRIMARY KEY (R_REGIONKEY); CREATE TABLE PART( P_PARTKEY Int32 NOT NULL, P_NAME String NOT NULL, P_MFGR FixedString(25) NOT NULL, P_BRAND FixedString(10) NOT NULL, P_TYPE String NOT NULL, P_SIZE Int32 NOT NULL, P_CONTAINER FixedString(10) NOT NULL, P_RETAILPRICE String NOT NULL, P_COMMENT String NOT NULL ) ENGINE = MergeTree() ORDER BY (P_PARTKEY) PRIMARY KEY (P_PARTKEY); CREATE TABLE SUPPLIER( S_SUPPKEY Int32 NOT NULL, S_NAME FixedString(25) NOT NULL, S_ADDRESS String NOT NULL, S_NATIONKEY Int32 NOT NULL, S_PHONE FixedString(15) NOT NULL, S_ACCTBAL DECIMAL(15,2) NOT NULL, S_COMMENT String NOT NULL) ENGINE = MergeTree() ORDER BY (S_SUPPKEY) PRIMARY KEY (S_SUPPKEY); CREATE TABLE PARTSUPP( PS_PARTKEY Int32 NOT NULL, PS_SUPPKEY Int32 NOT NULL, PS_AVAILQTY Int32 NOT NULL, PS_SUPPLYCOST DECIMAL(15,2) NOT NULL, PS_COMMENT String NOT NULL ) ENGINE = MergeTree() ORDER BY (PS_PARTKEY) PRIMARY KEY (PS_PARTKEY); CREATE TABLE CUSTOMER ( C_CUSTKEY Int32 NOT NULL, C_NAME String NOT NULL, C_ADDRESS String NOT NULL, C_NATIONKEY Int32 NOT NULL, C_PHONE FixedString(15) NOT NULL, C_ACCTBAL String NOT NULL, C_MKTSEGMENT FixedString(10) NOT NULL, C_COMMENT String NOT NULL) ENGINE = MergeTree() ORDER BY (C_CUSTKEY) PRIMARY KEY (C_CUSTKEY); CREATE TABLE ORDERS( O_ORDERKEY Int32 NOT NULL, O_CUSTKEY Int32 NOT NULL, O_ORDERSTATUS FixedString(1) NOT NULL, O_TOTALPRICE DECIMAL(15,2) NOT NULL, O_ORDERDATE DATE NOT NULL, O_ORDERPRIORITY FixedString(15) NOT NULL, O_CLERK FixedString(15) NOT NULL, O_SHIPPRIORITY Int32 NOT NULL, O_COMMENT String NOT NULL) ENGINE = MergeTree() ORDER BY (O_ORDERKEY) PRIMARY KEY (O_ORDERKEY); CREATE TABLE LINEITEM( L_ORDERKEY Int32 NOT NULL, L_PARTKEY Int32 NOT NULL, L_SUPPKEY Int32 NOT NULL, L_LINENUMBER Int32 NOT NULL, L_QUANTITY DECIMAL(15,2) NOT NULL, L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL, L_DISCOUNT DECIMAL(15,2) NOT NULL, L_TAX DECIMAL(15,2) NOT NULL, L_RETURNFLAG FixedString(1) NOT NULL, L_LINESTATUS FixedString(1) NOT NULL, L_SHIPDATE DATE NOT NULL, L_COMMITDATE DATE NOT NULL, L_RECEIPTDATE DATE NOT NULL, L_SHIPINSTRUCT FixedString(25) NOT NULL, L_SHIPMODE FixedString(10) NOT NULL, L_COMMENT String NOT NULL) ENGINE = MergeTree() ORDER BY (L_ORDERKEY) PRIMARY KEY (L_ORDERKEY);
二、加载数据
加载源数据表使用CSV格式,使用竖线( | )分割列。
- snippet.bash
clickhouse-client -d tpch1s --format_csv_delimiter="|" --query "INSERT INTO NATION FORMAT CSV" < ~/nation.tbl clickhouse-client -d tpch1s --format_csv_delimiter="|" --query "INSERT INTO REGION FORMAT CSV" < ~/region.tbl clickhouse-client -d tpch1s --format_csv_delimiter="|" --query "INSERT INTO PART FORMAT CSV" < ~/part.tbl clickhouse-client -d tpch1s --format_csv_delimiter="|" --query "INSERT INTO SUPPLIER FORMAT CSV" < ~/supplier.tbl clickhouse-client -d tpch1s --format_csv_delimiter="|" --query "INSERT INTO PARTSUPP FORMAT CSV" < ~/partsupp.tbl clickhouse-client -d tpch1s --format_csv_delimiter="|" --query "INSERT INTO CUSTOMER FORMAT CSV" < ~/customer.tbl clickhouse-client -d tpch1s --format_csv_delimiter="|" --query "INSERT INTO ORDERS FORMAT CSV" < ~/orders.tbl clickhouse-client -d tpch1s --format_csv_delimiter="|" --query "INSERT INTO LINEITEM FORMAT CSV" < ~/lineitem.tbl
三、查询
q01
- snippet.sql
SELECT L_RETURNFLAG, L_LINESTATUS, SUM(L_QUANTITY) AS SUM_QTY, SUM(L_EXTENDEDPRICE) AS SUM_BASE_PRICE, SUM(L_EXTENDEDPRICE * (1 - L_DISCOUNT)) AS SUM_DISC_PRICE, SUM(L_EXTENDEDPRICE * (1 - L_DISCOUNT) * (1 + L_TAX)) AS SUM_CHARGE, AVG(L_QUANTITY) AS AVG_QTY, AVG(L_EXTENDEDPRICE) AS AVG_PRICE, AVG(L_DISCOUNT) AS AVG_DISC, COUNT(*) AS COUNT_ORDER FROM LINEITEM WHERE L_SHIPDATE <= DATE '1998-12-01' - INTERVAL '90' DAY GROUP BY L_RETURNFLAG, L_LINESTATUS ORDER BY L_RETURNFLAG, L_LINESTATUS;
q02
无法执行
q03
- snippet.sql
SELECT L_ORDERKEY, SUM(L_EXTENDEDPRICE * (1 - L_DISCOUNT)) AS REVENUE, O_ORDERDATE, O_SHIPPRIORITY FROM CUSTOMER, ORDERS, LINEITEM WHERE C_MKTSEGMENT = 'BUILDING' AND C_CUSTKEY = O_CUSTKEY AND L_ORDERKEY = O_ORDERKEY AND O_ORDERDATE < DATE '1995-03-15' AND L_SHIPDATE > DATE '1995-03-15' GROUP BY L_ORDERKEY, O_ORDERDATE, O_SHIPPRIORITY ORDER BY REVENUE DESC, O_ORDERDATE LIMIT 10;
q04
无法执行
q05
- snippet.sql
SELECT N_NAME, SUM(L_EXTENDEDPRICE * (1 - L_DISCOUNT)) AS REVENUE FROM CUSTOMER, ORDERS, LINEITEM, SUPPLIER, NATION, REGION WHERE C_CUSTKEY = O_CUSTKEY AND L_ORDERKEY = O_ORDERKEY AND L_SUPPKEY = S_SUPPKEY AND C_NATIONKEY = S_NATIONKEY AND S_NATIONKEY = N_NATIONKEY AND N_REGIONKEY = R_REGIONKEY AND R_NAME = 'ASIA' AND O_ORDERDATE >= DATE '1994-01-01' AND O_ORDERDATE < DATE '1994-01-01' + INTERVAL '1' YEAR GROUP BY N_NAME ORDER BY REVENUE DESC;
q18
- snippet.sql
SELECT C_NAME, C_CUSTKEY, O_ORDERKEY, O_ORDERDATE, O_TOTALPRICE, SUM(L_QUANTITY) FROM CUSTOMER, ORDERS, LINEITEM WHERE O_ORDERKEY IN ( SELECT L_ORDERKEY FROM LINEITEM GROUP BY L_ORDERKEY HAVING SUM(L_QUANTITY) > 300 ) AND C_CUSTKEY = O_CUSTKEY AND O_ORDERKEY = L_ORDERKEY GROUP BY C_NAME, C_CUSTKEY, O_ORDERKEY, O_ORDERDATE, O_TOTALPRICE ORDER BY O_TOTALPRICE DESC, O_ORDERDATE LIMIT 100;
打赏作者以资鼓励: