FAIRYFAR-INTERNAL
 
  FAIRYFAR-INTERNAL  |  SITEMAP  |  ABOUT-ME  |  HOME  
ClickHouse TPC-H测试方法

本文在ClickHouse 20.10.1版本测试通过。

注意:ClickHouse数据类型、表名、字段名等大小写敏感。

一、建表语句

建库:

snippet.sql
CREATE DATABASE tpch1s;

以下建表适用于TPC-H 100s及以下数据量,100s以上数据量,需要将Int32更换为Int64。

snippet.sql
DROP TABLE IF EXISTS NATION;
DROP TABLE IF EXISTS REGION;
DROP TABLE IF EXISTS PART;
DROP TABLE IF EXISTS SUPPLIER;
DROP TABLE IF EXISTS PARTSUPP;
DROP TABLE IF EXISTS CUSTOMER;
DROP TABLE IF EXISTS ORDERS;
DROP TABLE IF EXISTS LINEITEM;
 
CREATE TABLE NATION(
N_NATIONKEY  Int32 NOT NULL,
N_NAME       FixedString(25) NOT NULL,
N_REGIONKEY  Int32 NOT NULL,
N_COMMENT    String)
ENGINE = MergeTree() ORDER BY (N_NATIONKEY) PRIMARY KEY (N_NATIONKEY);
 
CREATE TABLE REGION(
R_REGIONKEY  Int32 NOT NULL,
R_NAME       FixedString(25) NOT NULL,
R_COMMENT    String)
ENGINE = MergeTree() ORDER BY (R_REGIONKEY) PRIMARY KEY (R_REGIONKEY);
 
CREATE TABLE PART(
P_PARTKEY     Int32 NOT NULL,
P_NAME        String NOT NULL,
P_MFGR        FixedString(25) NOT NULL,
P_BRAND       FixedString(10) NOT NULL,
P_TYPE        String NOT NULL,
P_SIZE        Int32 NOT NULL,
P_CONTAINER   FixedString(10) NOT NULL,
P_RETAILPRICE String NOT NULL,
P_COMMENT     String NOT NULL )
ENGINE = MergeTree() ORDER BY (P_PARTKEY) PRIMARY KEY (P_PARTKEY);
 
CREATE TABLE SUPPLIER(
S_SUPPKEY     Int32 NOT NULL,
S_NAME        FixedString(25) NOT NULL,
S_ADDRESS     String NOT NULL,
S_NATIONKEY   Int32 NOT NULL,
S_PHONE       FixedString(15) NOT NULL,
S_ACCTBAL     DECIMAL(15,2) NOT NULL,
S_COMMENT     String NOT NULL)
ENGINE = MergeTree() ORDER BY (S_SUPPKEY) PRIMARY KEY (S_SUPPKEY);
 
CREATE TABLE PARTSUPP(
PS_PARTKEY     Int32 NOT NULL,
PS_SUPPKEY     Int32 NOT NULL,
PS_AVAILQTY    Int32 NOT NULL,
PS_SUPPLYCOST  DECIMAL(15,2)  NOT NULL,
PS_COMMENT     String NOT NULL )
ENGINE = MergeTree() ORDER BY (PS_PARTKEY) PRIMARY KEY (PS_PARTKEY);
 
CREATE TABLE CUSTOMER (
C_CUSTKEY     Int32 NOT NULL,
C_NAME        String NOT NULL,
C_ADDRESS     String NOT NULL,
C_NATIONKEY   Int32 NOT NULL,
C_PHONE       FixedString(15) NOT NULL,
C_ACCTBAL     String   NOT NULL,
C_MKTSEGMENT  FixedString(10) NOT NULL,
C_COMMENT     String NOT NULL)
ENGINE = MergeTree() ORDER BY (C_CUSTKEY) PRIMARY KEY (C_CUSTKEY);
 
CREATE TABLE ORDERS(
O_ORDERKEY       Int32 NOT NULL,
O_CUSTKEY        Int32 NOT NULL,
O_ORDERSTATUS    FixedString(1) NOT NULL,
O_TOTALPRICE     DECIMAL(15,2) NOT NULL,
O_ORDERDATE      DATE NOT NULL,
O_ORDERPRIORITY  FixedString(15) NOT NULL,  
O_CLERK          FixedString(15) NOT NULL, 
O_SHIPPRIORITY   Int32 NOT NULL,
O_COMMENT        String NOT NULL)
ENGINE = MergeTree() ORDER BY (O_ORDERKEY) PRIMARY KEY (O_ORDERKEY);
 
CREATE TABLE LINEITEM(
L_ORDERKEY    Int32 NOT NULL,
L_PARTKEY     Int32 NOT NULL,
L_SUPPKEY     Int32 NOT NULL,
L_LINENUMBER  Int32 NOT NULL,
L_QUANTITY    DECIMAL(15,2) NOT NULL,
L_EXTENDEDPRICE  DECIMAL(15,2) NOT NULL,
L_DISCOUNT    DECIMAL(15,2) NOT NULL,
L_TAX         DECIMAL(15,2) NOT NULL,
L_RETURNFLAG  FixedString(1) NOT NULL,
L_LINESTATUS  FixedString(1) NOT NULL,
L_SHIPDATE    DATE NOT NULL,
L_COMMITDATE  DATE NOT NULL,
L_RECEIPTDATE DATE NOT NULL,
L_SHIPINSTRUCT FixedString(25) NOT NULL,
L_SHIPMODE     FixedString(10) NOT NULL,
L_COMMENT      String NOT NULL)
ENGINE = MergeTree() ORDER BY (L_ORDERKEY) PRIMARY KEY (L_ORDERKEY);

二、加载数据

加载源数据表使用CSV格式,使用竖线( | )分割列。

snippet.bash
clickhouse-client -d tpch1s --format_csv_delimiter="|" --query "INSERT INTO NATION FORMAT CSV" < ~/nation.tbl
clickhouse-client -d tpch1s --format_csv_delimiter="|" --query "INSERT INTO REGION FORMAT CSV" < ~/region.tbl
clickhouse-client -d tpch1s --format_csv_delimiter="|" --query "INSERT INTO PART FORMAT CSV" < ~/part.tbl
clickhouse-client -d tpch1s --format_csv_delimiter="|" --query "INSERT INTO SUPPLIER FORMAT CSV" < ~/supplier.tbl
clickhouse-client -d tpch1s --format_csv_delimiter="|" --query "INSERT INTO PARTSUPP FORMAT CSV" < ~/partsupp.tbl
clickhouse-client -d tpch1s --format_csv_delimiter="|" --query "INSERT INTO CUSTOMER FORMAT CSV" < ~/customer.tbl
clickhouse-client -d tpch1s --format_csv_delimiter="|" --query "INSERT INTO ORDERS FORMAT CSV" < ~/orders.tbl
clickhouse-client -d tpch1s --format_csv_delimiter="|" --query "INSERT INTO LINEITEM FORMAT CSV" < ~/lineitem.tbl

三、查询

q01

snippet.sql
SELECT
	L_RETURNFLAG,
	L_LINESTATUS,
	SUM(L_QUANTITY) AS SUM_QTY,
	SUM(L_EXTENDEDPRICE) AS SUM_BASE_PRICE,
	SUM(L_EXTENDEDPRICE * (1 - L_DISCOUNT)) AS SUM_DISC_PRICE,
	SUM(L_EXTENDEDPRICE * (1 - L_DISCOUNT) * (1 + L_TAX)) AS SUM_CHARGE,
	AVG(L_QUANTITY) AS AVG_QTY,
	AVG(L_EXTENDEDPRICE) AS AVG_PRICE,
	AVG(L_DISCOUNT) AS AVG_DISC,
	COUNT(*) AS COUNT_ORDER
FROM
	LINEITEM
WHERE
	L_SHIPDATE <= DATE '1998-12-01' - INTERVAL '90' DAY
GROUP BY
	L_RETURNFLAG,
	L_LINESTATUS
ORDER BY
	L_RETURNFLAG,
	L_LINESTATUS;

q02

无法执行

q03

snippet.sql
SELECT
	L_ORDERKEY,
	SUM(L_EXTENDEDPRICE * (1 - L_DISCOUNT)) AS REVENUE,
	O_ORDERDATE,
	O_SHIPPRIORITY
FROM
	CUSTOMER,
	ORDERS,
	LINEITEM
WHERE
	C_MKTSEGMENT = 'BUILDING'
	AND C_CUSTKEY = O_CUSTKEY
	AND L_ORDERKEY = O_ORDERKEY
	AND O_ORDERDATE < DATE '1995-03-15'
	AND L_SHIPDATE > DATE '1995-03-15'
GROUP BY
	L_ORDERKEY,
	O_ORDERDATE,
	O_SHIPPRIORITY
ORDER BY
	REVENUE DESC,
	O_ORDERDATE
LIMIT 10;

q04

无法执行

q05

snippet.sql
SELECT
	N_NAME,
	SUM(L_EXTENDEDPRICE * (1 - L_DISCOUNT)) AS REVENUE
FROM
	CUSTOMER,
	ORDERS,
	LINEITEM,
	SUPPLIER,
	NATION,
	REGION
WHERE
	C_CUSTKEY = O_CUSTKEY
	AND L_ORDERKEY = O_ORDERKEY
	AND L_SUPPKEY = S_SUPPKEY
	AND C_NATIONKEY = S_NATIONKEY
	AND S_NATIONKEY = N_NATIONKEY
	AND N_REGIONKEY = R_REGIONKEY
	AND R_NAME = 'ASIA'
	AND O_ORDERDATE >= DATE '1994-01-01'
	AND O_ORDERDATE < DATE '1994-01-01' + INTERVAL '1' YEAR
GROUP BY
	N_NAME
ORDER BY
	REVENUE DESC;

q18

snippet.sql
SELECT
        C_NAME,
        C_CUSTKEY,
        O_ORDERKEY,
        O_ORDERDATE,
        O_TOTALPRICE,
        SUM(L_QUANTITY)
FROM
        CUSTOMER,
        ORDERS,
        LINEITEM
WHERE
        O_ORDERKEY IN (
                SELECT
                        L_ORDERKEY
                FROM
                        LINEITEM
                GROUP BY
                        L_ORDERKEY HAVING
                                SUM(L_QUANTITY) > 300
        )
        AND C_CUSTKEY = O_CUSTKEY
        AND O_ORDERKEY = L_ORDERKEY
GROUP BY
        C_NAME,
        C_CUSTKEY,
        O_ORDERKEY,
        O_ORDERDATE,
        O_TOTALPRICE
ORDER BY
        O_TOTALPRICE DESC,
        O_ORDERDATE
LIMIT 100;


打赏作者以资鼓励:
移动端扫码阅读: