正常情况下,浅度关联分析并不一定需要图数据库,这里我们就当是热热身,回顾一下 GSQL 语法。同时,读者们也可以对比一下:在浅度关联分析的场景下,图数据库相比关系型数据库是否存在优势。
/*
 * find_risky_imeis
 *
 * Flags IMEI vertices whose out-degree on "use_imei" edges is at least
 * th_num_phones_per_imei, then expands two hops from them:
 *   1. IMEI    -(use_imei)->   Account     : accounts attached to a flagged IMEI
 *   2. Account -(send_bonus)-> BonusOrder  : orders sent by those accounts
 *
 * Parameters:
 *   th_num_phones_per_imei  minimum "use_imei" out-degree for an IMEI to be
 *                           flagged (default 3).
 *
 * Output: one PRINT with three vertex sets under the keys
 *   "imeis", "accounts" and "orders".
 */
CREATE QUERY find_risky_imeis(INT th_num_phones_per_imei=3) FOR GRAPH MyGraph {
  // Global result sets; SetAccum de-duplicates vertices reached via several edges.
  SetAccum<VERTEX> @@risky_imeis;
  SetAccum<VERTEX> @@risky_accounts;
  SetAccum<VERTEX> @@risky_orders;

  // Seed set: every IMEI vertex in the graph.
  imei_seeds = {IMEI.*};

  // Hop 1: keep only edges whose source IMEI meets the degree threshold and
  // record both endpoints.
  flagged_accounts =
    SELECT acct
    FROM imei_seeds:src_imei -(use_imei:link)-> Account:acct
    WHERE src_imei.outdegree("use_imei") >= th_num_phones_per_imei
    ACCUM
      @@risky_accounts += acct,
      @@risky_imeis += src_imei;

  // Hop 2: collect every bonus order sent by a flagged account.
  flagged_orders =
    SELECT bonus_order
    FROM flagged_accounts:acct -(send_bonus:link)-> BonusOrder:bonus_order
    ACCUM
      @@risky_orders += bonus_order;

  PRINT
    @@risky_imeis AS imeis,
    @@risky_accounts AS accounts,
    @@risky_orders AS orders;
}
find_risky_order_recvrs.gsql
/*
 * find_risky_order_recvrs
 *
 * Flags Account vertices that receive bonus orders from at least
 * th_num_sendrs_per_recvrs distinct sender accounts, walking the path
 *   Account -(send_bonus)-> BonusOrder -(recv_bonus)-> Account
 *
 * Parameters:
 *   th_num_sendrs_per_recvrs  minimum number of distinct senders for a
 *                             receiver to be flagged (default 3).
 *
 * Output: one PRINT with three vertex sets:
 *   "recvrs"   - the flagged receiver accounts
 *   "accounts" - every sender recorded on a flagged receiver
 *   "orders"   - every order that leads to a flagged receiver
 */
CREATE QUERY find_risky_order_recvrs(INT th_num_sendrs_per_recvrs=3) FOR GRAPH MyGraph {
  // Per-vertex set of sender accounts; attached first to orders, then to receivers.
  SetAccum<VERTEX> @order_sendrs;
  // Global result sets.
  SetAccum<VERTEX> @@risky_recvrs, @@risky_accounts, @@risky_orders;

  all_accounts = {Account.*};

  // Pass 1: tag every bonus order with the account(s) that sent it.
  orders =
    SELECT t
    FROM all_accounts:s -(send_bonus:e)-> BonusOrder:t
    ACCUM t.@order_sendrs += s
  ;

  // Pass 2: push each order's sender set onto the receiving account, so every
  // receiver ends up with the union of senders over all its incoming orders.
  // Selects t (not s) so that `recvrs` really holds the receiver accounts its
  // name promises; the original selected the order side of the hop.
  recvrs =
    SELECT t
    FROM orders:s -(recv_bonus:e)-> Account:t
    ACCUM t.@order_sendrs += s.@order_sendrs
  ;

  // Pass 3: walk the same order -> receiver edges again, now filtering on the
  // completed sender sets. This is a separate SELECT because the filter needs
  // the totals produced by pass 2.
  _t0 =
    SELECT t
    FROM orders:s -(recv_bonus:e)-> Account:t
    WHERE t.@order_sendrs.size() >= th_num_sendrs_per_recvrs
    ACCUM @@risky_recvrs += t,
          @@risky_accounts += t.@order_sendrs,
          @@risky_orders += s
  ;

  PRINT @@risky_recvrs AS recvrs,
        @@risky_accounts AS accounts,
        @@risky_orders AS orders
  ;
}
上面的实现方式可能有点儿绕。另外一种可能的设计,是在 Schema 中直接建立一条 Account -(send_bonus)-> Account 边。大家也可以思考一下这两种设计的区别以及各自的优缺点。