...
Expand | |||||
---|---|---|---|---|---|
| |||||
|
First attempts
As the first attempt, we did a logistic regression based on the basic item properties of GA4 (brand, category, and price):
Code Block |
---|
CREATE OR REPLACE TABLE `@reports.Data_PDPPrediction_logistic_reg_v1_11M`
as
SELECT
count(distinct vi.session_id) as sessions
, sum(case when p.session_id is null then 0 else 1 end) as purchases
, vi.items[safe_offset(0)].item_brand
, vi.items[safe_offset(0)].item_category
, vi.items[safe_offset(0)].item_category2
, vi.items[safe_offset(0)].item_category3
, vi.items[safe_offset(0)].item_category4
, vi.items[safe_offset(0)].item_category5
, vi.items[safe_offset(0)].price
FROM
`@core.ga_events_enriched` as vi
left join `@core.ga_events_enriched`as p on vi.session_id = p.session_id and p.date >= date_sub(current_date(), interval 6 month) and p.event_name = 'purchase'
WHERE vi.date >= date_sub(current_date(), interval 6 month) and vi.event_name = 'view_item'
group by item_brand, item_category, item_category2, item_category3, item_category4, item_category5, price
;
CREATE OR REPLACE MODEL `@reports.Model_PDPPrediction_logistic_reg_v1_11M`
OPTIONS(
model_type='logistic_reg'
, CATEGORY_ENCODING_METHOD = 'DUMMY_ENCODING'
, CALCULATE_P_VALUES = TRUE,
ENABLE_GLOBAL_EXPLAIN = TRUE
) AS
SELECT
(IF(purchases >= it,1,0)) AS label
, item_brand
, item_category
, item_category2
, item_category3
, item_category4
, item_category5
, price
FROM
`@reports.Data_PDPPrediction_logistic_reg_v1_11M`
join unnest(GENERATE_ARRAY(1,sessions)) as it
; |
The results showed a quite limited predictive power (which was to be expected with such a basic set of parameters):
...
The Interpretability tab shows some information about the attribution to each parameter, showing that the brand and the category have much higher attribution than the price (but this could be partially because the price is a numeric field):
...
Radical Tracking
As we have released our Radical Tracking on the PDP of some of our customers like https://www.nahrin.ch/de/raclette-mix we did a second attempt considering these visual markers:
Code Block |
---|
CREATE OR REPLACE TABLE `live_nahrin_reports.Data_PDPRadicalPrediction_logistic_reg_v1_11M`
as
SELECT
count(distinct session_id) as sessions
, count(distinct order_id) as purchases
, ifnull((select string_agg(v order by v) from unnest(feature_1) as v where v != ''),'none') as feature_1
, ifnull((select string_agg(v order by v) from unnest(feature_2) as v where v != ''),'none') as feature_2
, ifnull((select string_agg(v order by v) from unnest(feature_3) as v where v != ''),'none') as feature_3
, ifnull((select string_agg(v order by v) from unnest(feature_4) as v where v != ''),'none') as feature_4
, ifnull((select string_agg(v order by v) from unnest(feature_5) as v where v != ''),'none') as feature_5
, ifnull((select string_agg(v order by v) from unnest(feature_6) as v where v != ''),'none') as feature_6
, ifnull((select string_agg(v order by v) from unnest(feature_7) as v where v != ''),'none') as feature_7
, ifnull((select string_agg(v order by v) from unnest(feature_8) as v where v != ''),'none') as feature_8
, ifnull((select string_agg(v order by v) from unnest(feature_9) as v where v != ''),'none') as feature_9
, ifnull((select string_agg(v order by v) from unnest(feature_10) as v where v != ''),'none') as feature_10
from
(
SELECT
s.session_id
, any_value(s.order_id) as order_id
, array_agg(distinct case when feature = 1 then ifnull(concat(v.name, '-', v.value), '-') else '' end) as feature_1
, array_agg(distinct case when feature = 2 then ifnull(concat(v.name, '-', v.value), '-') else '' end) as feature_2
, array_agg(distinct case when feature = 3 then ifnull(concat(v.name, '-', v.value), '-') else '' end) as feature_3
, array_agg(distinct case when feature = 4 then ifnull(concat(v.name, '-', v.value), '-') else '' end) as feature_4
, array_agg(distinct case when feature = 5 then ifnull(concat(v.name, '-', v.value), '-') else '' end) as feature_5
, array_agg(distinct case when feature = 6 then ifnull(concat(v.name, '-', v.value), '-') else '' end) as feature_6
, array_agg(distinct case when feature = 7 then ifnull(concat(v.name, '-', v.value), '-') else '' end) as feature_7
, array_agg(distinct case when feature = 8 then ifnull(concat(v.name, '-', v.value), '-') else '' end) as feature_8
, array_agg(distinct case when feature = 9 then ifnull(concat(v.name, '-', v.value), '-') else '' end) as feature_9
, array_agg(distinct case when feature = 10 then ifnull(concat(v.name, '-', v.value), '-') else '' end) as feature_10
FROM `live_nahrin_views.pdp_radical_tracking_markers`
join unnest(values) as v
join unnest(sessions) as sid
join `live_nahrin_reports.session_primary` as s on s.session_id = sid
group by session_id, feature
)
group by feature_1, feature_2, feature_3, feature_4, feature_5, feature_6, feature_7, feature_8, feature_9, feature_10
;
CREATE OR REPLACE MODEL `live_nahrin_reports.Model_PDPRadicalPrediction_logistic_reg_v1_11M`
OPTIONS(
model_type='logistic_reg'
, CATEGORY_ENCODING_METHOD = 'DUMMY_ENCODING'
, CALCULATE_P_VALUES = TRUE,
ENABLE_GLOBAL_EXPLAIN = TRUE
) AS
SELECT
(IF(purchases >= it,1,0)) AS label
, feature_1, feature_2, feature_3, feature_4, feature_5, feature_6, feature_7, feature_8, feature_9, feature_10
FROM
`live_nahrin_reports.Data_PDPRadicalPrediction_logistic_reg_v1_11M`
join unnest(GENERATE_ARRAY(1,sessions)) as it
; |
However, it is still not very clear how this will work
...
and the features indicated as most important as simply the ones which are detected as so by the feature number logic, so no value so far there in the outcome…
Factor analysis of parameters affecting the performance of product recommendations on the PDP
...
Expand | |||||
---|---|---|---|---|---|
| |||||
|
First attempts
We did the first attempt for the case of accessories recommendations (cross-selling on the PDP) as follows:
...