train classification model from a dataset created by a query

CREATE OR REPLACE MODEL `ecommerce.classification_model`
OPTIONS
(
model_type='logistic_reg',
labels = ['will_buy_on_return_visit']
)
AS
#standardSQL
SELECT
  * EXCEPT(fullVisitorId)
FROM
  # features
  (SELECT
    fullVisitorId,
    IFNULL(totals.bounces, 0) AS bounces,
    IFNULL(totals.timeOnSite, 0) AS time_on_site
  FROM
    `data-to-insights.ecommerce.web_analytics`
  WHERE
    totals.newVisits = 1
    AND date BETWEEN '20160801' AND '20170430') # train on first 9 months
  JOIN
  (SELECT
    fullvisitorid,
    IF(COUNTIF(totals.transactions > 0 AND totals.newVisits IS NULL) > 0, 1, 0) AS will_buy_on_return_visit
  FROM
      `data-to-insights.ecommerce.web_analytics`
  GROUP BY fullvisitorid)
  USING (fullVisitorId)
;

evaluate roc auc score

SELECT
  roc_auc,
  CASE
    WHEN roc_auc > .9 THEN 'good'
    WHEN roc_auc > .8 THEN 'fair'
    WHEN roc_auc > .7 THEN 'not great'
  ELSE
  'poor'
END
  AS model_quality
FROM
  ML.EVALUATE(MODEL ecommerce.classification_model,
    (
    SELECT
      * EXCEPT(fullVisitorId)
    FROM
      # features
      (
      SELECT
        fullVisitorId,
        IFNULL(totals.bounces,
          0) AS bounces,
        IFNULL(totals.timeOnSite,
          0) AS time_on_site
      FROM
        `data-to-insights.ecommerce.web_analytics`
      WHERE
        totals.newVisits = 1
        AND date BETWEEN '20170501'
        AND '20170630') # eval on 2 months
    JOIN (
      SELECT
        fullvisitorid,
      IF
        (COUNTIF(totals.transactions > 0
            AND totals.newVisits IS NULL) > 0,
          1,
          0) AS will_buy_on_return_visit
      FROM
        `data-to-insights.ecommerce.web_analytics`
      GROUP BY
        fullvisitorid)
    USING
      (fullVisitorId) ));

predict with trained model on the later dataset

SELECT
  *
FROM
  ml.PREDICT(MODEL `ecommerce.classification_model_2`,
    (
    WITH
      all_visitor_stats AS (
      SELECT
        fullvisitorid,
      IF
        (COUNTIF(totals.transactions > 0
            AND totals.newVisits IS NULL) > 0,
          1,
          0) AS will_buy_on_return_visit
      FROM
        `data-to-insights.ecommerce.web_analytics`
      GROUP BY
        fullvisitorid )
    SELECT
      CONCAT(fullvisitorid, '-',CAST(visitId AS STRING)) AS unique_session_id,
      # labels
      will_buy_on_return_visit,
      MAX(CAST(h.eCommerceAction.action_type AS INT64)) AS latest_ecommerce_progress,
      # behavior on the site
      IFNULL(totals.bounces,
        0) AS bounces,
      IFNULL(totals.timeOnSite,
        0) AS time_on_site,
      totals.pageviews,
      # where the visitor came from
      trafficSource.source,
      trafficSource.medium,
      channelGrouping,
      # mobile or desktop
      device.deviceCategory,
      # geographic
      IFNULL(geoNetwork.country,
        "") AS country
    FROM
      `data-to-insights.ecommerce.web_analytics`,
      UNNEST(hits) AS h
    JOIN
      all_visitor_stats
    USING
      (fullvisitorid)
    WHERE
      # only predict for new visits
      totals.newVisits = 1
      AND date BETWEEN '20170701'
      AND '20170801' # test 1 month
    GROUP BY
      unique_session_id,
      will_buy_on_return_visit,
      bounces,
      time_on_site,
      totals.pageviews,
      trafficSource.source,
      trafficSource.medium,
      channelGrouping,
      device.deviceCategory,
      country ) )
ORDER BY
  predicted_will_buy_on_return_visit DESC;

types of models available

https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-create#model_type

Deep learning model creation from sql syntax ref

https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-create-dnn-models

XGBoost model creation from sql syntax ref

https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-create-boosted-tree

AutoML syntax and ref for sql in BQ

https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-create-automl