## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
library(data.table)
library(DBmaps)

## ----setup_metadata-----------------------------------------------------------
# Describe each table with table_info(). The outcome specs for 'customers'
# and 'products' use placeholder names ("x", "y", "z").
customers_meta <- table_info(
  "customers", "c.csv", "customer_id",
  list(
    list(
      OutcomeName = "x",
      ValueExpression = 1,
      AggregationMethods = list(
        list(
          AggregatedName = "y",
          AggregationFunction = "z",
          GroupingVariables = "region"
        )
      )
    )
  )
)

products_meta <- table_info(
  "products", "p.csv", "product_id",
  list(
    list(
      OutcomeName = "x",
      ValueExpression = 1,
      AggregationMethods = list(
        list(
          AggregatedName = "y",
          AggregationFunction = "z",
          GroupingVariables = "category"
        )
      )
    )
  )
)

transactions_meta <- table_info(
  "transactions", "t.csv", "trans_id",
  list(
    list(
      OutcomeName = "rev",
      ValueExpression = 1,
      AggregationMethods = list(
        # Grouped by the same column that 'customers' declares as its key.
        list(
          AggregatedName = "a",
          AggregationFunction = "sum",
          GroupingVariables = "customer_id"
        ),
        # Grouped by the same column that 'products' declares as its key.
        list(
          AggregatedName = "b",
          AggregationFunction = "sum",
          GroupingVariables = "product_id"
        )
      )
    )
  )
)

# Stack the three per-table metadata objects into a single master table.
master_meta <- rbindlist(
  list(customers_meta, products_meta, transactions_meta)
)

## ----metadata_only_run--------------------------------------------------------
# No data_list is supplied here, so only the metadata is passed in.
metadata_paths <- map_join_paths(master_meta)
print(metadata_paths)

## ----multi_key_setup----------------------------------------------------------
# 'daily_promos' is declared with a two-column key: product_id + region.
daily_promos_meta <- table_info(
  "daily_promos", "d.csv", c("product_id", "region"),
  list(
    list(
      OutcomeName = "x",
      ValueExpression = 1,
      AggregationMethods = list(
        list(
          AggregatedName = "y",
          AggregationFunction = "z",
          GroupingVariables = "region"
        )
      )
    )
  )
)

# Give 'transactions' an aggregation grouped by the same two columns, so it
# lines up with the composite key declared above.
transactions_multi_meta <- table_info(
  "transactions", "t.csv", "trans_id",
  list(
    list(
      OutcomeName = "rev",
      ValueExpression = 1,
      AggregationMethods = list(
        list(
          AggregatedName = "promo_rev",
          AggregationFunction = "sum",
          GroupingVariables = c("product_id", "region")
        )
      )
    )
  )
)

multi_key_meta <- rbindlist(
  list(daily_promos_meta, transactions_multi_meta)
)

## ----multi_key_run------------------------------------------------------------
multi_key_paths <- map_join_paths(multi_key_meta)
print(multi_key_paths)

## ----inferred_setup-----------------------------------------------------------
# Small in-memory tables. The values in orders$product_code ("s1", "s2") also
# appear in inventory$sku, although the two columns are named differently.
inventory_data <- data.table(
  sku = c("s1", "s2", "s3"),
  stock = c(10, 20, 5)
)
orders_data <- data.table(
  order_id = 1:2,
  customer_ref = "c1",
  product_code = c("s1", "s2")
)

data_list <- list(
  inventory = inventory_data,
  orders = orders_data
)

# Metadata for the same two tables. The column names do not match across
# tables: 'sku' on the inventory side, 'product_code' on the orders side.
inventory_meta <- table_info(
  "inventory", "i.csv", "sku",
  list(
    list(
      OutcomeName = "x",
      ValueExpression = 1,
      AggregationMethods = list(
        list(
          AggregatedName = "y",
          AggregationFunction = "z",
          GroupingVariables = "stock"
        )
      )
    )
  )
)

orders_meta <- table_info(
  "orders", "o.csv", "order_id",
  list(
    list(
      OutcomeName = "x",
      ValueExpression = 1,
      AggregationMethods = list(
        list(
          AggregatedName = "y",
          AggregationFunction = "z",
          GroupingVariables = "product_code"
        )
      )
    )
  )
)

inferred_meta <- rbindlist(list(inventory_meta, orders_meta))

## ----inferred_run-------------------------------------------------------------
# This time the actual tables are passed alongside the metadata.
inferred_paths <- map_join_paths(inferred_meta, data_list = data_list)
print(inferred_paths)