diff --git a/02_activities/assignments/Cohort_8/Assignment2.md b/02_activities/assignments/Cohort_8/Assignment2.md index 47118b2ba..8510836e8 100644 --- a/02_activities/assignments/Cohort_8/Assignment2.md +++ b/02_activities/assignments/Cohort_8/Assignment2.md @@ -54,7 +54,8 @@ The store wants to keep customer addresses. Propose two architectures for the CU **HINT:** search type 1 vs type 2 slowly changing dimensions. ``` -Your answer... +CUSTOMER_ADDRESS_TYPE1 keeps one current row per customer. Updates overwrite prior values. This keeps the table simple and always current, but there is no history that we can search. +CUSTOMER_ADDRESS_TYPE2 stores multiple rows per customer, each with a start date, an end date, and a boolean flag marking the currently effective row. It gives the full history, but will need more complex queries and more database storage. ``` *** diff --git a/02_activities/assignments/Cohort_8/Assignment2_ERD1.png b/02_activities/assignments/Cohort_8/Assignment2_ERD1.png new file mode 100644 index 000000000..1a13c3475 Binary files /dev/null and b/02_activities/assignments/Cohort_8/Assignment2_ERD1.png differ diff --git a/02_activities/assignments/Cohort_8/Assignment2_ERD2.png b/02_activities/assignments/Cohort_8/Assignment2_ERD2.png new file mode 100644 index 000000000..154ad4b6c Binary files /dev/null and b/02_activities/assignments/Cohort_8/Assignment2_ERD2.png differ diff --git a/02_activities/assignments/Cohort_8/assignment2.sql b/02_activities/assignments/Cohort_8/assignment2.sql index c2743d3b7..5e96e13b5 100644 --- a/02_activities/assignments/Cohort_8/assignment2.sql +++ b/02_activities/assignments/Cohort_8/assignment2.sql @@ -20,8 +20,19 @@ nulls, and 'unit' for the second column with nulls. The `||` values concatenate the columns into strings. Edit the appropriate columns -- you're making two edits -- and the NULL rows will be fixed. All the other rows will remain the same. 
*/
+SELECT p.*  -- audit: rows with a NULL in either column used by the list below
+FROM product AS p
+WHERE p.product_qty_type IS NULL
+  OR p.product_size IS NULL;
-
+SELECT
+  p.product_name || ', '
+    || COALESCE(p.product_size, '')          -- NULL size becomes a blank
+    || ' ('
+    || COALESCE(p.product_qty_type, 'unit')  -- NULL quantity type becomes 'unit'
+    || ')'
+    AS product_list
+FROM product AS p;
--Windowed Functions
@@ -34,16 +45,43 @@ each new market date for each customer, or select only the unique market dates p
(without purchase details) and number those visits. HINT: One of these approaches uses ROW_NUMBER() and one uses DENSE_RANK(). */
+SELECT
+  purchases.*,
+  -- equal market_date values share a rank, so all rows of one visit get the same number
+  DENSE_RANK() OVER (
+    PARTITION BY purchases.customer_id ORDER BY purchases.market_date
+  ) AS visit_number
+FROM customer_purchases AS purchases;
/* 2. Reverse the numbering of the query from a part so each customer’s most recent visit is labeled 1, then write another query that uses this one as a subquery (or temp table) and filters the results to only the customer’s most recent visit. */
-
+SELECT customer_id, market_date
+FROM (
+  -- GROUP BY leaves one row per customer visit; rank 1 is the newest market_date
+  SELECT
+    cp.customer_id,
+    cp.market_date,
+    DENSE_RANK() OVER (
+      PARTITION BY cp.customer_id
+      ORDER BY cp.market_date DESC
+    ) AS recency_rank
+  FROM customer_purchases AS cp
+  GROUP BY cp.customer_id, cp.market_date
+) AS newest_first
+WHERE recency_rank = 1
+ORDER BY customer_id;
/* 3. Using a COUNT() window function, include a value along with each row of the customer_purchases table that indicates how many different times that customer has purchased that product_id. */
+SELECT
+  purchases.*,
+  -- no ORDER BY in the window, so each row carries its whole partition's count
+  COUNT(*) OVER (PARTITION BY purchases.customer_id, purchases.product_id)
+    AS times_bought
+FROM customer_purchases AS purchases;
@@ -58,11 +96,19 @@ Remove any trailing or leading whitespaces. Don't just use a case statement for
| Habanero Peppers - Organic | Organic | Hint: you might need to use INSTR(product_name,'-') to find the hyphens. INSTR will help split the column. */
-
-
+SELECT
+  p.product_name,
+  CASE
+    WHEN INSTR(p.product_name, '-') = 0 THEN NULL  -- no hyphen: no description
+    ELSE TRIM(SUBSTR(p.product_name, INSTR(p.product_name, '-') + 1))
+  END AS description
+FROM product AS p;
/* 2. 
Filter the query to show any product_size value that contain a number with REGEXP. */
+SELECT *  -- NOTE(review): SQLite's REGEXP needs a regexp() function registered by the client; confirm it is available
+FROM product
+WHERE product_size REGEXP '[0-9]';
-- UNION
@@ -76,6 +122,28 @@ HINT: There are a possibly a few ways to do this query, but if you're struggling
with a UNION binding them. */
+-- Total sales per market day.
+WITH sales_by_date AS (
+  SELECT market_date,
+    ROUND(SUM(quantity * cost_to_customer_per_qty), 2) AS total_sales
+  FROM customer_purchases
+  GROUP BY market_date
+),
+-- Rank every day from both ends; RANK() keeps ties, so several days can share rank 1.
+ranked AS (
+  SELECT market_date, total_sales,
+    RANK() OVER (ORDER BY total_sales DESC) AS r_desc,
+    RANK() OVER (ORDER BY total_sales ASC) AS r_asc
+  FROM sales_by_date
+)
+SELECT 'best_day' AS tag, market_date, total_sales
+FROM ranked
+WHERE r_desc = 1
+UNION ALL  -- the tags differ, so there is nothing to de-duplicate
+SELECT 'worst_day' AS tag, market_date, total_sales
+FROM ranked
+WHERE r_asc = 1
+ORDER BY tag, market_date;  -- fixed output order: best day(s) first, then worst day(s)
/* SECTION 3 */
@@ -90,7 +158,37 @@ Remember, CROSS JOIN will explode your table rows, so CROSS JOIN should likely b
Think a bit about the row counts: how many distinct vendors, product names are there (x)? How many customers are there (y). Before your final group by you should have the product of those two queries (x*y). */
-
+-- Latest listed price per vendor/product (rn = 1 is the newest market_date).
+WITH latest_price AS (
+  SELECT vendor_id, product_id, original_price
+  FROM (
+    SELECT vi.*,
+      ROW_NUMBER() OVER (
+        PARTITION BY vendor_id, product_id ORDER BY market_date DESC
+      ) AS rn
+    FROM vendor_inventory AS vi
+  ) AS t
+  WHERE rn = 1
+),
+cust AS (  -- single-row CTE: number of customers on record
+  SELECT COUNT(*) AS customers FROM customer
+),
+vendor_products AS (
+  SELECT v.vendor_name, p.product_name, lp.original_price
+  FROM latest_price AS lp
+  INNER JOIN vendor AS v USING (vendor_id)
+  INNER JOIN product AS p USING (product_id)
+)
+SELECT
+  vp.vendor_name,
+  vp.product_name,
+  vp.original_price,
+  c.customers,
+  5 AS qty_per_customer,
+  c.customers * 5 * vp.original_price AS revenue  -- customers x 5 units x latest price
+FROM vendor_products AS vp
+CROSS JOIN cust AS c  -- cust has one row, so this only attaches the customer count
+ORDER BY vp.vendor_name, vp.product_name;  -- product_name makes the order within a vendor deterministic
-- INSERT
@@ -99,17 +197,34 @@ This table will contain only products where the `product_qty_type = 'unit'`. 
It should use all of the columns from the product table, as well as a new column for the `CURRENT_TIMESTAMP`. Name the timestamp column `snapshot_timestamp`. */
+CREATE TABLE product_units AS  -- snapshot of every product sold by the unit
+SELECT
+  p.*,
+  CURRENT_TIMESTAMP AS snapshot_timestamp
+FROM product AS p
+WHERE p.product_qty_type = 'unit';  -- NULL never equals 'unit', so no COALESCE is needed
/*2. Using `INSERT`, add a new row to the product_units table (with an updated timestamp). This can be any product you desire (e.g. add another record for Apple Pie). */
-
+INSERT INTO product_units  -- positional insert: product's columns followed by snapshot_timestamp
+SELECT
+  p.*,
+  CURRENT_TIMESTAMP AS snapshot_timestamp
+FROM product AS p
+WHERE p.product_name = 'Apple Pie';
-- DELETE
/* 1. Delete the older record for the whatever product you added. HINT: If you don't specify a WHERE clause, you are going to have a bad time.*/
+DELETE FROM product_units  -- keep only the newest 'Apple Pie' snapshot
+WHERE product_name = 'Apple Pie'
+  AND rowid <>  -- SQLite rowid breaks ties: CURRENT_TIMESTAMP only has one-second resolution
+      (SELECT rowid FROM product_units
+       WHERE product_name = 'Apple Pie'
+       ORDER BY snapshot_timestamp DESC, rowid DESC LIMIT 1);
@@ -130,6 +245,16 @@ Finally, make sure you have a WHERE statement to update the right row,
you'll need to use product_units.product_id to refer to the correct row within the product_units table. When you have all of these components, you can run the update statement. */
-
+ALTER TABLE product_units
+ADD COLUMN current_quantity INT;
+
+UPDATE product_units AS pu  -- no outer WHERE: each row is set from its own product's inventory
+SET current_quantity = COALESCE((
+  SELECT vi.quantity  -- newest market_date wins; highest vendor_id breaks a same-day tie
+  FROM vendor_inventory AS vi
+  WHERE vi.product_id = pu.product_id
+  ORDER BY vi.market_date DESC, vi.vendor_id DESC
+  LIMIT 1
+), 0);  -- 0 when the product has no inventory rows