diff --git a/02_activities/assignments/Assignment2.md b/02_activities/assignments/Assignment2.md
index 6cc76d916..7e52301b4 100644
--- a/02_activities/assignments/Assignment2.md
+++ b/02_activities/assignments/Assignment2.md
@@ -32,6 +32,7 @@ Steps to complete this part of the assignment:
- Write, within this markdown file, an answer to Prompt 3
+
### Design a Logical Model
#### Prompt 1
@@ -43,7 +44,12 @@ Additionally, include a date table.
There are several tools online you can use, I'd recommend [Draw.io](https://www.drawio.com/) or [LucidChart](https://www.lucidchart.com/pages/).
+
+**HINT:** You do not need to create any data for this prompt. This is a logical model (ERD) only.
+
**HINT:** You do not need to create any data for this prompt. This is a conceptual model only.
+*Created base bookstore schema with core tables.*
+
#### Prompt 2
We want to create employee shifts, splitting up the day into morning and evening. Add this to the ERD.
@@ -57,7 +63,9 @@ The store wants to keep customer addresses. Propose two architectures for the CU
Your answer...
```
-***
+**Type 1 architecture:** the CUSTOMER_ADDRESS table simply overwrites old information when a customer's address changes. This saves storage space but loses historical data.
+
+**Type 2 architecture:** the CUSTOMER_ADDRESS table retains historical address changes by creating new records with effective_date, end_date, and an is_current flag. This method preserves historical data, allowing us to know past addresses, but it requires more storage and slightly more complex queries (for example, finding the current address where is_current = TRUE or end_date IS NULL).
## Section 2:
You can start this section following *session 4*.
diff --git a/02_activities/assignments/FirstModel-assignment-two.jpg b/02_activities/assignments/FirstModel-assignment-two.jpg
new file mode 100644
index 000000000..0a344c33e
Binary files /dev/null and b/02_activities/assignments/FirstModel-assignment-two.jpg differ
diff --git a/02_activities/assignments/SecondModel-Type-1_assignment-two.jpg b/02_activities/assignments/SecondModel-Type-1_assignment-two.jpg
new file mode 100644
index 000000000..a4f6c1109
Binary files /dev/null and b/02_activities/assignments/SecondModel-Type-1_assignment-two.jpg differ
diff --git a/02_activities/assignments/SecondModel-Type2_assignment-two.jpg b/02_activities/assignments/SecondModel-Type2_assignment-two.jpg
new file mode 100644
index 000000000..a3477efbb
Binary files /dev/null and b/02_activities/assignments/SecondModel-Type2_assignment-two.jpg differ
diff --git a/02_activities/assignments/assignment2-db browser.sql b/02_activities/assignments/assignment2-db browser.sql
new file mode 100644
index 000000000..d666409f0
--- /dev/null
+++ b/02_activities/assignments/assignment2-db browser.sql
@@ -0,0 +1,180 @@
+--COALESCE
+SELECT
+ product_name || ', ' || COALESCE(product_size, '') || ' (' || COALESCE(product_qty_type, 'unit') || ')' AS product_full_description
+FROM product;
+
+--Windowed Functions
+--Q1
+SELECT
+ customer_id,
+ market_date,
+ ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY market_date) AS visit_number
+FROM customer_purchases;
+
+SELECT
+ customer_id,
+ market_date,
+ DENSE_RANK() OVER (PARTITION BY customer_id ORDER BY market_date) AS visit_number
+FROM customer_purchases;
+
+--Q2
+WITH visit_ranking AS (
+ SELECT
+ customer_id,
+ market_date,
+ ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY market_date DESC) AS rn
+ FROM customer_purchases
+)
+SELECT
+ customer_id,
+ market_date
+FROM visit_ranking
+WHERE rn = 1;
+
+SELECT
+ customer_id,
+ product_id,
+ COUNT(*) OVER (PARTITION BY customer_id, product_id) AS purchase_count
+FROM customer_purchases;
+
+--String manipulation
+SELECT
+ product_name,
+ CASE
+ WHEN INSTR(product_name, '-') > 0 THEN TRIM(SUBSTR(product_name, INSTR(product_name, '-') + 1))
+ ELSE NULL
+ END AS description
+FROM product;
+
+SELECT
+ product_name,
+ product_size
+FROM product
+WHERE product_size REGEXP '[0-9]';
+
+-- UNION
+WITH daily_sales AS (
+ SELECT
+ market_date,
+ SUM(quantity * cost_to_customer_per_qty) AS total_sales
+ FROM customer_purchases
+ GROUP BY market_date
+),
+ranked_sales AS (
+ SELECT
+ market_date,
+ total_sales,
+ RANK() OVER (ORDER BY total_sales DESC) AS sales_rank_high,
+ RANK() OVER (ORDER BY total_sales ASC) AS sales_rank_low
+ FROM daily_sales
+)
+
+SELECT market_date, total_sales, 'Highest Sales' AS sales_type
+FROM ranked_sales
+WHERE sales_rank_high = 1
+
+UNION
+
+SELECT market_date, total_sales, 'Lowest Sales' AS sales_type
+FROM ranked_sales
+WHERE sales_rank_low = 1
+
+ORDER BY total_sales DESC;
+
+
+
+-- Cross Join
+WITH product_vendor_info AS (
+ SELECT
+ v.vendor_name,
+ p.product_name,
+ p.product_id,
+ vi.vendor_id,
+ vi.original_price AS product_price
+ FROM vendor_inventory vi
+ JOIN product p ON vi.product_id = p.product_id
+ JOIN vendor v ON vi.vendor_id = v.vendor_id
+),
+cross_joined_sales AS (
+ SELECT
+ pvi.vendor_name,
+ pvi.product_name,
+ pvi.product_price,
+ c.customer_id,
+ 5 AS quantity -- Each customer buys 5 of each product
+ FROM product_vendor_info pvi
+ CROSS JOIN customer c
+)
+
+SELECT
+ vendor_name,
+ product_name,
+ SUM(quantity * product_price) AS potential_revenue
+FROM cross_joined_sales
+GROUP BY vendor_name, product_name
+ORDER BY vendor_name, product_name;
+
+
+-- First drop table if it exists to avoid errors
+DROP TABLE IF EXISTS product_units;
+
+-- Then create the table
+CREATE TABLE product_units AS
+SELECT
+ product_id,
+ product_name,
+ product_size,
+ product_category_id,
+ product_qty_type,
+ CURRENT_TIMESTAMP AS snapshot_timestamp
+FROM
+ product
+WHERE
+ product_qty_type = 'unit';
+
+-- 2. Insert a new unit product with current timestamp
+INSERT INTO product_units (
+ product_id,
+ product_name,
+ product_size,
+ product_category_id,
+ product_qty_type,
+ snapshot_timestamp
+)
+VALUES (
+ (SELECT COALESCE(MAX(product_id), 0) + 1 FROM product_units), -- Auto-increment ID
+ 'Gourmet Apple Pie', -- New product name
+ '10 inch', -- Product size
+ (SELECT product_category_id FROM product WHERE product_name LIKE '%Pie%' LIMIT 1), -- Matching category
+ 'unit', -- Quantity type
+ CURRENT_TIMESTAMP -- Current timestamp
+);
+
+-- DELETE
+/* 1. Delete the older record for the whatever product you added. */
+DELETE FROM product_units
+WHERE product_id = (
+ SELECT product_id
+ FROM product_units
+ WHERE product_name = 'Gourmet Apple Pie'
+ ORDER BY snapshot_timestamp ASC
+ LIMIT 1
+);
+
+-- UPDATE
+/* 1. Add current_quantity to product_units and update with last quantity from vendor_inventory */
+
+-- First add the column
+ALTER TABLE product_units
+ADD current_quantity INT;
+
+-- Then update with the last quantity values
+UPDATE product_units
+SET current_quantity = COALESCE(
+ (SELECT quantity
+ FROM vendor_inventory vi
+ WHERE vi.product_id = product_units.product_id
+ ORDER BY market_date DESC, vendor_id DESC
+ LIMIT 1),
+ 0
+);
diff --git a/02_activities/assignments/assignment2.sql b/02_activities/assignments/assignment2.sql
index 5ad40748a..7ddb32ac1 100644
--- a/02_activities/assignments/assignment2.sql
+++ b/02_activities/assignments/assignment2.sql
@@ -20,6 +20,10 @@ The `||` values concatenate the columns into strings.
Edit the appropriate columns -- you're making two edits -- and the NULL rows will be fixed.
All the other rows will remain the same.) */
+SELECT
+ product_name || ', ' || COALESCE(product_size, '') || ' (' || COALESCE(product_qty_type, 'unit') || ')' AS product_full_description
+FROM product;
+
--Windowed Functions
@@ -32,6 +36,21 @@ each new market date for each customer, or select only the unique market dates p
(without purchase details) and number those visits.
HINT: One of these approaches uses ROW_NUMBER() and one uses DENSE_RANK(). */
+--Option 1
+SELECT
+ customer_id,
+ market_date,
+ ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY market_date) AS visit_number
+FROM customer_purchases;
+
+--Option 2
+SELECT
+ customer_id,
+ market_date,
+ DENSE_RANK() OVER (PARTITION BY customer_id ORDER BY market_date) AS visit_number
+FROM customer_purchases;
+
+
/* 2. Reverse the numbering of the query from a part so each customer’s most recent visit is labeled 1,
@@ -39,10 +58,33 @@ then write another query that uses this one as a subquery (or temp table) and fi
only the customer’s most recent visit. */
+WITH visit_ranking AS (
+ SELECT
+ customer_id,
+ market_date,
+ ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY market_date DESC) AS rn
+ FROM customer_purchases
+)
+SELECT
+ customer_id,
+ market_date
+FROM visit_ranking
+WHERE rn = 1;
+
+
+
+
/* 3. Using a COUNT() window function, include a value along with each row of the
customer_purchases table that indicates how many different times that customer has purchased that product_id. */
+SELECT
+ customer_id,
+ product_id,
+ COUNT(*) OVER (PARTITION BY customer_id, product_id) AS purchase_count
+FROM customer_purchases;
+
+
-- String manipulations
@@ -57,10 +99,22 @@ Remove any trailing or leading whitespaces. Don't just use a case statement for
Hint: you might need to use INSTR(product_name,'-') to find the hyphens. INSTR will help split the column. */
+SELECT
+ product_name,
+ CASE
+ WHEN INSTR(product_name, '-') > 0 THEN TRIM(SUBSTR(product_name, INSTR(product_name, '-') + 1))
+ ELSE NULL
+ END AS description
+FROM product;
/* 2. Filter the query to show any product_size value that contain a number with REGEXP. */
+SELECT
+ product_name,
+ product_size
+FROM product
+WHERE product_size REGEXP '[0-9]';
-- UNION
@@ -73,6 +127,33 @@ HINT: There are a possibly a few ways to do this query, but if you're struggling
3) Query the second temp table twice, once for the best day, once for the worst day,
with a UNION binding them. */
+WITH daily_sales AS (
+ SELECT
+ market_date,
+ SUM(quantity * cost_to_customer_per_qty) AS total_sales
+ FROM customer_purchases
+ GROUP BY market_date
+),
+ranked_sales AS (
+ SELECT
+ market_date,
+ total_sales,
+ RANK() OVER (ORDER BY total_sales DESC) AS sales_rank_high,
+ RANK() OVER (ORDER BY total_sales ASC) AS sales_rank_low
+ FROM daily_sales
+)
+
+SELECT market_date, total_sales, 'Highest Sales' AS sales_type
+FROM ranked_sales
+WHERE sales_rank_high = 1
+
+UNION
+
+SELECT market_date, total_sales, 'Lowest Sales' AS sales_type
+FROM ranked_sales
+WHERE sales_rank_low = 1
+
+ORDER BY total_sales DESC;
@@ -89,7 +170,36 @@ Think a bit about the row counts: how many distinct vendors, product names are t
How many customers are there (y).
Before your final group by you should have the product of those two queries (x*y). */
-
+-- Cross Join
+WITH product_vendor_info AS (
+ SELECT
+ v.vendor_name,
+ p.product_name,
+ p.product_id,
+ vi.vendor_id,
+ vi.original_price AS product_price
+ FROM vendor_inventory vi
+ JOIN product p ON vi.product_id = p.product_id
+ JOIN vendor v ON vi.vendor_id = v.vendor_id
+),
+cross_joined_sales AS (
+ SELECT
+ pvi.vendor_name,
+ pvi.product_name,
+ pvi.product_price,
+ c.customer_id,
+ 5 AS quantity -- Each customer buys 5 of each product
+ FROM product_vendor_info pvi
+ CROSS JOIN customer c
+)
+
+SELECT
+ vendor_name,
+ product_name,
+ SUM(quantity * product_price) AS potential_revenue
+FROM cross_joined_sales
+GROUP BY vendor_name, product_name
+ORDER BY vendor_name, product_name;
-- INSERT
/*1. Create a new table "product_units".
@@ -97,11 +207,43 @@ This table will contain only products where the `product_qty_type = 'unit'`.
It should use all of the columns from the product table, as well as a new column for the `CURRENT_TIMESTAMP`.
Name the timestamp column `snapshot_timestamp`. */
+-- First drop table if it exists to avoid errors
+DROP TABLE IF EXISTS product_units;
+-- Then create the table
+CREATE TABLE product_units AS
+SELECT
+ product_id,
+ product_name,
+ product_size,
+ product_category_id,
+ product_qty_type,
+ CURRENT_TIMESTAMP AS snapshot_timestamp
+FROM
+ product
+WHERE
+ product_qty_type = 'unit';
/*2. Using `INSERT`, add a new row to the product_units table (with an updated timestamp).
This can be any product you desire (e.g. add another record for Apple Pie). */
+-- 2. Insert a new unit product with current timestamp
+INSERT INTO product_units (
+ product_id,
+ product_name,
+ product_size,
+ product_category_id,
+ product_qty_type,
+ snapshot_timestamp
+)
+VALUES (
+ (SELECT COALESCE(MAX(product_id), 0) + 1 FROM product_units), -- Auto-increment ID
+ 'Gourmet Apple Pie', -- New product name
+ '10 inch', -- Product size
+ (SELECT product_category_id FROM product WHERE product_name LIKE '%Pie%' LIMIT 1), -- Matching category
+ 'unit', -- Quantity type
+ CURRENT_TIMESTAMP -- Current timestamp
+);
-- DELETE
@@ -109,7 +251,16 @@ This can be any product you desire (e.g. add another record for Apple Pie). */
HINT: If you don't specify a WHERE clause, you are going to have a bad time.*/
-
+-- DELETE
+/* 1. Delete the older record for the whatever product you added. */
+DELETE FROM product_units
+WHERE product_id = (
+ SELECT product_id
+ FROM product_units
+ WHERE product_name = 'Gourmet Apple Pie'
+ ORDER BY snapshot_timestamp ASC
+ LIMIT 1
+);
-- UPDATE
/* 1.We want to add the current_quantity to the product_units table.
@@ -128,6 +279,22 @@ Finally, make sure you have a WHERE statement to update the right row,
you'll need to use product_units.product_id to refer to the correct row within the product_units table.
When you have all of these components, you can run the update statement. */
+-- UPDATE
+/* 1. Add current_quantity to product_units and update with last quantity from vendor_inventory */
+
+-- First add the column
+ALTER TABLE product_units
+ADD current_quantity INT;
+-- Then update with the last quantity values
+UPDATE product_units
+SET current_quantity = COALESCE(
+ (SELECT quantity
+ FROM vendor_inventory vi
+ WHERE vi.product_id = product_units.product_id
+ ORDER BY market_date DESC, vendor_id DESC
+ LIMIT 1),
+ 0
+);
diff --git a/04_this_cohort/live_code/module_3/DATES.sql b/04_this_cohort/live_code/module_3/DATES.sql
new file mode 100644
index 000000000..5dc4d9813
--- /dev/null
+++ b/04_this_cohort/live_code/module_3/DATES.sql
@@ -0,0 +1,27 @@
+-- dates
+
+--now
+
+SELECT DISTINCT
+DATE('now') as [now]
+,DATETIME() as [rightnow]
+
+--strftime
+,strftime('%Y/%m','now')
+,strftime('%Y-%m-%d', '2025-04-01','+50 days') as the_future
+,market_date
+,strftime('%m-%d-%Y', market_date, '+50 days', '-1 year') as the_past
+
+--dateadd
+--last date of LAST month
+,DATE(market_date,'start of month','-1 day') as end_of_prev_month
+
+--datediff "equiv"
+,market_date
+,julianday('now') - julianday(market_date) -- number of days between now and each market_date
+,(julianday('now') - julianday(market_date)) / 365.25 -- number of YEARS between now and market_date
+,(julianday('now') - julianday(market_date)) * 24 -- number of HOURS between now and each market_date
+
+
+FROM market_date_info
+
diff --git a/04_this_cohort/live_code/module_4/IFNULL_coalesce_NULLIF.sql b/04_this_cohort/live_code/module_4/IFNULL_coalesce_NULLIF.sql
new file mode 100644
index 000000000..ae8e40694
--- /dev/null
+++ b/04_this_cohort/live_code/module_4/IFNULL_coalesce_NULLIF.sql
@@ -0,0 +1,22 @@
+--IFNULL and coalesce + NULLIF
+
+SELECT *
+,IFNULL(product_size, 'Unknown')
+
+--replacing with another COLUMN
+,IFNULL(product_size,product_qty_type)
+,coalesce(product_size, product_qty_type, 'missing') -- the first value is null, then the second value, then the third value if 2nd is null
+,IFNULL(IFNULL(product_size, product_qty_type),'missing') -- same as above but with two ifnulls
+
+FROM product;
+
+
+SELECT *
+,coalesce(product_size, 'Unknown') -- we aren't handling the blank
+--nullif
+,NULLIF(product_size,'') -- finding values in product_size that are "blanks" and setting it equal to NULL
+,coalesce(NULLIF(product_size,''), 'unknown')
+
+FROM product
+
+WHERE NULLIF(product_size, '') IS NULL -- captures BOTH nulls and blanks in one!
\ No newline at end of file
diff --git a/04_this_cohort/live_code/module_4/INTERSECT_EXCEPT.sql b/04_this_cohort/live_code/module_4/INTERSECT_EXCEPT.sql
new file mode 100644
index 000000000..a54f6b9e3
--- /dev/null
+++ b/04_this_cohort/live_code/module_4/INTERSECT_EXCEPT.sql
@@ -0,0 +1,29 @@
+-- INTERSECT / EXCEPT
+
+--products that have been sold (e.g. are in the customer_purchases AND product)
+-- direction does not matter
+
+SELECT product_id
+FROM customer_purchases
+INTERSECT
+SELECT product_id
+FROM product;
+
+--products that have NOT been sold (e.g. are NOT in customer_purchases even though they are in product)
+SELECT x.product_id, product_name
+FROM (
+ SELECT product_id
+ FROM product
+ EXCEPT
+ SELECT product_id
+ FROM customer_purchases
+) x
+JOIN product p on x.product_id = p.product_id
+
+--NOTHING!
+-- direction matters....
+SELECT product_id
+FROM customer_purchases
+EXCEPT
+SELECT product_id
+FROM product
\ No newline at end of file
diff --git a/04_this_cohort/live_code/module_4/NTILE.sql b/04_this_cohort/live_code/module_4/NTILE.sql
new file mode 100644
index 000000000..9e8bb5ae1
--- /dev/null
+++ b/04_this_cohort/live_code/module_4/NTILE.sql
@@ -0,0 +1,29 @@
+--ntile 4,5,100
+
+--make quartiles, make quintiles, make percentiles
+
+-- daily sales by ntiles
+
+SELECT *
+,NTILE(4) OVER (PARTITION BY vendor_name ORDER BY sales ASC) as quartile
+,NTILE(5) OVER (PARTITION BY vendor_name ORDER BY sales ASC) as quintile
+,NTILE(100) OVER (PARTITION BY vendor_name ORDER BY sales ASC) as percentile
+
+
+FROM (
+ SELECT md.market_date
+ ,market_day
+ ,market_week
+ ,market_year
+ ,vendor_name
+ ,sum(quantity*cost_to_customer_per_qty) as sales
+
+
+ FROM market_date_info md
+ JOIN customer_purchases cp
+ ON md.market_date = cp.market_date
+ JOIN vendor v
+ ON cp.vendor_id = v.vendor_id
+
+ GROUP BY md.market_date, v.vendor_id
+) x
diff --git a/04_this_cohort/live_code/module_4/ROW_NUMBER.sql b/04_this_cohort/live_code/module_4/ROW_NUMBER.sql
new file mode 100644
index 000000000..ffbaf9948
--- /dev/null
+++ b/04_this_cohort/live_code/module_4/ROW_NUMBER.sql
@@ -0,0 +1,41 @@
+-- row_number
+-- what product is the highest price per vendor
+
+--outer query
+SELECT x.*, product_name
+FROM
+--inner QUERY
+(
+ SELECT vendor_id
+ ,market_date
+ ,product_id
+ ,original_price
+ ,ROW_NUMBER() OVER(PARTITION BY vendor_id ORDER BY original_price DESC) as price_rank
+ /* ROW_NUMBER() OVER(PARTITION BY vendor_id, product_id ORDER BY original_price DESC) as price_rank
+ if we add product_id, we could potentially track the time the original_price was HIGHEST PER PRODUCT too
+ */
+ FROM vendor_inventory
+) x
+INNER JOIN product p
+ ON x.product_id = p.product_id
+
+WHERE x.price_rank = 1;
+
+-- highest single purchase in a day per customer
+
+SELECT *
+FROM (
+ SELECT
+ customer_id
+ ,product_id
+ ,market_date
+ ,quantity
+ ,quantity*cost_to_customer_per_qty as cost
+ ,ROW_NUMBER() OVER(PARTITION BY customer_id ORDER BY quantity*cost_to_customer_per_qty DESC) as sales_rank
+
+ FROM customer_purchases
+) x
+WHERE x.sales_rank = 1
+
+ORDER BY cost DESC
+
diff --git a/04_this_cohort/live_code/module_4/UNION_UNION_ALL.sql b/04_this_cohort/live_code/module_4/UNION_UNION_ALL.sql
new file mode 100644
index 000000000..9ea09076f
--- /dev/null
+++ b/04_this_cohort/live_code/module_4/UNION_UNION_ALL.sql
@@ -0,0 +1,31 @@
+--UNION/UNION ALL
+
+--most and least expensive product per vendor with a UNION
+
+SELECT vendor_id, product_id, original_price, rn_max as [row_number]
+FROM
+(
+ SELECT DISTINCT
+ vendor_id
+ ,product_id
+ ,original_price
+ ,row_number() OVER(PARTITION BY vendor_id ORDER BY original_price DESC) as rn_max
+
+ FROM vendor_inventory
+)
+where rn_max = 1
+
+UNION -- union returned 5 rows...UNION all returned 6 rows (vendor #4 duplicated)
+
+SELECT *
+FROM
+(
+ SELECT DISTINCT
+ vendor_id
+ ,product_id
+ ,original_price
+ ,ROW_NUMBER() OVER(PARTITION BY vendor_id ORDER BY original_price ASC) as rn_min
+
+ FROM vendor_inventory
+)
+where rn_min = 1
\ No newline at end of file
diff --git a/04_this_cohort/live_code/module_4/budget_coalesce_NULLIF.sql b/04_this_cohort/live_code/module_4/budget_coalesce_NULLIF.sql
new file mode 100644
index 000000000..51f215cba
--- /dev/null
+++ b/04_this_cohort/live_code/module_4/budget_coalesce_NULLIF.sql
@@ -0,0 +1,24 @@
+
+-- create a budge temp table
+DROP TABLE IF EXISTS temp.budgets;
+
+-- here i am specifying the column types, this was asked, so budget is a string, current year is an integer, prev year also int
+CREATE TEMP TABLE IF NOT EXISTS temp.budgets (budget STRING, current_year INT, previous_year INT);
+
+
+--nothing is yet in budget
+INSERT INTO temp.budgets
+
+-- so put as row 1
+VALUES ('software',1000,1000)
+--and row 2
+, ('candles',300,500);
+
+--show me the average difference in years
+--NULLIF, if the numbers are the same, then NULL
+--COALESCE, if the result is NULL then 0.00
+--average across the values = change in years
+SELECT AVG(COALESCE(NULLIF(current_year, previous_year), 0.00))
+FROM budgets
+
+--result(300 [current year for candles] +0 / 2 [two rows] = 150.0)
diff --git a/04_this_cohort/live_code/module_4/full_outer_join_with_union.sql b/04_this_cohort/live_code/module_4/full_outer_join_with_union.sql
new file mode 100644
index 000000000..7e4a78e8a
--- /dev/null
+++ b/04_this_cohort/live_code/module_4/full_outer_join_with_union.sql
@@ -0,0 +1,40 @@
+--FULL OUTER JOIN WITH A UNION
+--two stores, determining which customes they have in stock
+
+DROP TABLE IF EXISTS temp.store1;
+CREATE TEMP TABLE IF NOT EXISTS temp.store1
+(
+costume TEXT,
+quantity INT
+);
+
+INSERT INTO temp.store1
+VALUES("tiger",6),
+ ("elephant",2),
+ ("princess", 4);
+
+
+DROP TABLE IF EXISTS temp.store2;
+CREATE TEMP TABLE IF NOT EXISTS temp.store2
+(
+costume TEXT,
+quantity INT
+);
+
+INSERT INTO temp.store2
+VALUES("tiger",2),
+ ("dancer",7),
+ ("superhero", 5);
+
+
+SELECT s1.costume, s1.quantity as store1_quantity, s2.quantity as store2_quantity
+FROM store1 s1
+LEFT JOIN store2 s2 on s1.costume = s2.costume
+
+UNION ALL
+
+SELECT s2.costume, s1.quantity, s2.quantity
+FROM store2 s2
+LEFT JOIN store1 s1 on s1.costume = s2.costume
+WHERE s1.costume IS NULL
+
diff --git a/04_this_cohort/live_code/module_4/row_rank_dense.sql b/04_this_cohort/live_code/module_4/row_rank_dense.sql
new file mode 100644
index 000000000..0b125f8d6
--- /dev/null
+++ b/04_this_cohort/live_code/module_4/row_rank_dense.sql
@@ -0,0 +1,30 @@
+-- dense_rank vs rank vs row_number
+
+
+DROP TABLE IF EXISTS temp.row_rank_dense;
+
+CREATE TEMP TABLE IF NOT EXISTS temp.row_rank_dense
+(
+emp_id INT,
+salary INT
+);
+
+INSERT INTO temp.row_rank_dense
+VALUES(1,200000),
+ (2,200000),
+ (3, 160000),
+ (4, 120000),
+ (5, 125000),
+ (6, 165000),
+ (7, 230000),
+ (8, 100000),
+ (9, 165000),
+ (10, 100000);
+
+SELECT *
+,row_number() OVER(ORDER BY salary DESC) as [row_number]
+,rank() OVER(ORDER BY salary DESC) as [rank]
+,dense_rank() OVER(ORDER BY salary DESC) as [dense_rank]
+
+FROM row_rank_dense
+
diff --git a/04_this_cohort/live_code/module_4/string_manipulations.sql b/04_this_cohort/live_code/module_4/string_manipulations.sql
new file mode 100644
index 000000000..96a32a39f
--- /dev/null
+++ b/04_this_cohort/live_code/module_4/string_manipulations.sql
@@ -0,0 +1,48 @@
+-- string manipulations
+
+SELECT DISTINCT
+
+LTRIM(' THOMAS ROSENTHAL ') as [ltrim]
+,RTRIM(' THOMAS ROSENTHAL ') as [rtrim]
+,TRIM(' THOMAS ROSENTHAL ') as [trimmed]
+,LTRIM(RTRIM(' THOMAS ROSENTHAL ')) as [both]
+
+--replace
+,REPLACE('THOMAS ROSENTHAL', ' ', ' WILLIAM ' ) -- add my middle name between first name and last name
+,REPLACE('THOMAS ROSENTHAL', 'a','') -- case sensitive
+,REPLACE('THOMAS ROSENTHAL', 'A','') -- replaces both A's
+,REPLACE(REPLACE(customer_first_name, 'a',''),'e','') as new_customer_first_name
+
+,UPPER(customer_first_name) as [upper]
+,LOWER(customer_first_name) as [lower]
+
+--concat
+,customer_first_name || ' ' || customer_last_name as customer_name
+,UPPER(customer_first_name) || ' ' || UPPER(customer_last_name) || ' ' || customer_postal_code as upper_cust_name
+
+
+FROM customer;
+
+SELECT DISTINCT
+customer_last_name
+,SUBSTR(customer_last_name,4) -- any length from the 4th character
+,substr(customer_last_name,4,2)
+,substr(customer_last_name, -5,4) -- counting from the right
+,substr(customer_last_name,1,2) -- results with 2 characters
+,substr(customer_last_name,0,2) -- results with 1 character instead of 2
+
+--length
+,length(customer_last_name)
+
+
+,'THOMAS
+
+ROSENTHAL'
+
+,replace('THOMAS
+
+ROSENTHAL', char(10), ' ' ) -- removing all instances of the line break from the string
+
+FROM customer
+
+WHERE customer_last_name REGEXP '(a)$' -- filtering to only end in a
\ No newline at end of file
diff --git a/04_this_cohort/live_code/module_5/CROSS_JOIN.sql b/04_this_cohort/live_code/module_5/CROSS_JOIN.sql
new file mode 100644
index 000000000..383ca9682
--- /dev/null
+++ b/04_this_cohort/live_code/module_5/CROSS_JOIN.sql
@@ -0,0 +1,15 @@
+--CROSS JOIN
+
+DROP TABLE IF EXISTS temp.sizes;
+CREATE TEMP TABLE IF NOT EXISTS temp.sizes (size TEXT);
+
+INSERT INTO temp.sizes
+VALUES('small'),
+('medium'),
+('large');
+
+SELECT * FROM temp.sizes;
+
+SELECT product_name, size
+FROM product
+CROSS JOIN temp.sizes
\ No newline at end of file
diff --git a/04_this_cohort/live_code/module_5/FIRST_VIEW.sql b/04_this_cohort/live_code/module_5/FIRST_VIEW.sql
new file mode 100644
index 000000000..3ac5d603d
--- /dev/null
+++ b/04_this_cohort/live_code/module_5/FIRST_VIEW.sql
@@ -0,0 +1,21 @@
+-- VIEW
+
+-- vendor daily sales
+DROP VIEW IF EXISTS vendor_daily_sales;
+CREATE VIEW IF NOT EXISTS vendor_daily_sales AS
+
+ SELECT
+ md.market_date
+ ,market_day
+ ,market_year
+ ,vendor_name
+ ,SUM(quantity*cost_to_customer_per_qty) as sales
+
+
+ FROM market_date_info md
+ INNER JOIN customer_purchases cp
+ ON md.market_date = cp.market_date
+ INNER JOIN vendor v
+ ON v.vendor_id = cp.vendor_id
+
+ GROUP BY cp.market_date, v.vendor_id
\ No newline at end of file
diff --git a/04_this_cohort/live_code/module_5/INSERT_UPDATE_DELETE.sql b/04_this_cohort/live_code/module_5/INSERT_UPDATE_DELETE.sql
new file mode 100644
index 000000000..e2845f97c
--- /dev/null
+++ b/04_this_cohort/live_code/module_5/INSERT_UPDATE_DELETE.sql
@@ -0,0 +1,21 @@
+--INSERT UPDATE AND DELETE
+
+DROP TABLE IF EXISTS temp.product_expanded;
+CREATE TEMP TABLE product_expanded AS
+ SELECT * FROM product;
+
+INSERT INTO product_expanded
+VALUES(26, 'Almonds', '1 lb',1, 'lbs');
+
+--update our new record product_size to 1/2 kg
+UPDATE product_expanded
+--SELECT * FROM product_expanded
+SET product_size = '1/2 kg', product_qty_type = 'kg'
+WHERE product_id = 26;
+
+-- delete our almonds
+DELETE FROM product_expanded
+--SELECT * FROM product_expanded
+WHERE product_id = 26;
+
+SELECT * from product_expanded
\ No newline at end of file
diff --git a/04_this_cohort/live_code/module_5/INSERT_UPDATE_FOR_DYNAMIC_VIEW.sql b/04_this_cohort/live_code/module_5/INSERT_UPDATE_FOR_DYNAMIC_VIEW.sql
new file mode 100644
index 000000000..3c01a090c
--- /dev/null
+++ b/04_this_cohort/live_code/module_5/INSERT_UPDATE_FOR_DYNAMIC_VIEW.sql
@@ -0,0 +1,5 @@
+UPDATE new_customer_purchases
+SET market_date = DATE('now');
+
+INSERT INTO market_date_info
+VALUES('2025-04-23','Wednesday','17','2025','8:00 AM','2:00 PM', 'nothing interesting','Spring', '20','28',0,0);
diff --git a/04_this_cohort/live_code/module_5/LIVE_DYNAMIC_VIEW.sql b/04_this_cohort/live_code/module_5/LIVE_DYNAMIC_VIEW.sql
new file mode 100644
index 000000000..3bbee3ede
--- /dev/null
+++ b/04_this_cohort/live_code/module_5/LIVE_DYNAMIC_VIEW.sql
@@ -0,0 +1,40 @@
+--VIEW
+-- THIS ONLY WORKS IF YOU HAVE THE PROPER STEPS
+-- IMPORT as csv
+-- ?? update the market date info table to also have todays date!
+-- UPDATING THE NEW DATA TO "TODAY" in the where statement
+
+
+DROP VIEW IF EXISTS vendor_daily_sales;
+CREATE VIEW IF NOT EXISTS vendor_daily_sales AS
+
+ SELECT
+ md.market_date
+ ,market_day
+ ,market_year
+ ,vendor_name
+ ,SUM(quantity*cost_to_customer_per_qty) as sales
+
+ -- want to update the VIEW
+ -- need to bring in the new data
+ -- new data is called new_customer_purchases
+ --but we want the old data too!
+ --....use a union to combine old and new
+
+ FROM market_date_info md
+
+ INNER JOIN (
+ SELECT * FROM
+ customer_purchases
+ UNION
+ SELECT * FROM
+ new_customer_purchases ) as cp
+ ON md.market_date = cp.market_date
+ INNER JOIN vendor v
+ ON v.vendor_id = cp.vendor_id
+
+ -- we want TODAYS data only
+ WHERE md.market_date = date('now')
+
+ GROUP BY cp.market_date, v.vendor_id
+
\ No newline at end of file
diff --git a/04_this_cohort/live_code/module_5/SELF_JOIN.sql b/04_this_cohort/live_code/module_5/SELF_JOIN.sql
new file mode 100644
index 000000000..88a519b33
--- /dev/null
+++ b/04_this_cohort/live_code/module_5/SELF_JOIN.sql
@@ -0,0 +1,22 @@
+-- SELF JOIN
+drop table if exists temp.employees;
+create temp table temp.employees
+(
+emp_id int,
+emp_name text,
+mgr_id int
+);
+
+insert into temp.employees
+Values(1,'Thomas',3)
+,(2,'Laura',4)
+,(3,'Rohan',null)
+,(4,'Jennie',3);
+
+
+SELECT * FROM temp.employees
+
+select a.emp_name,b.emp_name as mgr_name
+from temp.employees a
+left join temp.employees b
+ on a.mgr_id = b.emp_id
diff --git a/04_this_cohort/live_code/module_5/VIEW_IN_ANOTHER_QUERY.sql b/04_this_cohort/live_code/module_5/VIEW_IN_ANOTHER_QUERY.sql
new file mode 100644
index 000000000..eb79d08b9
--- /dev/null
+++ b/04_this_cohort/live_code/module_5/VIEW_IN_ANOTHER_QUERY.sql
@@ -0,0 +1,12 @@
+-- using a view in another query
+SELECT
+market_year
+,market_week
+,vendor_name
+,SUM(sales) as weekly_sales
+
+FROM vendor_daily_sales
+
+GROUP BY market_year
+,market_week
+,vendor_name
\ No newline at end of file
diff --git a/04_this_cohort/live_code/module_5/self_join_example_screenshot.png b/04_this_cohort/live_code/module_5/self_join_example_screenshot.png
new file mode 100644
index 000000000..4d9504ce8
Binary files /dev/null and b/04_this_cohort/live_code/module_5/self_join_example_screenshot.png differ
diff --git a/04_this_cohort/live_code/module_6/1nf.sql b/04_this_cohort/live_code/module_6/1nf.sql
new file mode 100644
index 000000000..f57239761
--- /dev/null
+++ b/04_this_cohort/live_code/module_6/1nf.sql
@@ -0,0 +1,18 @@
+--1nf
+drop table if exists temp.hold;
+CREATE TABLE temp.hold AS
+SELECT DISTINCT
+name,
+OS,
+SUBSTR(software, 1, INSTR(software,',')-1) AS s1,
+SUBSTR(software,INSTR(software,',')+1, INSTR(SUBSTR(software, INSTR(software, ',')+1),',')-1) as s2,
+SUBSTR(software,INSTR(SUBSTR(software,INSTR(software,',')+1),',')+INSTR(software,',')+1) as s3,
+supervisor
+
+FROM skills;
+
+SELECT name,OS,s1 as software, supervisor FROM hold
+UNION
+SELECT name,OS,s2 as software, supervisor FROM hold
+UNION
+SELECT name,OS,s3 as software, supervisor FROM hold
\ No newline at end of file
diff --git a/04_this_cohort/live_code/module_6/2nf.sql b/04_this_cohort/live_code/module_6/2nf.sql
new file mode 100644
index 000000000..234d98d9a
--- /dev/null
+++ b/04_this_cohort/live_code/module_6/2nf.sql
@@ -0,0 +1,52 @@
+-- 2nf
+drop table if exists temp.student;
+drop table if exists temp.supervisor;
+drop table if exists temp.student_software;
+
+create temp table if not exists temp.supervisor
+(
+id INTEGER PRIMARY KEY AUTOINCREMENT,
+name TEXT
+);
+
+INSERT INTO temp.supervisor(name)
+select distinct supervisor
+from skills;
+
+create temp table if not exists temp.student
+(
+id INTEGER PRIMARY KEY AUTOINCREMENT,
+name TEXT,
+OS TEXT,
+supervisor_id INTEGER,
+CONSTRAINT "fk_supervisor_id" FOREIGN KEY ("supervisor_id") REFERENCES "supervisor" ("id")
+);
+
+INSERT INTO student(name, OS, supervisor_id)
+SELECT DISTINCT
+h.name
+,OS
+,s.id AS supervisor_id
+
+FROM hold h
+JOIN supervisor s
+ on h.supervisor = s.name;
+
+CREATE TABLE temp.student_software AS
+SELECT id, software
+
+FROM student s
+JOIN (
+ SELECT name,OS,s1 as software, supervisor FROM hold
+ UNION
+ SELECT name,OS,s2 as software, supervisor FROM hold
+ UNION
+ SELECT name,OS,s3 as software, supervisor FROM hold
+) u
+ON s.name = u.name;
+
+--select * from student
+--select * from supervisor
+select * from student_software
+
+
diff --git a/04_this_cohort/live_code/module_6/3nf.sql b/04_this_cohort/live_code/module_6/3nf.sql
new file mode 100644
index 000000000..d5f037013
--- /dev/null
+++ b/04_this_cohort/live_code/module_6/3nf.sql
@@ -0,0 +1,37 @@
-- 3NF: move OS- and software-level attributes into their own tables to
-- remove transitive dependencies.
DROP TABLE IF EXISTS temp.OS;
DROP TABLE IF EXISTS temp.software;

CREATE TEMP TABLE IF NOT EXISTS temp.OS
(
    OS_id    INTEGER,
    OS       TEXT,
    win_only TEXT  -- stored as the text 'TRUE' / 'FALSE'
);

-- BUG FIX: string literals belong in single quotes; double quotes are
-- identifier quoting in standard SQL (SQLite only accepts them as a
-- legacy quirk).
INSERT INTO temp.OS
VALUES (1, 'win', 'TRUE'),
       (2, 'mac', 'FALSE');

CREATE TEMP TABLE IF NOT EXISTS temp.software
(
    software_id INTEGER PRIMARY KEY AUTOINCREMENT,
    software    TEXT,
    win_only    TEXT
);

-- Seed every distinct software title with the mac row's flag ('FALSE'),
-- then flip the Windows-only titles below.
INSERT INTO temp.software (software, win_only)
SELECT DISTINCT ss.software, os_mac.win_only
FROM student_software ss
CROSS JOIN (
    SELECT * FROM OS WHERE OS = 'mac'
) os_mac;

-- NOTE(review): the leading space in ' MSSQL' is deliberate — the 1NF
-- comma split did not TRIM, so values keep the space after the comma.
UPDATE software
SET win_only = 'TRUE'
WHERE software.software = ' MSSQL';

SELECT * FROM OS;  -- BUG FIX: terminating semicolon was missing
--SELECT * FROM software;
diff --git a/04_this_cohort/live_code/module_6/SQLite_and_python.ipynb b/04_this_cohort/live_code/module_6/SQLite_and_python.ipynb
new file mode 100644
index 000000000..9fddb799a
--- /dev/null
+++ b/04_this_cohort/live_code/module_6/SQLite_and_python.ipynb
@@ -0,0 +1,717 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "05e1dbf0",
+ "metadata": {},
+ "source": [
+ "# Connect to FarmersMarket.db"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "f1d8cb62",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import sqlite3\n",
+ "#set your location, slash direction will change for windows and mac\n",
+ "DB = '/Users/thomas/Documents/GitHub/02-intro_sql/05_src/sql/farmersmarket.db' \n",
+ "#establish your connection\n",
+ "conn = sqlite3.connect(DB, isolation_level=None,\n",
+ " detect_types=sqlite3.PARSE_COLNAMES)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "id": "1204e343",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#run your query, use \"\\\" to allow line breaks\n",
+ "db_df = pd.read_sql_query(\"SELECT p.*,pc.product_category_name \\\n",
+ " FROM product p \\\n",
+ " JOIN product_category pc \\\n",
+ " ON p.product_category_id = pc.product_category_id\"\n",
+ " ,conn)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "id": "5c7863ee-08cd-4095-b80a-61f82425bd2e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " product_id | \n",
+ " product_name | \n",
+ " product_size | \n",
+ " product_category_id | \n",
+ " product_qty_type | \n",
+ " product_category_name | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " Habanero Peppers - Organic | \n",
+ " medium | \n",
+ " 1 | \n",
+ " lbs | \n",
+ " Fresh Fruits & Vegetables | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " Jalapeno Peppers - Organic | \n",
+ " small | \n",
+ " 1 | \n",
+ " lbs | \n",
+ " Fresh Fruits & Vegetables | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " Poblano Peppers - Organic | \n",
+ " large | \n",
+ " 1 | \n",
+ " unit | \n",
+ " Fresh Fruits & Vegetables | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 4 | \n",
+ " Banana Peppers - Jar | \n",
+ " 8 oz | \n",
+ " 3 | \n",
+ " unit | \n",
+ " Packaged Prepared Food | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 5 | \n",
+ " Whole Wheat Bread | \n",
+ " 1.5 lbs | \n",
+ " 3 | \n",
+ " unit | \n",
+ " Packaged Prepared Food | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " 6 | \n",
+ " Cut Zinnias Bouquet | \n",
+ " medium | \n",
+ " 5 | \n",
+ " unit | \n",
+ " Plants & Flowers | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " 7 | \n",
+ " Apple Pie | \n",
+ " 10\" | \n",
+ " 3 | \n",
+ " unit | \n",
+ " Packaged Prepared Food | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " 9 | \n",
+ " Sweet Potatoes | \n",
+ " medium | \n",
+ " 1 | \n",
+ " lbs | \n",
+ " Fresh Fruits & Vegetables | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " 10 | \n",
+ " Eggs | \n",
+ " 1 dozen | \n",
+ " 6 | \n",
+ " unit | \n",
+ " Eggs & Meat (Fresh or Frozen) | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
+ " 11 | \n",
+ " Pork Chops | \n",
+ " 1 lb | \n",
+ " 6 | \n",
+ " lbs | \n",
+ " Eggs & Meat (Fresh or Frozen) | \n",
+ "
\n",
+ " \n",
+ " | 10 | \n",
+ " 12 | \n",
+ " Baby Salad Lettuce Mix - Bag | \n",
+ " 1/2 lb | \n",
+ " 1 | \n",
+ " unit | \n",
+ " Fresh Fruits & Vegetables | \n",
+ "
\n",
+ " \n",
+ " | 11 | \n",
+ " 13 | \n",
+ " Baby Salad Lettuce Mix | \n",
+ " 1 lb | \n",
+ " 1 | \n",
+ " lbs | \n",
+ " Fresh Fruits & Vegetables | \n",
+ "
\n",
+ " \n",
+ " | 12 | \n",
+ " 14 | \n",
+ " Red Potatoes | \n",
+ " None | \n",
+ " 1 | \n",
+ " None | \n",
+ " Fresh Fruits & Vegetables | \n",
+ "
\n",
+ " \n",
+ " | 13 | \n",
+ " 15 | \n",
+ " Red Potatoes - Small | \n",
+ " | \n",
+ " 1 | \n",
+ " None | \n",
+ " Fresh Fruits & Vegetables | \n",
+ "
\n",
+ " \n",
+ " | 14 | \n",
+ " 16 | \n",
+ " Sweet Corn | \n",
+ " Ear | \n",
+ " 1 | \n",
+ " unit | \n",
+ " Fresh Fruits & Vegetables | \n",
+ "
\n",
+ " \n",
+ " | 15 | \n",
+ " 17 | \n",
+ " Carrots | \n",
+ " sold by weight | \n",
+ " 1 | \n",
+ " lbs | \n",
+ " Fresh Fruits & Vegetables | \n",
+ "
\n",
+ " \n",
+ " | 16 | \n",
+ " 18 | \n",
+ " Carrots - Organic | \n",
+ " bunch | \n",
+ " 1 | \n",
+ " unit | \n",
+ " Fresh Fruits & Vegetables | \n",
+ "
\n",
+ " \n",
+ " | 17 | \n",
+ " 19 | \n",
+ " Farmer's Market Resuable Shopping Bag | \n",
+ " medium | \n",
+ " 7 | \n",
+ " unit | \n",
+ " Non-Edible Products | \n",
+ "
\n",
+ " \n",
+ " | 18 | \n",
+ " 20 | \n",
+ " Homemade Beeswax Candles | \n",
+ " 6\" | \n",
+ " 7 | \n",
+ " unit | \n",
+ " Non-Edible Products | \n",
+ "
\n",
+ " \n",
+ " | 19 | \n",
+ " 21 | \n",
+ " Organic Cherry Tomatoes | \n",
+ " pint | \n",
+ " 1 | \n",
+ " unit | \n",
+ " Fresh Fruits & Vegetables | \n",
+ "
\n",
+ " \n",
+ " | 20 | \n",
+ " 22 | \n",
+ " Roma Tomatoes | \n",
+ " medium | \n",
+ " 1 | \n",
+ " lbs | \n",
+ " Fresh Fruits & Vegetables | \n",
+ "
\n",
+ " \n",
+ " | 21 | \n",
+ " 23 | \n",
+ " Maple Syrup - Jar | \n",
+ " 8 oz | \n",
+ " 2 | \n",
+ " unit | \n",
+ " Packaged Pantry Goods | \n",
+ "
\n",
+ " \n",
+ " | 22 | \n",
+ " 8 | \n",
+ " Cherry Pie | \n",
+ " 10\" | \n",
+ " 3 | \n",
+ " unit | \n",
+ " Packaged Prepared Food | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " product_id product_name product_size \\\n",
+ "0 1 Habanero Peppers - Organic medium \n",
+ "1 2 Jalapeno Peppers - Organic small \n",
+ "2 3 Poblano Peppers - Organic large \n",
+ "3 4 Banana Peppers - Jar 8 oz \n",
+ "4 5 Whole Wheat Bread 1.5 lbs \n",
+ "5 6 Cut Zinnias Bouquet medium \n",
+ "6 7 Apple Pie 10\" \n",
+ "7 9 Sweet Potatoes medium \n",
+ "8 10 Eggs 1 dozen \n",
+ "9 11 Pork Chops 1 lb \n",
+ "10 12 Baby Salad Lettuce Mix - Bag 1/2 lb \n",
+ "11 13 Baby Salad Lettuce Mix 1 lb \n",
+ "12 14 Red Potatoes None \n",
+ "13 15 Red Potatoes - Small \n",
+ "14 16 Sweet Corn Ear \n",
+ "15 17 Carrots sold by weight \n",
+ "16 18 Carrots - Organic bunch \n",
+ "17 19 Farmer's Market Resuable Shopping Bag medium \n",
+ "18 20 Homemade Beeswax Candles 6\" \n",
+ "19 21 Organic Cherry Tomatoes pint \n",
+ "20 22 Roma Tomatoes medium \n",
+ "21 23 Maple Syrup - Jar 8 oz \n",
+ "22 8 Cherry Pie 10\" \n",
+ "\n",
+ " product_category_id product_qty_type product_category_name \n",
+ "0 1 lbs Fresh Fruits & Vegetables \n",
+ "1 1 lbs Fresh Fruits & Vegetables \n",
+ "2 1 unit Fresh Fruits & Vegetables \n",
+ "3 3 unit Packaged Prepared Food \n",
+ "4 3 unit Packaged Prepared Food \n",
+ "5 5 unit Plants & Flowers \n",
+ "6 3 unit Packaged Prepared Food \n",
+ "7 1 lbs Fresh Fruits & Vegetables \n",
+ "8 6 unit Eggs & Meat (Fresh or Frozen) \n",
+ "9 6 lbs Eggs & Meat (Fresh or Frozen) \n",
+ "10 1 unit Fresh Fruits & Vegetables \n",
+ "11 1 lbs Fresh Fruits & Vegetables \n",
+ "12 1 None Fresh Fruits & Vegetables \n",
+ "13 1 None Fresh Fruits & Vegetables \n",
+ "14 1 unit Fresh Fruits & Vegetables \n",
+ "15 1 lbs Fresh Fruits & Vegetables \n",
+ "16 1 unit Fresh Fruits & Vegetables \n",
+ "17 7 unit Non-Edible Products \n",
+ "18 7 unit Non-Edible Products \n",
+ "19 1 unit Fresh Fruits & Vegetables \n",
+ "20 1 lbs Fresh Fruits & Vegetables \n",
+ "21 2 unit Packaged Pantry Goods \n",
+ "22 3 unit Packaged Prepared Food "
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "db_df"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8b7c36c0",
+ "metadata": {},
+ "source": [
+ "Export the query:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "ee17555e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#save\n",
+ "db_df.to_csv('database-py.CSV', index=False)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ed14b573",
+ "metadata": {},
+ "source": [
+ "# Run a SQL query with pandasql"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "ac82fb05",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#!pip install pandasql"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "id": "4f783bd4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import pandasql as sql #this allows us to run SQLite queries!\n",
+ "p = \"https://raw.githubusercontent.com/allisonhorst/palmerpenguins/master/inst/extdata/penguins.csv\"\n",
+ "penguins = pd.read_csv(p) #create a dataframe\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "id": "7892f454",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " species | \n",
+ " island | \n",
+ " bill_length_mm | \n",
+ " bill_depth_mm | \n",
+ " flipper_length_mm | \n",
+ " body_mass_g | \n",
+ " sex | \n",
+ " year | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " Adelie | \n",
+ " Torgersen | \n",
+ " 39.1 | \n",
+ " 18.7 | \n",
+ " 181.0 | \n",
+ " 3750.0 | \n",
+ " male | \n",
+ " 2007 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " Adelie | \n",
+ " Torgersen | \n",
+ " 39.5 | \n",
+ " 17.4 | \n",
+ " 186.0 | \n",
+ " 3800.0 | \n",
+ " female | \n",
+ " 2007 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " Adelie | \n",
+ " Torgersen | \n",
+ " 40.3 | \n",
+ " 18.0 | \n",
+ " 195.0 | \n",
+ " 3250.0 | \n",
+ " female | \n",
+ " 2007 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " Adelie | \n",
+ " Torgersen | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 2007 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " Adelie | \n",
+ " Torgersen | \n",
+ " 36.7 | \n",
+ " 19.3 | \n",
+ " 193.0 | \n",
+ " 3450.0 | \n",
+ " female | \n",
+ " 2007 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 339 | \n",
+ " Chinstrap | \n",
+ " Dream | \n",
+ " 55.8 | \n",
+ " 19.8 | \n",
+ " 207.0 | \n",
+ " 4000.0 | \n",
+ " male | \n",
+ " 2009 | \n",
+ "
\n",
+ " \n",
+ " | 340 | \n",
+ " Chinstrap | \n",
+ " Dream | \n",
+ " 43.5 | \n",
+ " 18.1 | \n",
+ " 202.0 | \n",
+ " 3400.0 | \n",
+ " female | \n",
+ " 2009 | \n",
+ "
\n",
+ " \n",
+ " | 341 | \n",
+ " Chinstrap | \n",
+ " Dream | \n",
+ " 49.6 | \n",
+ " 18.2 | \n",
+ " 193.0 | \n",
+ " 3775.0 | \n",
+ " male | \n",
+ " 2009 | \n",
+ "
\n",
+ " \n",
+ " | 342 | \n",
+ " Chinstrap | \n",
+ " Dream | \n",
+ " 50.8 | \n",
+ " 19.0 | \n",
+ " 210.0 | \n",
+ " 4100.0 | \n",
+ " male | \n",
+ " 2009 | \n",
+ "
\n",
+ " \n",
+ " | 343 | \n",
+ " Chinstrap | \n",
+ " Dream | \n",
+ " 50.2 | \n",
+ " 18.7 | \n",
+ " 198.0 | \n",
+ " 3775.0 | \n",
+ " female | \n",
+ " 2009 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
344 rows × 8 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " species island bill_length_mm bill_depth_mm flipper_length_mm \\\n",
+ "0 Adelie Torgersen 39.1 18.7 181.0 \n",
+ "1 Adelie Torgersen 39.5 17.4 186.0 \n",
+ "2 Adelie Torgersen 40.3 18.0 195.0 \n",
+ "3 Adelie Torgersen NaN NaN NaN \n",
+ "4 Adelie Torgersen 36.7 19.3 193.0 \n",
+ ".. ... ... ... ... ... \n",
+ "339 Chinstrap Dream 55.8 19.8 207.0 \n",
+ "340 Chinstrap Dream 43.5 18.1 202.0 \n",
+ "341 Chinstrap Dream 49.6 18.2 193.0 \n",
+ "342 Chinstrap Dream 50.8 19.0 210.0 \n",
+ "343 Chinstrap Dream 50.2 18.7 198.0 \n",
+ "\n",
+ " body_mass_g sex year \n",
+ "0 3750.0 male 2007 \n",
+ "1 3800.0 female 2007 \n",
+ "2 3250.0 female 2007 \n",
+ "3 NaN NaN 2007 \n",
+ "4 3450.0 female 2007 \n",
+ ".. ... ... ... \n",
+ "339 4000.0 male 2009 \n",
+ "340 3400.0 female 2009 \n",
+ "341 3775.0 male 2009 \n",
+ "342 4100.0 male 2009 \n",
+ "343 3775.0 female 2009 \n",
+ "\n",
+ "[344 rows x 8 columns]"
+ ]
+ },
+ "execution_count": 24,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "penguins"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "id": "8036d336",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "yrly_penguins = sql.sqldf('''SELECT DISTINCT year, COUNT(*) AS count, \n",
+ " SUM(COUNT(*)) OVER (ORDER BY year) AS running_total\n",
+ " FROM penguins\n",
+ " GROUP BY year''') #run a SQLite query with sqldf()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "id": "80fd4dd6",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " year | \n",
+ " count | \n",
+ " running_total | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 2007 | \n",
+ " 110 | \n",
+ " 110 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2008 | \n",
+ " 114 | \n",
+ " 224 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 2009 | \n",
+ " 120 | \n",
+ " 344 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " year count running_total\n",
+ "0 2007 110 110\n",
+ "1 2008 114 224\n",
+ "2 2009 120 344"
+ ]
+ },
+ "execution_count": 26,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "yrly_penguins"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0cd3de3f-fb4f-46ac-ad42-23971226e5d0",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/04_this_cohort/live_code/module_6/denormalized.sql b/04_this_cohort/live_code/module_6/denormalized.sql
new file mode 100644
index 000000000..da4208587
--- /dev/null
+++ b/04_this_cohort/live_code/module_6/denormalized.sql
@@ -0,0 +1,14 @@
-- Denormalized starting point for the normal-forms demo: one row per
-- student, with software stored as a comma-separated list (a deliberate
-- 1NF violation that the later scripts normalize away).
DROP TABLE IF EXISTS temp.skills;

CREATE TEMP TABLE IF NOT EXISTS temp.skills
(
    name       TEXT,
    OS         TEXT,
    software   TEXT,  -- comma-separated list, e.g. 'VSCode, MSSQL, RStudio'
    supervisor TEXT
);

-- BUG FIX: string literals use single quotes; double quotes are
-- identifier quoting in standard SQL (SQLite only tolerates them as a
-- legacy misfeature).
INSERT INTO temp.skills
VALUES ('A', 'win', 'VSCode, MSSQL, RStudio', 'Eric Yu'),
       ('Thomas', 'mac', 'Spyder, SQLite, RStudio', 'Rohan Alexander');
diff --git a/04_this_cohort/live_code/module_6/penguins_in_python_sql.sql b/04_this_cohort/live_code/module_6/penguins_in_python_sql.sql
new file mode 100644
index 000000000..2326c1d29
--- /dev/null
+++ b/04_this_cohort/live_code/module_6/penguins_in_python_sql.sql
@@ -0,0 +1,9 @@
SELECT * FROM penguins;

-- How many penguins were identified each year, with a running total
-- across years via a window aggregate over the grouped counts.
-- FIX: dropped the redundant DISTINCT — GROUP BY year already yields
-- exactly one row per year.
SELECT year
    ,COUNT(*) AS count
    ,SUM(COUNT(*)) OVER (ORDER BY year) AS running_total
FROM penguins
GROUP BY year;
\ No newline at end of file