diff --git a/pandas_quiz.ipynb b/pandas_quiz.ipynb
index 22545cd..33221f0 100644
--- a/pandas_quiz.ipynb
+++ b/pandas_quiz.ipynb
@@ -30,7 +30,7 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 2,
"metadata": {
"colab": {},
"colab_type": "code",
@@ -54,7 +54,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 3,
"metadata": {
"colab": {},
"colab_type": "code",
@@ -118,6 +118,7 @@
"
sales_ymd | \n",
" customer_id | \n",
" product_cd | \n",
+ " quantity | \n",
" amount | \n",
" \n",
" \n",
@@ -127,6 +128,7 @@
" 20180911 | \n",
" CS018205000001 | \n",
" P071401012 | \n",
+ " 1 | \n",
" 2200 | \n",
" \n",
" \n",
@@ -134,6 +136,7 @@
" | 20180414 | \n",
" CS018205000001 | \n",
" P060104007 | \n",
+ " 6 | \n",
" 600 | \n",
"
\n",
" \n",
@@ -141,6 +144,7 @@
" | 20170614 | \n",
" CS018205000001 | \n",
" P050206001 | \n",
+ " 5 | \n",
" 990 | \n",
"
\n",
" \n",
@@ -148,6 +152,7 @@
" | 20190226 | \n",
" CS018205000001 | \n",
" P071401020 | \n",
+ " 1 | \n",
" 2200 | \n",
"
\n",
" \n",
@@ -155,6 +160,7 @@
" | 20180911 | \n",
" CS018205000001 | \n",
" P071401005 | \n",
+ " 1 | \n",
" 1100 | \n",
"
\n",
" \n",
@@ -162,12 +168,12 @@
""
],
"text/plain": [
- " sales_ymd customer_id product_cd amount\n",
- "36 20180911 CS018205000001 P071401012 2200\n",
- "9843 20180414 CS018205000001 P060104007 600\n",
- "21110 20170614 CS018205000001 P050206001 990\n",
- "68117 20190226 CS018205000001 P071401020 2200\n",
- "72254 20180911 CS018205000001 P071401005 1100"
+ " sales_ymd customer_id product_cd quantity amount\n",
+ "36 20180911 CS018205000001 P071401012 1 2200\n",
+ "9843 20180414 CS018205000001 P060104007 6 600\n",
+ "21110 20170614 CS018205000001 P050206001 5 990\n",
+ "68117 20190226 CS018205000001 P071401020 1 2200\n",
+ "72254 20180911 CS018205000001 P071401005 1 1100"
]
},
"execution_count": 4,
@@ -175,7 +181,13 @@
"output_type": "execute_result"
}
],
- "source": []
+ "source": [
+ "df_receipt1 = df_receipt.loc[:, [\"sales_ymd\", \"customer_id\", \"product_cd\", \"quantity\", \"amount\"]]\n",
+ "mask1 = df_receipt[\"customer_id\"].eq(\"CS018205000001\")  # rows of the target customer\n",
+ "mask2 = df_receipt[\"amount\"].ge(1000) | df_receipt[\"quantity\"].ge(5)  # large amount OR large quantity\n",
+ "mask = mask1 & mask2\n",
+ "df_receipt1.loc[mask]"
+ ]
},
{
"cell_type": "markdown",
@@ -339,7 +351,10 @@
"output_type": "execute_result"
}
],
- "source": []
+ "source": [
+ "df_as = df_customer.sort_values(\"birth_day\", ascending=False)  # latest birth_day first\n",
+ "df_as.head(5)"
+ ]
},
{
"cell_type": "markdown",
@@ -377,7 +392,9 @@
"output_type": "execute_result"
}
],
- "source": []
+ "source": [
+ "df_receipt.shape[0]"
+ ]
},
{
"cell_type": "markdown",
@@ -414,7 +431,9 @@
"output_type": "execute_result"
}
],
- "source": []
+ "source": [
+ "len(df_receipt[\"customer_id\"].dropna().unique())"
+ ]
},
{
"attachments": {},
@@ -481,7 +500,12 @@
"output_type": "execute_result"
}
],
- "source": []
+ "source": [
+ "df_receipt_grouped = df_receipt_only_member.groupby(\"customer_id\")[\"amount\"].sum()  # total amount per customer\n",
+ "df_receipt_grouped_mean = df_receipt_grouped.mean()  # reuse the totals instead of grouping a second time\n",
+ "mask_5 = df_receipt_grouped >= df_receipt_grouped_mean\n",
+ "df_receipt_grouped[mask_5].head()"
+ ]
},
{
"attachments": {},
@@ -580,74 +604,74 @@
" 0 | \n",
" CS021313000114 | \n",
" 大野 あや子 | \n",
- " 1.0 | \n",
+ " 1 | \n",
" 女性 | \n",
" 1981-04-29 | \n",
- " 37.0 | \n",
+ " 37 | \n",
" 259-1113 | \n",
" 神奈川県伊勢原市粟窪********** | \n",
" S14021 | \n",
- " 20150905.0 | \n",
+ " 20150905 | \n",
" 0-00000000-0 | \n",
" 0.0 | \n",
" \n",
" \n",
- " | 1 | \n",
+ " 2 | \n",
" CS031415000172 | \n",
" 宇多田 貴美子 | \n",
- " 1.0 | \n",
+ " 1 | \n",
" 女性 | \n",
" 1976-10-04 | \n",
- " 42.0 | \n",
+ " 42 | \n",
" 151-0053 | \n",
" 東京都渋谷区代々木********** | \n",
" S13031 | \n",
- " 20150529.0 | \n",
+ " 20150529 | \n",
" D-20100325-C | \n",
" 5088.0 | \n",
"
\n",
" \n",
- " | 2 | \n",
+ " 3 | \n",
" CS028811000001 | \n",
" 堀井 かおり | \n",
- " 1.0 | \n",
+ " 1 | \n",
" 女性 | \n",
" 1933-03-27 | \n",
- " 86.0 | \n",
+ " 86 | \n",
" 245-0016 | \n",
" 神奈川県横浜市泉区和泉町********** | \n",
" S14028 | \n",
- " 20160115.0 | \n",
+ " 20160115 | \n",
" 0-00000000-0 | \n",
" 0.0 | \n",
"
\n",
" \n",
- " | 3 | \n",
+ " 4 | \n",
" CS001215000145 | \n",
" 田崎 美紀 | \n",
- " 1.0 | \n",
+ " 1 | \n",
" 女性 | \n",
" 1995-03-29 | \n",
- " 24.0 | \n",
+ " 24 | \n",
" 144-0055 | \n",
" 東京都大田区仲六郷********** | \n",
" S13001 | \n",
- " 20170605.0 | \n",
+ " 20170605 | \n",
" 6-20090929-2 | \n",
" 875.0 | \n",
"
\n",
" \n",
- " | 4 | \n",
+ " 6 | \n",
" CS015414000103 | \n",
" 奥野 陽子 | \n",
- " 1.0 | \n",
+ " 1 | \n",
" 女性 | \n",
" 1977-08-09 | \n",
- " 41.0 | \n",
+ " 41 | \n",
" 136-0073 | \n",
" 東京都江東区北砂********** | \n",
" S13015 | \n",
- " 20150722.0 | \n",
+ " 20150722 | \n",
" B-20100609-B | \n",
" 3122.0 | \n",
"
\n",
@@ -656,26 +680,26 @@
""
],
"text/plain": [
- " customer_id customer_name gender_cd gender birth_day age postal_cd \\\n",
- "0 CS021313000114 大野 あや子 1.0 女性 1981-04-29 37.0 259-1113 \n",
- "1 CS031415000172 宇多田 貴美子 1.0 女性 1976-10-04 42.0 151-0053 \n",
- "2 CS028811000001 堀井 かおり 1.0 女性 1933-03-27 86.0 245-0016 \n",
- "3 CS001215000145 田崎 美紀 1.0 女性 1995-03-29 24.0 144-0055 \n",
- "4 CS015414000103 奥野 陽子 1.0 女性 1977-08-09 41.0 136-0073 \n",
+ " customer_id customer_name gender_cd gender birth_day age postal_cd \\\n",
+ "0 CS021313000114 大野 あや子 1 女性 1981-04-29 37 259-1113 \n",
+ "2 CS031415000172 宇多田 貴美子 1 女性 1976-10-04 42 151-0053 \n",
+ "3 CS028811000001 堀井 かおり 1 女性 1933-03-27 86 245-0016 \n",
+ "4 CS001215000145 田崎 美紀 1 女性 1995-03-29 24 144-0055 \n",
+ "6 CS015414000103 奥野 陽子 1 女性 1977-08-09 41 136-0073 \n",
"\n",
" address application_store_cd application_date \\\n",
- "0 神奈川県伊勢原市粟窪********** S14021 20150905.0 \n",
- "1 東京都渋谷区代々木********** S13031 20150529.0 \n",
- "2 神奈川県横浜市泉区和泉町********** S14028 20160115.0 \n",
- "3 東京都大田区仲六郷********** S13001 20170605.0 \n",
- "4 東京都江東区北砂********** S13015 20150722.0 \n",
+ "0 神奈川県伊勢原市粟窪********** S14021 20150905 \n",
+ "2 東京都渋谷区代々木********** S13031 20150529 \n",
+ "3 神奈川県横浜市泉区和泉町********** S14028 20160115 \n",
+ "4 東京都大田区仲六郷********** S13001 20170605 \n",
+ "6 東京都江東区北砂********** S13015 20150722 \n",
"\n",
" status_cd amount \n",
"0 0-00000000-0 0.0 \n",
- "1 D-20100325-C 5088.0 \n",
- "2 0-00000000-0 0.0 \n",
- "3 6-20090929-2 875.0 \n",
- "4 B-20100609-B 3122.0 "
+ "2 D-20100325-C 5088.0 \n",
+ "3 0-00000000-0 0.0 \n",
+ "4 6-20090929-2 875.0 \n",
+ "6 B-20100609-B 3122.0 "
]
},
"execution_count": 11,
@@ -683,7 +707,12 @@
"output_type": "execute_result"
}
],
- "source": []
+ "source": [
+ "df_receipt_grouped = df_receipt_only_member.groupby(\"customer_id\")[\"amount\"].sum()  # total amount per customer\n",
+ "df_merge = pd.merge(df_customer_only_member, df_receipt_grouped, on=\"customer_id\", how=\"left\")\n",
+ "mask_6 = df_merge[\"gender_cd\"] == 1\n",
+ "df_merge[mask_6].fillna({\"amount\": 0}).head()  # zero-fill only the missing totals, leave other columns' NaN untouched\n"
+ ]
},
{
"attachments": {},
@@ -802,7 +831,10 @@
"output_type": "execute_result"
}
],
- "source": []
+ "source": [
+ "df_customer[\"application_date\"] = df_customer[\"application_date\"].pipe(pd.to_datetime, format=\"%Y%m%d\")  # overwrites the column in df_customer\n",
+ "df_customer.loc[:, [\"customer_id\", \"application_date\"]].head()\n"
+ ]
},
{
"attachments": {},
@@ -839,7 +871,7 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 27,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
@@ -871,8 +903,8 @@
" \n",
" \n",
" | \n",
- " amount | \n",
- " log_amount | \n",
+ " amount_x | \n",
+ " amount_y | \n",
"
\n",
" \n",
" | customer_id | \n",
@@ -911,21 +943,24 @@
""
],
"text/plain": [
- " amount log_amount\n",
+ " amount_x amount_y\n",
"customer_id \n",
- "CS001113000004 1298 3.113275\n",
- "CS001114000005 626 2.796574\n",
- "CS001115000010 3044 3.483445\n",
- "CS001205000004 1988 3.298416\n",
- "CS001205000006 3337 3.523356"
+ "CS001113000004 1298 3.113275\n",
+ "CS001114000005 626 2.796574\n",
+ "CS001115000010 3044 3.483445\n",
+ "CS001205000004 1988 3.298416\n",
+ "CS001205000006 3337 3.523356"
]
},
- "execution_count": 13,
+ "execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
- "source": []
+ "source": [
+ "log_amount = np.log10(df_receipt_grouped).rename(\"log_amount\")  # name the Series so the column is not suffixed to amount_y\n",
+ "pd.concat([df_receipt_grouped, log_amount], axis=1).head()  # columns: amount, log_amount (index: customer_id)\n"
+ ]
},
{
"cell_type": "markdown",
@@ -941,7 +976,7 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 47,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
@@ -955,7 +990,7 @@
{
"data": {
"text/plain": [
- "product_cd 0\n",
+ "product_cd 0\n",
"category_major_cd 0\n",
"category_medium_cd 0\n",
"category_small_cd 0\n",
@@ -964,12 +999,14 @@
"dtype: int64"
]
},
- "execution_count": 14,
+ "execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
- "source": []
+ "source": [
+ "df_product.isna().sum()"
+ ]
},
{
"cell_type": "markdown",
@@ -985,18 +1022,20 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": 49,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "q3_9sLdHhhzt"
},
"outputs": [],
- "source": []
+ "source": [
+ "df_product_1 = df_product.dropna(axis=0, how=\"any\")  # drop every row that has at least one missing value"
+ ]
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 50,
"metadata": {
"colab": {},
"colab_type": "code",
@@ -1009,7 +1048,7 @@
"(10030, 10023)"
]
},
- "execution_count": 16,
+ "execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
@@ -1032,7 +1071,7 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 69,
"metadata": {
"colab": {},
"colab_type": "code",
@@ -1042,7 +1081,7 @@
{
"data": {
"text/plain": [
- "product_cd 0\n",
+ "product_cd 0\n",
"category_major_cd 0\n",
"category_medium_cd 0\n",
"category_small_cd 0\n",
@@ -1051,12 +1090,16 @@
"dtype: int64"
]
},
- "execution_count": 17,
+ "execution_count": 69,
"metadata": {},
"output_type": "execute_result"
}
],
- "source": []
+ "source": [
+ "mean = df_product.loc[:, [\"unit_price\", \"unit_cost\"]].mean().round()  # rounded per-column means\n",
+ "df_product_2 = df_product.fillna(value=mean)  # fills each column from the matching entry of mean\n",
+ "df_product_2.isna().sum()\n"
+ ]
},
{
"attachments": {},
@@ -1094,7 +1137,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.7.4"
+ "version": "3.13.1"
}
},
"nbformat": 4,