From 289f41773c748d0b31c13fe38a6c0074f196d391 Mon Sep 17 00:00:00 2001 From: shiho629 Date: Fri, 7 Mar 2025 13:38:30 +0900 Subject: [PATCH 1/2] pandas --- pandas_quiz.ipynb | 276 ++++++++++++++++++++++------------------------ 1 file changed, 130 insertions(+), 146 deletions(-) diff --git a/pandas_quiz.ipynb b/pandas_quiz.ipynb index 22545cd..75c626b 100644 --- a/pandas_quiz.ipynb +++ b/pandas_quiz.ipynb @@ -30,7 +30,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 36, "metadata": { "colab": {}, "colab_type": "code", @@ -54,7 +54,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 37, "metadata": { "colab": {}, "colab_type": "code", @@ -83,7 +83,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -118,6 +118,7 @@ " sales_ymd\n", " customer_id\n", " product_cd\n", + " quantity\n", " amount\n", " \n", " \n", @@ -127,6 +128,7 @@ " 20180911\n", " CS018205000001\n", " P071401012\n", + " 1\n", " 2200\n", " \n", " \n", @@ -134,6 +136,7 @@ " 20180414\n", " CS018205000001\n", " P060104007\n", + " 6\n", " 600\n", " \n", " \n", @@ -141,6 +144,7 @@ " 20170614\n", " CS018205000001\n", " P050206001\n", + " 5\n", " 990\n", " \n", " \n", @@ -148,6 +152,7 @@ " 20190226\n", " CS018205000001\n", " P071401020\n", + " 1\n", " 2200\n", " \n", " \n", @@ -155,6 +160,7 @@ " 20180911\n", " CS018205000001\n", " P071401005\n", + " 1\n", " 1100\n", " \n", " \n", @@ -162,20 +168,26 @@ "" ], "text/plain": [ - " sales_ymd customer_id product_cd amount\n", - "36 20180911 CS018205000001 P071401012 2200\n", - "9843 20180414 CS018205000001 P060104007 600\n", - "21110 20170614 CS018205000001 P050206001 990\n", - "68117 20190226 CS018205000001 P071401020 2200\n", - "72254 20180911 CS018205000001 P071401005 1100" + " sales_ymd customer_id product_cd quantity amount\n", + "36 20180911 CS018205000001 P071401012 1 2200\n", + "9843 20180414 CS018205000001 P060104007 6 600\n", + "21110 20170614 CS018205000001 P050206001 5 990\n", + "68117 20190226 CS018205000001 P071401020 1 2200\n", + "72254 20180911 CS018205000001 P071401005 1 1100" ] }, - "execution_count": 4, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], - "source": [] + "source": [ + "df_receipt1 = df_receipt[[\"sales_ymd\",\"customer_id\",\"product_cd\",\"quantity\",\"amount\"]]\n", + "mask1 = (df_receipt['customer_id']==\"CS018205000001\")\n", + "mask2 = (df_receipt['amount']>=1000 )| (df_receipt['quantity']>= 5)\n", + "mask = mask1 & mask2 \n", + "df_receipt1[mask]" + ] }, { "cell_type": "markdown", @@ -191,7 +203,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 7, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -334,12 +346,15 @@ "7039 6-20100510-1 " ] }, - "execution_count": 5, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], - "source": [] + "source": [ + "df_as = df_customer.sort_values(by='birth_day',ascending=False)\n", + "df_as.head()" + ] }, { "cell_type": "markdown", @@ -355,7 +370,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 48, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -372,12 +387,14 @@ "104681" ] }, - "execution_count": 6, + "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], - "source": [] + "source": [ + "len(df_receipt)" + ] }, { "cell_type": "markdown", @@ -392,7 +409,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 9, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -409,12 +426,14 @@ "8307" ] }, - "execution_count": 7, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], - "source": [] + "source": [ + "df_receipt['customer_id'].nunique()" + ] }, { "attachments": {}, @@ -443,7 +462,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -453,7 +472,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 11, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -476,12 +495,17 @@ "Name: amount, dtype: int64" ] }, - "execution_count": 9, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], - "source": [] + "source": [ + "df_receipt_grouped=df_receipt_only_member.groupby('customer_id')[\"amount\"].sum()\n", + "df_receipt_grouped_mean = df_receipt_only_member.groupby('customer_id')[\"amount\"].sum().mean()\n", + "mask_5 = (df_receipt_grouped >= df_receipt_grouped_mean)\n", + "df_receipt_grouped[mask_5].head()" + ] }, { "attachments": {}, @@ -519,7 +543,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -529,7 +553,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 31, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -580,74 +604,74 @@ " 0\n", " CS021313000114\n", " 大野 あや子\n", - " 1.0\n", + " 1\n", " 女性\n", " 1981-04-29\n", - " 37.0\n", + " 37\n", " 259-1113\n", " 神奈川県伊勢原市粟窪**********\n", " S14021\n", - " 20150905.0\n", + " 20150905\n", " 0-00000000-0\n", " 0.0\n", " \n", " \n", - " 1\n", + " 2\n", " CS031415000172\n", " 宇多田 貴美子\n", - " 1.0\n", + " 1\n", " 女性\n", " 1976-10-04\n", - " 42.0\n", + " 42\n", " 151-0053\n", " 東京都渋谷区代々木**********\n", " S13031\n", - " 20150529.0\n", + " 20150529\n", " D-20100325-C\n", " 5088.0\n", " \n", " \n", - " 2\n", + " 3\n", " CS028811000001\n", " 堀井 かおり\n", - " 1.0\n", + " 1\n", " 女性\n", " 1933-03-27\n", - " 86.0\n", + " 86\n", " 245-0016\n", " 神奈川県横浜市泉区和泉町**********\n", " S14028\n", - " 20160115.0\n", + " 20160115\n", " 0-00000000-0\n", " 0.0\n", " \n", " \n", - " 3\n", + " 4\n", " CS001215000145\n", " 田崎 美紀\n", - " 1.0\n", + " 1\n", " 女性\n", " 1995-03-29\n", - " 24.0\n", + " 24\n", " 144-0055\n", " 東京都大田区仲六郷**********\n", " S13001\n", - " 20170605.0\n", + " 20170605\n", " 6-20090929-2\n", " 875.0\n", " \n", " \n", - " 4\n", + " 6\n", " CS015414000103\n", " 奥野 陽子\n", - " 1.0\n", + " 1\n", " 女性\n", " 1977-08-09\n", - " 41.0\n", + " 41\n", " 136-0073\n", " 東京都江東区北砂**********\n", " S13015\n", - " 20150722.0\n", + " 20150722\n", " B-20100609-B\n", " 3122.0\n", " \n", @@ -656,34 +680,39 @@ "" ], "text/plain": [ - " customer_id customer_name gender_cd gender birth_day age postal_cd \\\n", - "0 CS021313000114 大野 あや子 1.0 女性 1981-04-29 37.0 259-1113 \n", - "1 CS031415000172 宇多田 貴美子 1.0 女性 1976-10-04 42.0 151-0053 \n", - "2 CS028811000001 堀井 かおり 1.0 女性 1933-03-27 86.0 245-0016 \n", - "3 CS001215000145 田崎 美紀 1.0 女性 1995-03-29 24.0 144-0055 \n", - "4 CS015414000103 奥野 陽子 1.0 女性 1977-08-09 41.0 136-0073 \n", + " customer_id customer_name gender_cd gender birth_day age postal_cd \\\n", + "0 CS021313000114 大野 あや子 1 女性 1981-04-29 37 259-1113 \n", + "2 CS031415000172 宇多田 貴美子 1 女性 1976-10-04 42 151-0053 \n", + "3 CS028811000001 堀井 かおり 1 女性 1933-03-27 86 245-0016 \n", + "4 CS001215000145 田崎 美紀 1 女性 1995-03-29 24 144-0055 \n", + "6 CS015414000103 奥野 陽子 1 女性 1977-08-09 41 136-0073 \n", "\n", " address application_store_cd application_date \\\n", - "0 神奈川県伊勢原市粟窪********** S14021 20150905.0 \n", - "1 東京都渋谷区代々木********** S13031 20150529.0 \n", - "2 神奈川県横浜市泉区和泉町********** S14028 20160115.0 \n", - "3 東京都大田区仲六郷********** S13001 20170605.0 \n", - "4 東京都江東区北砂********** S13015 20150722.0 \n", + "0 神奈川県伊勢原市粟窪********** S14021 20150905 \n", + "2 東京都渋谷区代々木********** S13031 20150529 \n", + "3 神奈川県横浜市泉区和泉町********** S14028 20160115 \n", + "4 東京都大田区仲六郷********** S13001 20170605 \n", + "6 東京都江東区北砂********** S13015 20150722 \n", "\n", " status_cd amount \n", "0 0-00000000-0 0.0 \n", - "1 D-20100325-C 5088.0 \n", - "2 0-00000000-0 0.0 \n", - "3 6-20090929-2 875.0 \n", - "4 B-20100609-B 3122.0 " + "2 D-20100325-C 5088.0 \n", + "3 0-00000000-0 0.0 \n", + "4 6-20090929-2 875.0 \n", + "6 B-20100609-B 3122.0 " ] }, - "execution_count": 11, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], - "source": [] + "source": [ + "df_receipt_grouped=df_receipt_only_member.groupby('customer_id')[\"amount\"].sum()\n", + "df_merge = pd.merge(df_customer_only_member,df_receipt_grouped,on='customer_id',how=\"left\")\n", + "mask_6 = (df_merge['gender_cd']==1.0)\n", + "df_merge[mask_6].fillna(0).head()\n" + ] }, { "attachments": {}, @@ -722,7 +751,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 44, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -797,12 +826,15 @@ "4 CS001215000145 2017-06-05" ] }, - "execution_count": 12, + "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], - "source": [] + "source": [ + "df_customer[\"application_date\"] = pd.to_datetime(df_customer[\"application_date\"],format=\"%Y%m%d\")\n", + "df_customer[[\"customer_id\",\"application_date\"]].head()\n" + ] }, { "attachments": {}, @@ -839,7 +871,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -852,80 +884,24 @@ "outputs": [ { "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
amountlog_amount
customer_id
CS00111300000412983.113275
CS0011140000056262.796574
CS00111500001030443.483445
CS00120500000419883.298416
CS00120500000633373.523356
\n", - "
" - ], "text/plain": [ - " amount log_amount\n", - "customer_id \n", - "CS001113000004 1298 3.113275\n", - "CS001114000005 626 2.796574\n", - "CS001115000010 3044 3.483445\n", - "CS001205000004 1988 3.298416\n", - "CS001205000006 3337 3.523356" + "customer_id\n", + "CS001113000004 1298\n", + "CS001114000005 626\n", + "CS001115000010 3044\n", + "CS001205000004 1988\n", + "CS001205000006 3337\n", + "Name: amount, dtype: int64" ] }, - "execution_count": 13, + "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], - "source": [] + "source": [ + "df_receipt_grouped.head()\n" + ] }, { "cell_type": "markdown", @@ -941,7 +917,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 47, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -955,7 +931,7 @@ { "data": { "text/plain": [ - "product_cd 0\n", + "product_cd 0\n", "category_major_cd 0\n", "category_medium_cd 0\n", "category_small_cd 0\n", @@ -964,12 +940,14 @@ "dtype: int64" ] }, - "execution_count": 14, + "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], - "source": [] + "source": [ + "df_product.isnull().sum()" + ] }, { "cell_type": "markdown", @@ -985,18 +963,20 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 49, "metadata": { "colab": {}, "colab_type": "code", "id": "q3_9sLdHhhzt" }, "outputs": [], - "source": [] + "source": [ + "df_product_1=df_product.dropna()" + ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 50, "metadata": { "colab": {}, "colab_type": "code", @@ -1009,7 +989,7 @@ "(10030, 10023)" ] }, - "execution_count": 16, + "execution_count": 50, "metadata": {}, "output_type": "execute_result" } @@ -1032,7 +1012,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 69, "metadata": { "colab": {}, "colab_type": "code", @@ -1042,7 +1022,7 @@ { "data": { "text/plain": [ - "product_cd 0\n", + "product_cd 0\n", "category_major_cd 0\n", "category_medium_cd 0\n", "category_small_cd 0\n", @@ -1051,12 +1031,16 @@ "dtype: int64" ] }, - "execution_count": 17, + "execution_count": 69, "metadata": {}, "output_type": "execute_result" } ], - "source": [] + "source": [ + "mean = df_product[[\"unit_price\",\"unit_cost\"]].mean().round()\n", + "df_product_2 = df_product.fillna(mean)\n", + "df_product_2.isnull().sum()\n" + ] }, { "attachments": {}, @@ -1094,7 +1078,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.4" + "version": "3.13.1" } }, "nbformat": 4, From 268418684f93e5e29a833d1b854326ba4ae4d382 Mon Sep 17 00:00:00 2001 From: shiho629 Date: Tue, 15 Apr 2025 22:41:28 +0900 Subject: [PATCH 2/2] =?UTF-8?q?=E5=95=8F8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pandas_quiz.ipynb | 115 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 87 insertions(+), 28 deletions(-) diff --git a/pandas_quiz.ipynb b/pandas_quiz.ipynb index 75c626b..33221f0 100644 --- a/pandas_quiz.ipynb +++ b/pandas_quiz.ipynb @@ -30,7 +30,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 2, "metadata": { "colab": {}, "colab_type": "code", @@ -54,7 +54,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 3, "metadata": { "colab": {}, "colab_type": "code", @@ -83,7 +83,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -176,7 +176,7 @@ "72254 20180911 CS018205000001 P071401005 1 1100" ] }, - "execution_count": 5, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -203,7 +203,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 5, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -346,7 +346,7 @@ "7039 6-20100510-1 " ] }, - "execution_count": 7, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -370,7 +370,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 6, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -387,7 +387,7 @@ "104681" ] }, - "execution_count": 48, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -409,7 +409,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 7, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -426,7 +426,7 @@ "8307" ] }, - "execution_count": 9, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -462,7 +462,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -472,7 +472,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 9, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -495,7 +495,7 @@ "Name: amount, dtype: int64" ] }, - "execution_count": 11, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -543,7 +543,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -553,7 +553,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 11, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -702,7 +702,7 @@ "6 B-20100609-B 3122.0 " ] }, - "execution_count": 31, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -751,7 +751,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 12, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -826,7 +826,7 @@ "4 CS001215000145 2017-06-05" ] }, - "execution_count": 44, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -871,7 +871,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -884,23 +884,82 @@ "outputs": [ { "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
amount_xamount_y
customer_id
CS00111300000412983.113275
CS0011140000056262.796574
CS00111500001030443.483445
CS00120500000419883.298416
CS00120500000633373.523356
\n", + "
" + ], "text/plain": [ - "customer_id\n", - "CS001113000004 1298\n", - "CS001114000005 626\n", - "CS001115000010 3044\n", - "CS001205000004 1988\n", - "CS001205000006 3337\n", - "Name: amount, dtype: int64" + " amount_x amount_y\n", + "customer_id \n", + "CS001113000004 1298 3.113275\n", + "CS001114000005 626 2.796574\n", + "CS001115000010 3044 3.483445\n", + "CS001205000004 1988 3.298416\n", + "CS001205000006 3337 3.523356" ] }, - "execution_count": 45, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df_receipt_grouped.head()\n" + "log_amount=np.log10(df_receipt_grouped)\n", + "pd.merge(df_receipt_grouped,log_amount,on='customer_id',how='left').head()\n" ] }, {