# %% Receipt rows of one customer where amount >= 1000 or quantity >= 5
# Only these five columns are displayed; the masks are built on df_receipt,
# which shares its index with the column subset taken from it.
receipt_columns = ["sales_ymd", "customer_id", "product_cd", "quantity", "amount"]
df_receipt1 = df_receipt[receipt_columns]
mask1 = df_receipt["customer_id"] == "CS018205000001"
mask2 = (df_receipt["amount"] >= 1000) | (df_receipt["quantity"] >= 5)
mask = mask1 & mask2
df_receipt1.loc[mask]

# %% Customers ordered by birth_day, descending
df_as = df_customer.sort_values(by="birth_day", ascending=False)
df_as.head()

# %% Number of receipt rows
len(df_receipt)

# %% Number of distinct customer_id values in the receipts
df_receipt["customer_id"].nunique()

# %% Customers whose total amount is at or above the mean of the per-customer totals
# Aggregate once and reuse the series for the mean, instead of repeating the
# same groupby/sum a second time just to average it.
df_receipt_grouped = df_receipt_only_member.groupby("customer_id")["amount"].sum()
df_receipt_grouped_mean = df_receipt_grouped.mean()
mask_5 = df_receipt_grouped >= df_receipt_grouped_mean
df_receipt_grouped[mask_5].head()
女性\n", " 1995-03-29\n", - " 24.0\n", + " 24\n", " 144-0055\n", " 東京都大田区仲六郷**********\n", " S13001\n", - " 20170605.0\n", + " 20170605\n", " 6-20090929-2\n", " 875.0\n", " \n", " \n", - " 4\n", + " 6\n", " CS015414000103\n", " 奥野 陽子\n", - " 1.0\n", + " 1\n", " 女性\n", " 1977-08-09\n", - " 41.0\n", + " 41\n", " 136-0073\n", " 東京都江東区北砂**********\n", " S13015\n", - " 20150722.0\n", + " 20150722\n", " B-20100609-B\n", " 3122.0\n", " \n", @@ -656,26 +680,26 @@ "" ], "text/plain": [ - " customer_id customer_name gender_cd gender birth_day age postal_cd \\\n", - "0 CS021313000114 大野 あや子 1.0 女性 1981-04-29 37.0 259-1113 \n", - "1 CS031415000172 宇多田 貴美子 1.0 女性 1976-10-04 42.0 151-0053 \n", - "2 CS028811000001 堀井 かおり 1.0 女性 1933-03-27 86.0 245-0016 \n", - "3 CS001215000145 田崎 美紀 1.0 女性 1995-03-29 24.0 144-0055 \n", - "4 CS015414000103 奥野 陽子 1.0 女性 1977-08-09 41.0 136-0073 \n", + " customer_id customer_name gender_cd gender birth_day age postal_cd \\\n", + "0 CS021313000114 大野 あや子 1 女性 1981-04-29 37 259-1113 \n", + "2 CS031415000172 宇多田 貴美子 1 女性 1976-10-04 42 151-0053 \n", + "3 CS028811000001 堀井 かおり 1 女性 1933-03-27 86 245-0016 \n", + "4 CS001215000145 田崎 美紀 1 女性 1995-03-29 24 144-0055 \n", + "6 CS015414000103 奥野 陽子 1 女性 1977-08-09 41 136-0073 \n", "\n", " address application_store_cd application_date \\\n", - "0 神奈川県伊勢原市粟窪********** S14021 20150905.0 \n", - "1 東京都渋谷区代々木********** S13031 20150529.0 \n", - "2 神奈川県横浜市泉区和泉町********** S14028 20160115.0 \n", - "3 東京都大田区仲六郷********** S13001 20170605.0 \n", - "4 東京都江東区北砂********** S13015 20150722.0 \n", + "0 神奈川県伊勢原市粟窪********** S14021 20150905 \n", + "2 東京都渋谷区代々木********** S13031 20150529 \n", + "3 神奈川県横浜市泉区和泉町********** S14028 20160115 \n", + "4 東京都大田区仲六郷********** S13001 20170605 \n", + "6 東京都江東区北砂********** S13015 20150722 \n", "\n", " status_cd amount \n", "0 0-00000000-0 0.0 \n", - "1 D-20100325-C 5088.0 \n", - "2 0-00000000-0 0.0 \n", - "3 6-20090929-2 875.0 \n", - "4 B-20100609-B 3122.0 " + "2 D-20100325-C 5088.0 \n", + "3 
# %% Per-customer total amount joined onto the member customers (gender_cd == 1)
df_receipt_grouped = df_receipt_only_member.groupby("customer_id")["amount"].sum()
df_merge = pd.merge(
    df_customer_only_member,
    df_receipt_grouped,
    on="customer_id",
    how="left",
)
mask_6 = df_merge["gender_cd"] == 1.0
# The left join leaves NaN in `amount` for customers without receipts.
# Fill only that column: a bare fillna(0) would also write 0 into any other
# column that happens to contain missing values.
df_merge[mask_6].fillna({"amount": 0}).head()

# %% Parse application_date as a datetime using the %Y%m%d format
# NOTE: this overwrites the column on df_customer itself, so cells that
# displayed df_customer earlier show the pre-conversion values.
df_customer["application_date"] = pd.to_datetime(
    df_customer["application_date"], format="%Y%m%d"
)
df_customer[["customer_id", "application_date"]].head()

# %% Per-customer total amount next to its base-10 logarithm
# Both series would otherwise be named "amount", and merge would emit
# amount_x / amount_y; naming the log series yields amount / log_amount.
log_amount = np.log10(df_receipt_grouped).rename("log_amount")
pd.merge(df_receipt_grouped, log_amount, on="customer_id", how="left").head()

# %% Missing-value count per column of df_product
df_product.isnull().sum()

# %% Copy of df_product without rows that contain any missing value
df_product_1 = df_product.dropna()

# %% Fill missing unit_price / unit_cost with the rounded column mean
price_cost_columns = ["unit_price", "unit_cost"]
mean = df_product[price_cost_columns].mean().round()
df_product_2 = df_product.fillna(mean)
df_product_2.isnull().sum()