From bcac5d04cd5cda0ce6542716ed80abcc9e258978 Mon Sep 17 00:00:00 2001 From: huangxiaofei Date: Sat, 9 Jul 2016 23:53:09 +0800 Subject: [PATCH 1/2] modified: auth.py add headers to requets.get --- auth.py | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/auth.py b/auth.py index ed51290..f6f6763 100644 --- a/auth.py +++ b/auth.py @@ -68,7 +68,15 @@ def __init__(self, message): def download_captcha(): url = "https://www.zhihu.com/captcha.gif" - r = requests.get(url, params={"r": random.random(), "type": "login"}, verify=False) + headers = { + 'User-Agent': "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36", + 'Host': "www.zhihu.com", + 'Origin': "http://www.zhihu.com", + 'Pragma': "no-cache", + 'Referer': "http://www.zhihu.com/", + 'X-Requested-With': "XMLHttpRequest" + } + r = requests.get(url, params={"r": random.random(), "type": "login"}, verify=False, headers=headers) if int(r.status_code) != 200: raise NetworkError(u"验证码请求失败") image_name = u"verify." 
+ r.headers['content-type'].split("/")[1] @@ -96,7 +104,15 @@ def download_captcha(): def search_xsrf(): url = "http://www.zhihu.com/" - r = requests.get(url, verify=False) + headers = { + 'User-Agent': "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36", + 'Host': "www.zhihu.com", + 'Origin': "http://www.zhihu.com", + 'Pragma': "no-cache", + 'Referer': "http://www.zhihu.com/", + 'X-Requested-With': "XMLHttpRequest" + } + r = requests.get(url, verify=False, headers=headers) if int(r.status_code) != 200: raise NetworkError(u"验证码请求失败") results = re.compile(r"\ Date: Sun, 10 Jul 2016 12:45:24 +0800 Subject: [PATCH 2/2] modified: zhihu.py modify some encode problem and beautifulsoup find html match --- zhihu.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/zhihu.py b/zhihu.py index e00e939..8234cf9 100755 --- a/zhihu.py +++ b/zhihu.py @@ -314,7 +314,7 @@ def get_title(self): if self.soup == None: self.parser() soup = self.soup - title = soup.find("h2", class_="zm-item-title").string.encode("utf-8").replace("\n", "") + title = soup.find("span", class_="zm-editable-content").string.encode("utf-8").replace("\n", "") self.title = title if platform.system() == 'Windows': title = title.decode('utf-8').encode('gbk') @@ -662,7 +662,7 @@ def get_topics_num(self): if self.soup == None: self.parser() soup = self.soup - topics_num = soup.find_all("div", class_="zm-profile-side-section-title")[1].strong.string.encode("utf-8") + topics_num = soup.find_all("div", class_="zm-profile-side-section-title")[2].strong.string.encode("utf-8") I='' for i in topics_num: if i.isdigit(): @@ -1067,7 +1067,7 @@ def get_question(self): if self.soup == None: self.parser() soup = self.soup - question_link = soup.find("h2", class_="zm-item-title zm-editable-content").a + question_link = soup.find("h2", class_="zm-item-title").a url = "http://www.zhihu.com" + question_link["href"] title = 
question_link.string.encode("utf-8") question = Question(url, title) @@ -1144,7 +1144,8 @@ def to_txt(self): if platform.system() == 'Windows': anon_user_id = "匿名用户".decode('utf-8').encode('gbk') else: - anon_user_id = "匿名用户" + anon_user_id = "匿名用户" + anon_user_id = "匿名用户".decode('utf-8').encode('gbk') if self.get_author().get_user_id() == anon_user_id: if not os.path.isdir(os.path.join(os.path.join(os.getcwd(), "text"))): os.makedirs(os.path.join(os.path.join(os.getcwd(), "text")))