From 00fbddef41fc05cb5fe74a2969b287b429989549 Mon Sep 17 00:00:00 2001
From: Thinkerinhell <wuchen2010@gmail.com>
Date: Fri, 6 May 2016 17:41:48 +1000
Subject: [PATCH 1/3] Add Comment class and get_comments() with a test case

---
 test.py  | 10 +++++++
 zhihu.py | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 99 insertions(+)

diff --git a/test.py b/test.py
index c7a9ceb..c291f20 100755
--- a/test.py
+++ b/test.py
@@ -87,6 +87,9 @@ def answer_test(answer_url):
     answer.to_txt()
     # 把答案输出为markdown文件
     answer.to_md()
+    # All comments under this answer
+    all_comments = answer.get_comments()
+
 
     print question
     # <zhihu.Question instance at 0x7f0b25d13f80>
@@ -101,6 +104,11 @@ def answer_test(answer_url):
     print visit_times  # 输出: 改答案所属问题被浏览次数
 
 
+    # Output: comments posted under this answer by the answer's author
+    for c in all_comments :
+        if c.get_answer_author_flag():
+            print c.get_content()
+
 def user_test(user_url):
     user = User(user_url)
     # 获取用户ID
@@ -282,6 +290,7 @@ def main():
     question_test(url)
     answer_url = "http://www.zhihu.com/question/24269892/answer/29960616"
     answer_test(answer_url)
+
     user_url = "http://www.zhihu.com/people/jixin"
     user_test(user_url)
     collection_url = "http://www.zhihu.com/collection/36750683"
@@ -293,6 +302,7 @@ def main():
     test()
 
 
+
 if __name__ == '__main__':
     main()
 
diff --git a/zhihu.py b/zhihu.py
index 222eaea..95bd5b7 100755
--- a/zhihu.py
+++ b/zhihu.py
@@ -490,6 +490,7 @@ def get_visit_times(self):
         return int(soup.find("meta", itemprop="visitsCount")["content"])
 
 
+
 class User:
     user_url = None
     # session = None
@@ -1173,6 +1174,32 @@ def get_voters(self):
                     voter_id = voter_info.a["title"].encode("utf-8")
                     yield User(voter_url, voter_id)
 
+    def get_comments(self):
+        if self.soup == None:
+            self.parser()
+        soup = self.soup
+
+        try:
+            #print soup.find("div", {"class":lambda x : x and "zm-item-answer" in x.split()})["data-aid"]
+            data_aid = soup.find("div", {"class":lambda x : x and "zm-item-answer" in x.split()})["data-aid"]
+            request_url = 'http://www.zhihu.com/node/AnswerCommentListV2'
+            # if session == None:
+            #     create_session()
+            # s = session
+            # r = s.get(request_url, params={"params": "{\"answer_id\":\"%d\"}" % int(data_aid)})
+            r = requests.get(request_url, params={"params": "{\"answer_id\":\"%d\"}" % int(data_aid)})
+            soup = BeautifulSoup(r.content, "lxml")
+            comments = soup.findAll("div",{"class":"zm-item-comment"})
+
+            #print comments
+            if len(comments) == 0:
+                return
+                yield
+            else:
+                for comment in comments:
+                    yield Comment(comment["data-id"],comment)
+        except TypeError as err:
+            print 'type error in get comments'
 
 class Collection:
     url = None
@@ -1293,3 +1320,65 @@ def get_top_i_answers(self, n):
             if j > n:
                 break
             yield answer
+
+
+class Comment:
+    comment_id = None
+    soup = None
+
+    def setFlag(self, input):
+        if (u"提问者" in input):
+            self.question_author_flag = True
+        if (u"作者" in input):
+            self.answer_author_flag = True
+
+    def parser(self):
+        soup = self.soup
+        commenthddiv = soup.find("div",{"class":"zm-comment-hd"})
+
+        if (commenthddiv.contents[0].strip() == u"匿名用户"):
+            #print(u"user link is {0}, user id is {1}".format(None,u"匿名用户"))
+            self.author = User(None, u"匿名用户")
+            self.setFlag(commenthddiv.contents[1].string)
+        else:
+            apart = commenthddiv.find("a", {"class":"zg-link"})
+            if (apart is not None):
+                #print(u"user link is {0}, user id is {1}".format(apart['href'],apart.string))
+                self.author = User(apart['href'], apart.string)
+                self.setFlag(apart.next_sibling.string)
+
+        self.content = (" ".join(soup.find("div",{"class":"zm-comment-content"}).stripped_strings))
+
+#    def __init__(self, comment_id, soup, author=None, question_author_flag=None, answer_author_flag=None, content=None):
+    def __init__(self, comment_id, soup):
+        self.comment_id = comment_id
+        self.soup = soup
+        # print 'collection url',url
+        #if author != None:
+        #    self.author = author
+        #if question_author_flag != None:
+        #    self.question_author_flag = question_author_flag
+        #if answer_author_flag != None:
+        #    self.creator = answer_author_flag
+        #if content != None:
+        #    self.content = content
+        self.question_author_flag = False
+        self.answer_author_flag = False
+        self.parser()
+
+    def get_author(self):
+        return self.author
+
+    def get_content(self):
+        content = self.content
+        if platform.system() == 'Windows':
+            content = content.decode('utf-8').encode('gbk')
+            return content
+        else:
+            return content
+
+    def get_question_author_flag(self):
+        return self.question_author_flag
+
+    def get_answer_author_flag(self):
+        return self.answer_author_flag
\ No newline at end of file

From f6b0e26b7afe1037fa6786958baeeb79f1a47f25 Mon Sep 17 00:00:00 2001
From: Thinkerinhell <wuchen2010@gmail.com>
Date: Fri, 6 May 2016 17:51:15 +1000
Subject: [PATCH 2/3] Remove commented-out debug code and annotate the flag getters

---
 .idea/vcs.xml |  6 ++++++
 zhihu.py      | 24 ++++--------------------
 2 files changed, 10 insertions(+), 20 deletions(-)
 create mode 100644 .idea/vcs.xml

diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/zhihu.py b/zhihu.py
index 95bd5b7..6312a19 100755
--- a/zhihu.py
+++ b/zhihu.py
@@ -1180,18 +1180,13 @@ def get_comments(self):
         soup = self.soup
 
         try:
-            #print soup.find("div", {"class":lambda x : x and "zm-item-answer" in x.split()})["data-aid"]
             data_aid = soup.find("div", {"class":lambda x : x and "zm-item-answer" in x.split()})["data-aid"]
             request_url = 'http://www.zhihu.com/node/AnswerCommentListV2'
-            # if session == None:
-            #     create_session()
-            # s = session
-            # r = s.get(request_url, params={"params": "{\"answer_id\":\"%d\"}" % int(data_aid)})
+
             r = requests.get(request_url, params={"params": "{\"answer_id\":\"%d\"}" % int(data_aid)})
             soup = BeautifulSoup(r.content, "lxml")
             comments = soup.findAll("div",{"class":"zm-item-comment"})
 
-            #print comments
             if len(comments) == 0:
                 return
                 yield
@@ -1337,31 +1332,19 @@ def parser(self):
         commenthddiv = soup.find("div",{"class":"zm-comment-hd"})
 
         if (commenthddiv.contents[0].strip() == u"匿名用户"):
-            #print(u"user link is {0}, user id is {1}".format(None,u"匿名用户"))
             self.author = User(None, u"匿名用户")
             self.setFlag(commenthddiv.contents[1].string)
         else:
             apart = commenthddiv.find("a", {"class":"zg-link"})
             if (apart is not None):
-                #print(u"user link is {0}, user id is {1}".format(apart['href'],apart.string))
                 self.author = User(apart['href'], apart.string)
                 self.setFlag(apart.next_sibling.string)
 
         self.content = (" ".join(soup.find("div",{"class":"zm-comment-content"}).stripped_strings))
 
-#    def __init__(self, comment_id, soup, author=None, question_author_flag=None, answer_author_flag=None, content=None):
     def __init__(self, comment_id, soup):
         self.comment_id = comment_id
         self.soup = soup
-        # print 'collection url',url
-        #if author != None:
-        #    self.author = author
-        #if question_author_flag != None:
-        #    self.question_author_flag = question_author_flag
-        #if answer_author_flag != None:
-        #    self.creator = answer_author_flag
-        #if content != None:
-        #    self.content = content
         self.question_author_flag = False
         self.answer_author_flag = False
         self.parser()
@@ -1376,9 +1359,10 @@ def get_content(self):
             return content
         else:
             return content
-
+    # Whether the commenter is the question asker
     def get_question_author_flag(self):
         return self.question_author_flag
-
+
+    # Whether the commenter is the answer's author
     def get_answer_author_flag(self):
         return self.answer_author_flag
\ No newline at end of file

From 4bcf04753983780cb1ef5305728c02d50b85684f Mon Sep 17 00:00:00 2001
From: Thinkerinhell <wuchen2010@gmail.com>
Date: Fri, 6 May 2016 17:53:57 +1000
Subject: [PATCH 3/3] removed vcs.xml

---
 .idea/vcs.xml | 6 ------
 1 file changed, 6 deletions(-)
 delete mode 100644 .idea/vcs.xml

diff --git a/.idea/vcs.xml b/.idea/vcs.xml
deleted file mode 100644
index 94a25f7..0000000
--- a/.idea/vcs.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="VcsDirectoryMappings">
-    <mapping directory="$PROJECT_DIR$" vcs="Git" />
-  </component>
-</project>
\ No newline at end of file