From 727b82d5323d4a11bcc1456a2f994730d7850354 Mon Sep 17 00:00:00 2001 From: 2314 <131743576+gxy2314@users.noreply.github.com> Date: Thu, 26 Dec 2024 14:37:16 +0800 Subject: [PATCH 1/2] fix: resolve training issues with errors - Fixed `ModuleNotFoundError: No module named 'fastai.utils'` during training. - Resolved `RuntimeError: indices should be either on CPU or on the same device as the indexed tensor`. - Addressed output errors during result display. --- README.md | 10 +- build_graph.py | 388 ++++++++++++++++---------------- data_processor.py | 324 +++++++++++++-------------- layer.py | 59 +---- trainer.py | 557 ++++++++++++++++++++++++---------------------- utils.py | 477 +++++++++++++++++++-------------------- 6 files changed, 899 insertions(+), 916 deletions(-) diff --git a/README.md b/README.md index 186c603..3aa6e79 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,15 @@ -# Graph Convolutional Networks for Text Classification in PyTorch +# Simplifying Graph Convolutional Networks in PyTorch (TextSGC) -PyTorch 1.6 and Python 3.7 implementation of Graph Convolutional Networks for Text Classification [1]. +PyTorch 1.6 and Python 3.7 implementation of Simplifying Graph Convolutional Networks [1]. Tested on the 20NG/R8/R52/Ohsumed/MR data set, the code on this repository can achieve the effect of the paper. ## Benchmark -| dataset | 20NG | R8 | R52 | Ohsumed | MR | +| dataset | 20NG | R8 | R52 | Ohsumed | MR | |---------------|----------|------|--------|--------|--------| | TextGCN(official) | 0.8634 | 0.9707 | 0.9356 | 0.6836 | 0.7674 | -| This repo. | 0.8618 | 0.9704 | 0.9354 | 0.6827 | 0.7643 | +| This repo. | 0.8605 | 0.9743 | 0.9384 | 0.6828 | 0.7728 | NOTE: The result of the experiment is to repeat the run 10 times, and then take the average of accuracy. @@ -32,4 +32,4 @@ NOTE: The result of the experiment is to repeat the run 10 times, and then take 3. Training model, run `trainer.py` ## References -[1] [Yao, L. , Mao, C. , & Luo, Y. . (2018). Graph convolutional networks for text classification.](https://arxiv.org/abs/1809.05679) +[1] [Wu, F. , Zhang, T. , Souza, A. H. D. , Fifty, C. , Yu, T. , & Weinberger, K. Q. . (2019). 
Simplifying graph convolutional networks.](https://arxiv.org/abs/1902.07153) diff --git a/build_graph.py b/build_graph.py index 2e0b13c..f15133c 100644 --- a/build_graph.py +++ b/build_graph.py @@ -1,194 +1,194 @@ -import os -from collections import Counter - -import networkx as nx - -import itertools -import math -from collections import defaultdict -from time import time - -from sklearn.feature_extraction.text import CountVectorizer -from sklearn.feature_extraction.text import TfidfTransformer -from sklearn.pipeline import Pipeline -from tqdm import tqdm - -from utils import print_graph_detail - - -def get_window(content_lst, window_size): - """ - 找出窗口 - :param content_lst: - :param window_size: - :return: - """ - word_window_freq = defaultdict(int) # w(i) 单词在窗口单位内出现的次数 - word_pair_count = defaultdict(int) # w(i, j) - windows_len = 0 - for words in tqdm(content_lst, desc="Split by window"): - windows = list() - - if isinstance(words, str): - words = words.split() - length = len(words) - - if length <= window_size: - windows.append(words) - else: - for j in range(length - window_size + 1): - window = words[j: j + window_size] - windows.append(list(set(window))) - - for window in windows: - for word in window: - word_window_freq[word] += 1 - - for word_pair in itertools.combinations(window, 2): - word_pair_count[word_pair] += 1 - - windows_len += len(windows) - return word_window_freq, word_pair_count, windows_len - - -def cal_pmi(W_ij, W, word_freq_1, word_freq_2): - p_i = word_freq_1 / W - p_j = word_freq_2 / W - p_i_j = W_ij / W - pmi = math.log(p_i_j / (p_i * p_j)) - - return pmi - - -def count_pmi(windows_len, word_pair_count, word_window_freq, threshold): - word_pmi_lst = list() - for word_pair, W_i_j in tqdm(word_pair_count.items(), desc="Calculate pmi between words"): - word_freq_1 = word_window_freq[word_pair[0]] - word_freq_2 = word_window_freq[word_pair[1]] - - pmi = cal_pmi(W_i_j, windows_len, word_freq_1, word_freq_2) - if pmi <= threshold: - continue - word_pmi_lst.append([word_pair[0], word_pair[1], pmi]) - return word_pmi_lst - - -def get_pmi_edge(content_lst, window_size=20, threshold=0.): - if isinstance(content_lst, str): - content_lst = list(open(content_lst, "r")) - print("pmi read file len:", len(content_lst)) - - pmi_start = time() - word_window_freq, word_pair_count, windows_len = get_window(content_lst, - window_size=window_size) - - pmi_edge_lst = count_pmi(windows_len, word_pair_count, word_window_freq, threshold) - print("Total number of edges between word:", len(pmi_edge_lst)) - pmi_time = time() - pmi_start - return pmi_edge_lst, pmi_time - - -class BuildGraph: - def __init__(self, dataset): - clean_corpus_path = "data/text_dataset/clean_corpus" - self.graph_path = "data/graph" - if not os.path.exists(self.graph_path): - os.makedirs(self.graph_path) - - self.word2id = dict() # 单词映射 - self.dataset = dataset - print(f"\n==> 现在的数据集是:{dataset}<==") - - self.g = nx.Graph() - - self.content = f"{clean_corpus_path}/{dataset}.txt" - - self.get_tfidf_edge() - self.get_pmi_edge() - self.save() - - def get_pmi_edge(self): - pmi_edge_lst, self.pmi_time = get_pmi_edge(self.content, window_size=20, threshold=0.0) - print("pmi time:", self.pmi_time) - - for edge_item in pmi_edge_lst: - word_indx1 = self.node_num + self.word2id[edge_item[0]] - word_indx2 = self.node_num + self.word2id[edge_item[1]] - if word_indx1 == word_indx2: - continue - self.g.add_edge(word_indx1, word_indx2, weight=edge_item[2]) - - print_graph_detail(self.g) - - def get_tfidf_edge(self): - # 
获得tfidf权重矩阵(sparse)和单词列表 - tfidf_vec = self.get_tfidf_vec() - - count_lst = list() # 统计每个句子的长度 - for ind, row in tqdm(enumerate(tfidf_vec), - desc="generate tfidf edge"): - count = 0 - for col_ind, value in zip(row.indices, row.data): - word_ind = self.node_num + col_ind - self.g.add_edge(ind, word_ind, weight=value) - count += 1 - count_lst.append(count) - - print_graph_detail(self.g) - - def get_tfidf_vec(self): - """ - 学习获得tfidf矩阵,及其对应的单词序列 - :param content_lst: - :return: - """ - start = time() - text_tfidf = Pipeline([ - ("vect", CountVectorizer(min_df=1, - max_df=1.0, - token_pattern=r"\S+", - )), - ("tfidf", TfidfTransformer(norm=None, - use_idf=True, - smooth_idf=False, - sublinear_tf=False - )) - ]) - - tfidf_vec = text_tfidf.fit_transform(open(self.content, "r")) - - self.tfidf_time = time() - start - print("tfidf time:", self.tfidf_time) - print("tfidf_vec shape:", tfidf_vec.shape) - print("tfidf_vec type:", type(tfidf_vec)) - - self.node_num = tfidf_vec.shape[0] - - # 映射单词 - vocab_lst = text_tfidf["vect"].get_feature_names() - print("vocab_lst len:", len(vocab_lst)) - for ind, word in enumerate(vocab_lst): - self.word2id[word] = ind - - self.vocab_lst = vocab_lst - - return tfidf_vec - - def save(self): - print("total time:", self.pmi_time + self.tfidf_time) - nx.write_weighted_edgelist(self.g, - f"{self.graph_path}/{self.dataset}.txt") - - print("\n") - - -def main(): - BuildGraph("mr") - BuildGraph("ohsumed") - BuildGraph("R52") - BuildGraph("R8") - BuildGraph("20ng") - - -if __name__ == '__main__': - main() +import os +from collections import Counter + +import networkx as nx + +import itertools +import math +from collections import defaultdict +from time import time + +from sklearn.feature_extraction.text import CountVectorizer +from sklearn.feature_extraction.text import TfidfTransformer +from sklearn.pipeline import Pipeline +from tqdm import tqdm + +from utils import print_graph_detail + + +def get_window(content_lst, window_size): + """ + 找出窗口 + :param content_lst: + :param window_size: + :return: + """ + word_window_freq = defaultdict(int) # w(i) 单词在窗口单位内出现的次数 + word_pair_count = defaultdict(int) # w(i, j) + windows_len = 0 + for words in tqdm(content_lst, desc="Split by window"): + windows = list() + + if isinstance(words, str): + words = words.split() + length = len(words) + + if length <= window_size: + windows.append(words) + else: + for j in range(length - window_size + 1): + window = words[j: j + window_size] + windows.append(list(set(window))) + + for window in windows: + for word in window: + word_window_freq[word] += 1 + + for word_pair in itertools.combinations(window, 2): + word_pair_count[word_pair] += 1 + + windows_len += len(windows) + return word_window_freq, word_pair_count, windows_len + + +def cal_pmi(W_ij, W, word_freq_1, word_freq_2): + p_i = word_freq_1 / W + p_j = word_freq_2 / W + p_i_j = W_ij / W + pmi = math.log(p_i_j / (p_i * p_j)) + + return pmi + + +def count_pmi(windows_len, word_pair_count, word_window_freq, threshold): + word_pmi_lst = list() + for word_pair, W_i_j in tqdm(word_pair_count.items(), desc="Calculate pmi between words"): + word_freq_1 = word_window_freq[word_pair[0]] + word_freq_2 = word_window_freq[word_pair[1]] + + pmi = cal_pmi(W_i_j, windows_len, word_freq_1, word_freq_2) + if pmi <= threshold: + continue + word_pmi_lst.append([word_pair[0], word_pair[1], pmi]) + return word_pmi_lst + + +def get_pmi_edge(content_lst, window_size=20, threshold=0.): + if isinstance(content_lst, str): + content_lst = list(open(content_lst, 
"r")) + print("pmi read file len:", len(content_lst)) + + pmi_start = time() + word_window_freq, word_pair_count, windows_len = get_window(content_lst, + window_size=window_size) + + pmi_edge_lst = count_pmi(windows_len, word_pair_count, word_window_freq, threshold) + print("Total number of edges between word:", len(pmi_edge_lst)) + pmi_time = time() - pmi_start + return pmi_edge_lst, pmi_time + + +class BuildGraph: + def __init__(self, dataset): + clean_corpus_path = "PyTorch_TextSGC/data/text_dataset/clean_corpus" + self.graph_path = "PyTorch_TextSGC/data/graph" + if not os.path.exists(self.graph_path): + os.makedirs(self.graph_path) + + self.word2id = dict() # 单词映射 + self.dataset = dataset + print(f"\n==> 现在的数据集是:{dataset}<==") + + self.g = nx.Graph() + + self.content = f"{clean_corpus_path}/{dataset}.txt" + + self.get_tfidf_edge() + self.get_pmi_edge() + self.save() + + def get_pmi_edge(self): + pmi_edge_lst, self.pmi_time = get_pmi_edge(self.content, window_size=20, threshold=0.0) + print("pmi time:", self.pmi_time) + + for edge_item in pmi_edge_lst: + word_indx1 = self.node_num + self.word2id[edge_item[0]] + word_indx2 = self.node_num + self.word2id[edge_item[1]] + if word_indx1 == word_indx2: + continue + self.g.add_edge(word_indx1, word_indx2, weight=edge_item[2]) + + print_graph_detail(self.g) + + def get_tfidf_edge(self): + # 获得tfidf权重矩阵(sparse)和单词列表 + tfidf_vec = self.get_tfidf_vec() + + count_lst = list() # 统计每个句子的长度 + for ind, row in tqdm(enumerate(tfidf_vec), + desc="generate tfidf edge"): + count = 0 + for col_ind, value in zip(row.indices, row.data): + word_ind = self.node_num + col_ind + self.g.add_edge(ind, word_ind, weight=value) + count += 1 + count_lst.append(count) + + print_graph_detail(self.g) + + def get_tfidf_vec(self): + """ + 学习获得tfidf矩阵,及其对应的单词序列 + :param content_lst: + :return: + """ + start = time() + text_tfidf = Pipeline([ + ("vect", CountVectorizer(min_df=1, + max_df=1.0, + token_pattern=r"\S+", + )), + ("tfidf", TfidfTransformer(norm=None, + use_idf=True, + smooth_idf=False, + sublinear_tf=False + )) + ]) + + tfidf_vec = text_tfidf.fit_transform(open(self.content, "r")) + + self.tfidf_time = time() - start + print("tfidf time:", self.tfidf_time) + print("tfidf_vec shape:", tfidf_vec.shape) + print("tfidf_vec type:", type(tfidf_vec)) + + self.node_num = tfidf_vec.shape[0] + + # 映射单词 + vocab_lst = text_tfidf["vect"].get_feature_names() + print("vocab_lst len:", len(vocab_lst)) + for ind, word in enumerate(vocab_lst): + self.word2id[word] = ind + + self.vocab_lst = vocab_lst + + return tfidf_vec + + def save(self): + print("total time:", self.pmi_time + self.tfidf_time) + nx.write_weighted_edgelist(self.g, + f"{self.graph_path}/{self.dataset}.txt") + + print("\n") + + +def main(): + BuildGraph("mr") + BuildGraph("ohsumed") + BuildGraph("R52") + BuildGraph("R8") + BuildGraph("20ng") + + +if __name__ == '__main__': + main() diff --git a/data_processor.py b/data_processor.py index e3c806e..a613f83 100644 --- a/data_processor.py +++ b/data_processor.py @@ -1,162 +1,162 @@ -import os -import re -from collections import Counter -from collections import defaultdict -import numpy as np - -from tqdm import tqdm - - -class StringProcess(object): - def __init__(self): - self.other_char = re.compile(r"[^A-Za-z0-9(),!?\'\`]", flags=0) - self.num = re.compile(r"[+-]?\d+\.?\d*", flags=0) - # self.url = re.compile(r"[a-z]*[:.]+\S+|\n|\s+", flags=0) - self.url = re.compile( - r"(https?|ftp|file)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]", flags=0) - 
self.stop_words = None - self.nlp = None - - def clean_str(self, string): - string = re.sub(self.other_char, " ", string) - string = re.sub(r"\'s", " \'s", string) - string = re.sub(r"\'ve", " \'ve", string) - string = re.sub(r"n\'t", " n\'t", string) - string = re.sub(r"\'re", " \'re", string) - string = re.sub(r"\'d", " \'d", string) - string = re.sub(r"\'ll", " \'ll", string) - string = re.sub(r",", " , ", string) - string = re.sub(r"!", " ! ", string) - string = re.sub(r"\(", " \( ", string) - string = re.sub(r"\)", " \) ", string) - string = re.sub(r"\?", " \? ", string) - string = re.sub(r"\s{2,}", " ", string) - - return string.strip().lower() - - def norm_str(self, string): - string = re.sub(self.other_char, " ", string) - - if self.nlp is None: - from spacy.lang.en import English - self.nlp = English() - - new_doc = list() - doc = self.nlp(string) - for token in doc: - if token.is_space or token.is_punct: - continue - if token.is_digit: - token = "[num]" - else: - token = token.text - - new_doc.append(token) - - return " ".join(new_doc).lower() - - def lean_str_sst(self, string): - """ - Tokenization/string cleaning for the SST yelp_dataset - Original taken from https://github.com/yoonkim/CNN_sentence/blob/master/process_data.py - """ - string = re.sub(self.other_char, " ", string) - string = re.sub(r"\s{2,}", " ", string) - return string.strip().lower() - - def remove_stopword(self, string): - if self.stop_words is None: - from nltk.corpus import stopwords - self.stop_words = set(stopwords.words('english')) - - if type(string) is str: - string = string.split() - - new_string = list() - for word in string: - if word in self.stop_words: - continue - new_string.append(word) - - return " ".join(new_string) - - def replace_num(self, string): - result = re.sub(self.num, '', string) - return result - - def replace_urls(self, string): - result = re.sub(self.url, '', string) - result = ' '.join(re.split(' +|\n+', result)).strip() - return result - - -def remove_less_word(lines_str, word_st): - return " ".join([word for word in lines_str.split() if word in word_st]) - - -class CorpusProcess: - def __init__(self, dataset, encoding=None): - corpus_path = "data/text_dataset/corpus" - clean_corpus_path = "data/text_dataset/clean_corpus" - if not os.path.exists(clean_corpus_path): - os.makedirs(clean_corpus_path) - - self.dataset = dataset - self.corpus_name = f"{corpus_path}/{dataset}.txt" - self.save_name = f"{clean_corpus_path}/{dataset}.txt" - self.context_dct = defaultdict(dict) - - self.encoding = encoding - self.clean_text() - - def clean_text(self): - sp = StringProcess() - word_lst = list() - with open(self.corpus_name, mode="rb", encoding=self.encoding) as fin: - for indx, item in tqdm(enumerate(fin), desc="clean the text"): - data = item.strip().decode('latin1') - data = sp.clean_str(data) - if self.dataset not in {"mr"}: - data = sp.remove_stopword(data) - word_lst.extend(data.split()) - - word_st = set() - if self.dataset not in {"mr"}: - for word, value in Counter(word_lst).items(): - if value < 5: - continue - word_st.add(word) - else: - word_st = set(word_lst) - - doc_len_lst = list() - with open(self.save_name, mode='w') as fout: - with open(self.corpus_name, mode="rb", encoding=self.encoding) as fin: - for line in tqdm(fin): - lines_str = line.strip().decode('latin1') - lines_str = sp.clean_str(lines_str) - if self.dataset not in {"mr"}: - lines_str = sp.remove_stopword(lines_str) - lines_str = remove_less_word(lines_str, word_st) - - fout.write(lines_str) - fout.write(" \n") 
- - doc_len_lst.append(len(lines_str.split())) - - print("Average length:", np.mean(doc_len_lst)) - print("doc count:", len(doc_len_lst)) - print("Total number of words:", len(word_st)) - - -def main(): - CorpusProcess("R52") - # CorpusProcess("20ng") - # CorpusProcess("mr") - # CorpusProcess("ohsumed") - # CorpusProcess("R8") - # pass - - -if __name__ == '__main__': - main() +import os +import re +from collections import Counter +from collections import defaultdict +import numpy as np + +from tqdm import tqdm + + +class StringProcess(object): + def __init__(self): + self.other_char = re.compile(r"[^A-Za-z0-9(),!?\'\`]", flags=0) + self.num = re.compile(r"[+-]?\d+\.?\d*", flags=0) + # self.url = re.compile(r"[a-z]*[:.]+\S+|\n|\s+", flags=0) + self.url = re.compile( + r"(https?|ftp|file)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]", flags=0) + self.stop_words = None + self.nlp = None + + def clean_str(self, string): + string = re.sub(self.other_char, " ", string) + string = re.sub(r"\'s", " \'s", string) + string = re.sub(r"\'ve", " \'ve", string) + string = re.sub(r"n\'t", " n\'t", string) + string = re.sub(r"\'re", " \'re", string) + string = re.sub(r"\'d", " \'d", string) + string = re.sub(r"\'ll", " \'ll", string) + string = re.sub(r",", " , ", string) + string = re.sub(r"!", " ! ", string) + string = re.sub(r"\(", " \( ", string) + string = re.sub(r"\)", " \) ", string) + string = re.sub(r"\?", " \? ", string) + string = re.sub(r"\s{2,}", " ", string) + + return string.strip().lower() + + def norm_str(self, string): + string = re.sub(self.other_char, " ", string) + + if self.nlp is None: + from spacy.lang.en import English + self.nlp = English() + + new_doc = list() + doc = self.nlp(string) + for token in doc: + if token.is_space or token.is_punct: + continue + if token.is_digit: + token = "[num]" + else: + token = token.text + + new_doc.append(token) + + return " ".join(new_doc).lower() + + def lean_str_sst(self, string): + """ + Tokenization/string cleaning for the SST yelp_dataset + Original taken from https://github.com/yoonkim/CNN_sentence/blob/master/process_data.py + """ + string = re.sub(self.other_char, " ", string) + string = re.sub(r"\s{2,}", " ", string) + return string.strip().lower() + + def remove_stopword(self, string): + if self.stop_words is None: + from nltk.corpus import stopwords + self.stop_words = set(stopwords.words('english')) + + if type(string) is str: + string = string.split() + + new_string = list() + for word in string: + if word in self.stop_words: + continue + new_string.append(word) + + return " ".join(new_string) + + def replace_num(self, string): + result = re.sub(self.num, '', string) + return result + + def replace_urls(self, string): + result = re.sub(self.url, '', string) + result = ' '.join(re.split(' +|\n+', result)).strip() + return result + + +def remove_less_word(lines_str, word_st): + return " ".join([word for word in lines_str.split() if word in word_st]) + + +class CorpusProcess: + def __init__(self, dataset, encoding=None): + corpus_path = "PyTorch_TextSGC/data/text_dataset/corpus" + clean_corpus_path = "PyTorch_TextSGC/data/text_dataset/clean_corpus" + if not os.path.exists(clean_corpus_path): + os.makedirs(clean_corpus_path) + + self.dataset = dataset + self.corpus_name = f"{corpus_path}/{dataset}.txt" + self.save_name = f"{clean_corpus_path}/{dataset}.txt" + self.context_dct = defaultdict(dict) + + self.encoding = encoding + self.clean_text() + + def clean_text(self): + sp = StringProcess() + word_lst = list() + with 
open(self.corpus_name, mode="rb", encoding=self.encoding) as fin: + for indx, item in tqdm(enumerate(fin), desc="clean the text"): + data = item.strip().decode('latin1') + data = sp.clean_str(data) + if self.dataset not in {"mr"}: + data = sp.remove_stopword(data) + word_lst.extend(data.split()) + + word_st = set() + if self.dataset not in {"mr"}: + for word, value in Counter(word_lst).items(): + if value < 5: + continue + word_st.add(word) + else: + word_st = set(word_lst) + + doc_len_lst = list() + with open(self.save_name, mode='w') as fout: + with open(self.corpus_name, mode="rb", encoding=self.encoding) as fin: + for line in tqdm(fin): + lines_str = line.strip().decode('latin1') + lines_str = sp.clean_str(lines_str) + if self.dataset not in {"mr"}: + lines_str = sp.remove_stopword(lines_str) + lines_str = remove_less_word(lines_str, word_st) + + fout.write(lines_str) + fout.write(" \n") + + doc_len_lst.append(len(lines_str.split())) + + print("Average length:", np.mean(doc_len_lst)) + print("doc count:", len(doc_len_lst)) + print("Total number of words:", len(word_st)) + + +def main(): + # CorpusProcess("R52") + # CorpusProcess("20ng") + CorpusProcess("mr") + CorpusProcess("ohsumed") + # CorpusProcess("R8") + # pass + + +if __name__ == '__main__': + main() diff --git a/layer.py b/layer.py index 85a9ba3..a7e662f 100644 --- a/layer.py +++ b/layer.py @@ -1,56 +1,13 @@ -import math +import torch.nn as nn import torch as th - -from torch.nn.parameter import Parameter from torch.nn.modules.module import Module -class GraphConvolution(Module): - """ - Simple pygGCN layer, similar to https://arxiv.org/abs/1609.02907 - """ - - def __init__(self, in_features, out_features, bias=True): - super(GraphConvolution, self).__init__() - self.in_features = in_features - self.out_features = out_features - self.weight = Parameter(th.FloatTensor(in_features, out_features)) - if bias: - self.bias = Parameter(th.FloatTensor(out_features)) - else: - self.register_parameter('bias', None) - self.reset_parameters() - - def reset_parameters(self): - stdv = 1. 
/ math.sqrt(self.weight.size(1)) - self.weight.data.uniform_(-stdv, stdv) - if self.bias is not None: - self.bias.data.uniform_(-stdv, stdv) - - def forward(self, infeatn, adj): - support = th.spmm(infeatn, self.weight) - output = th.spmm(adj, support) - if self.bias is not None: - return output + self.bias - else: - return output - - def __repr__(self): - return self.__class__.__name__ + ' (' \ - + str(self.in_features) + ' -> ' \ - + str(self.out_features) + ')' - - -class GCN(Module): - def __init__(self, nfeat, nhid, nclass, dropout): - super(GCN, self).__init__() - self.gc1 = GraphConvolution(nfeat, nhid) - self.gc2 = GraphConvolution(nhid, nclass) - self.dropout = dropout +class SGC(Module): + def __init__(self, nfeat, nclass): + super(SGC, self).__init__() + self.W = nn.Linear(nfeat, nclass) + th.nn.init.xavier_normal_(self.W.weight) - def forward(self, x, adj): - x = self.gc1(x, adj) - x = th.relu(x) - x = th.dropout(x, self.dropout, train=self.training) - x = self.gc2(x, adj) - return x + def forward(self, x): + return self.W(x) diff --git a/trainer.py b/trainer.py index 32d289e..c62442a 100644 --- a/trainer.py +++ b/trainer.py @@ -1,266 +1,291 @@ -import gc -import warnings -from time import time - -import networkx as nx -import numpy as np -import pandas as pd -import torch as th -from sklearn.model_selection import train_test_split - -from layer import GCN -from utils import accuracy -from utils import macro_f1 -from utils import CudaUse -from utils import EarlyStopping -from utils import LogResult -from utils import parameter_parser -from utils import preprocess_adj -from utils import print_graph_detail -from utils import read_file -from utils import return_seed - -th.backends.cudnn.deterministic = True -th.backends.cudnn.benchmark = True -warnings.filterwarnings("ignore") - - -def get_train_test(target_fn): - train_lst = list() - test_lst = list() - with read_file(target_fn, mode="r") as fin: - for indx, item in enumerate(fin): - if item.split("\t")[1] in {"train", "training", "20news-bydate-train"}: - train_lst.append(indx) - else: - test_lst.append(indx) - - return train_lst, test_lst - - -class PrepareData: - def __init__(self, args): - print("prepare data") - self.graph_path = "data/graph" - self.args = args - - # graph - graph = nx.read_weighted_edgelist(f"{self.graph_path}/{args.dataset}.txt" - , nodetype=int) - print_graph_detail(graph) - adj = nx.to_scipy_sparse_matrix(graph, - nodelist=list(range(graph.number_of_nodes())), - weight='weight', - dtype=np.float) - - adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj) - - self.adj = preprocess_adj(adj, is_sparse=True) - - # <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - - # features - self.nfeat_dim = graph.number_of_nodes() - row = list(range(self.nfeat_dim)) - col = list(range(self.nfeat_dim)) - value = [1.] 
* self.nfeat_dim - shape = (self.nfeat_dim, self.nfeat_dim) - indices = th.from_numpy( - np.vstack((row, col)).astype(np.int64)) - values = th.FloatTensor(value) - shape = th.Size(shape) - - self.features = th.sparse.FloatTensor(indices, values, shape) - - # <<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - # target - - target_fn = f"data/text_dataset/{self.args.dataset}.txt" - target = np.array(pd.read_csv(target_fn, - sep="\t", - header=None)[2]) - target2id = {label: indx for indx, label in enumerate(set(target))} - self.target = [target2id[label] for label in target] - self.nclass = len(target2id) - - # <<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - # train val test split - - self.train_lst, self.test_lst = get_train_test(target_fn) - - -class TextGCNTrainer: - def __init__(self, args, model, pre_data): - self.args = args - self.model = model - self.device = args.device - - self.max_epoch = self.args.max_epoch - self.set_seed() - - self.dataset = args.dataset - self.predata = pre_data - self.earlystopping = EarlyStopping(args.early_stopping) - - def set_seed(self): - th.manual_seed(self.args.seed) - np.random.seed(self.args.seed) - - def fit(self): - self.prepare_data() - self.model = self.model(nfeat=self.nfeat_dim, - nhid=self.args.nhid, - nclass=self.nclass, - dropout=self.args.dropout) - print(self.model.parameters) - self.model = self.model.to(self.device) - - self.optimizer = th.optim.Adam(self.model.parameters(), lr=self.args.lr) - self.criterion = th.nn.CrossEntropyLoss() - - self.model_param = sum(param.numel() for param in self.model.parameters()) - print('# model parameters:', self.model_param) - self.convert_tensor() - - start = time() - self.train() - self.train_time = time() - start - - @classmethod - def set_description(cls, desc): - string = "" - for key, value in desc.items(): - if isinstance(value, int): - string += f"{key}:{value} " - else: - string += f"{key}:{value:.4f} " - print(string) - - def prepare_data(self): - self.adj = self.predata.adj - self.nfeat_dim = self.predata.nfeat_dim - self.features = self.predata.features - self.target = self.predata.target - self.nclass = self.predata.nclass - - self.train_lst, self.val_lst = train_test_split(self.predata.train_lst, - test_size=self.args.val_ratio, - shuffle=True, - random_state=self.args.seed) - self.test_lst = self.predata.test_lst - - def convert_tensor(self): - self.model = self.model.to(self.device) - self.adj = self.adj.to(self.device) - self.features = self.features.to(self.device) - self.target = th.tensor(self.target).long().to(self.device) - self.train_lst = th.tensor(self.train_lst).long().to(self.device) - self.val_lst = th.tensor(self.val_lst).long().to(self.device) - - def train(self): - for epoch in range(self.max_epoch): - self.model.train() - self.optimizer.zero_grad() - - logits = self.model.forward(self.features, self.adj) - loss = self.criterion(logits[self.train_lst], - self.target[self.train_lst]) - - loss.backward() - self.optimizer.step() - - val_desc = self.val(self.val_lst) - - desc = dict(**{"epoch" : epoch, - "train_loss": loss.item(), - }, **val_desc) - - self.set_description(desc) - - if self.earlystopping(val_desc["val_loss"]): - break - - @th.no_grad() - def val(self, x, prefix="val"): - self.model.eval() - with th.no_grad(): - logits = self.model.forward(self.features, self.adj) - loss = self.criterion(logits[x], - self.target[x]) - acc = accuracy(logits[x], - self.target[x]) - f1, precision, recall = macro_f1(logits[x], - self.target[x], - num_classes=self.nclass) - - desc = { - f"{prefix}_loss": 
loss.item(), - "acc" : acc, - "macro_f1" : f1, - "precision" : precision, - "recall" : recall, - } - return desc - - @th.no_grad() - def test(self): - self.test_lst = th.tensor(self.test_lst).long().to(self.device) - test_desc = self.val(self.test_lst, prefix="test") - test_desc["train_time"] = self.train_time - test_desc["model_param"] = self.model_param - return test_desc - - -def main(dataset, times): - args = parameter_parser() - args.dataset = dataset - - args.device = th.device('cuda') if th.cuda.is_available() else th.device('cpu') - args.nhid = 200 - args.max_epoch = 200 - args.dropout = 0.5 - args.val_ratio = 0.1 - args.early_stopping = 10 - args.lr = 0.02 - model = GCN - - print(args) - - predata = PrepareData(args) - cudause = CudaUse() - - record = LogResult() - seed_lst = list() - for ind, seed in enumerate(return_seed(times)): - print(f"\n\n==> {ind}, seed:{seed}") - args.seed = seed - seed_lst.append(seed) - - framework = TextGCNTrainer(model=model, args=args, pre_data=predata) - framework.fit() - - if th.cuda.is_available(): - gpu_mem = cudause.gpu_mem_get(_id=0) - record.log_single(key="gpu_mem", value=gpu_mem) - - record.log(framework.test()) - - del framework - gc.collect() - - if th.cuda.is_available(): - th.cuda.empty_cache() - - print("==> seed set:") - print(seed_lst) - record.show_str() - - -if __name__ == '__main__': - # <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - # for d in ["mr", "ohsumed", "R52", "R8", "20ng"]: - # main(d) - main("mr", 1) - # main("ohsumed") - # main("R8", 1) - # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> +import gc +import warnings +from time import time + +import networkx as nx +import numpy as np +import pandas as pd +import torch as th +from sklearn.model_selection import train_test_split + +from layer import SGC +from utils import accuracy +from utils import macro_f1 +from utils import CudaUse +from utils import EarlyStopping +from utils import LogResult +from utils import parameter_parser +from utils import preprocess_adj +from utils import print_graph_detail +from utils import read_file +from utils import return_seed + +th.backends.cudnn.deterministic = True +th.backends.cudnn.benchmark = True +warnings.filterwarnings("ignore") + + +def get_train_test(target_fn): + train_lst = list() + test_lst = list() + with read_file(target_fn, mode="r") as fin: + for indx, item in enumerate(fin): + if item.split("\t")[1] in {"train", "training", "20news-bydate-train"}: + train_lst.append(indx) + else: + test_lst.append(indx) + + return train_lst, test_lst + + +class PrepareData: + def __init__(self, args): + print("prepare data") + self.graph_path = "PyTorch_TextSGC/data/graph" + self.args = args + + # graph + graph = nx.read_weighted_edgelist(f"{self.graph_path}/{args.dataset}.txt" + , nodetype=int) + print_graph_detail(graph) + adj = nx.to_scipy_sparse_matrix(graph, + nodelist=list(range(graph.number_of_nodes())), + weight='weight', + dtype=np.float) + + adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj) + + self.adj = preprocess_adj(adj, is_sparse=True) + + # <<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + # target + + target_fn = f"PyTorch_TextSGC/data/text_dataset/{self.args.dataset}.txt" + target = np.array(pd.read_csv(target_fn, + sep="\t", + header=None)[2]) + target2id = {label: indx for indx, label in enumerate(set(target))} + self.target = [target2id[label] for label in target] + self.nclass = len(target2id) + + # <<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + # train val test split + + self.train_lst, self.test_lst = 
get_train_test(target_fn) + + +class TextSCGTrainer: + def __init__(self, args, model, pre_data): + self.args = args + self.model = model + self.device = args.device + + self.max_epoch = self.args.max_epoch + self.set_seed() + + self.dataset = args.dataset + self.predata = pre_data + self.earlystopping = EarlyStopping(args.early_stopping) + + def set_seed(self): + th.manual_seed(self.args.seed) + np.random.seed(self.args.seed) + + def fit(self): + self.prepare_data() + self.convert_tensor() + + self.model = self.model(nfeat=self.nfeat_dim, + nclass=self.nclass) + self.model = self.model.to(self.device) + + self.optimizer = th.optim.Adam(self.model.parameters(), lr=self.args.lr) + self.criterion = th.nn.CrossEntropyLoss() + + self.model_param = sum(param.numel() for param in self.model.parameters()) + print('# model parameters:', self.model_param) + + start = time() + self.train() + self.train_time = time() - start + self.pre_time + + @classmethod + def set_description(cls, desc): + string = "" + for key, value in desc.items(): + if isinstance(value, int): + string += f"{key}:{value} " + else: + string += f"{key}:{value:.4f} " + print(string) + + def prepare_data(self): + self.adj = self.predata.adj + self.target = self.predata.target + self.nclass = self.predata.nclass + + self.train_lst, self.val_lst = train_test_split(self.predata.train_lst, + test_size=self.args.val_ratio, + shuffle=True, + random_state=self.args.seed) + self.test_lst = self.predata.test_lst + + @th.no_grad() + def sgc_precompute(self, sp_adj, adj_dense, train_lst, val_lst, test_lst): + start = time() + + # train + feats = adj_dense[:, train_lst].to(self.device) + feats = th.spmm(sp_adj, feats).t() + + train_feats_max, _ = feats.max(dim=0, keepdim=True) + train_feats_min, _ = feats.min(dim=0, keepdim=True) + + train_feats_range = train_feats_max - train_feats_min + useful_features_dim = train_feats_range.squeeze().gt(0).nonzero().squeeze() + feats = feats[:, useful_features_dim] + train_feats_range = train_feats_range[:, useful_features_dim] + train_feats_min = train_feats_min[:, useful_features_dim] + train_vec = ((feats - train_feats_min) / train_feats_range) + + # val + feats = adj_dense[:, val_lst].to(self.device) + feats = th.spmm(sp_adj, feats).t() + feats = feats[:, useful_features_dim] + val_vec = ((feats - train_feats_min) / train_feats_range) + + # test + feats = adj_dense[:, test_lst].to(self.device) + feats = th.spmm(sp_adj, feats).t() + feats = feats[:, useful_features_dim] + test_vec = ((feats - train_feats_min) / train_feats_range).cpu() + + print(train_vec.size()) + print(val_vec.size()) + print(test_vec.size()) + return train_vec, val_vec, test_vec, time() - start + + def convert_tensor(self): + self.target = th.tensor(self.target).long().to(self.device) + + self.train_lst = th.tensor(self.train_lst).long().to(self.device) + self.val_lst = th.tensor(self.val_lst).long().to(self.device) + self.test_lst = th.tensor(self.test_lst).long().to(self.device) + + adj_dense = self.adj.to_dense().to(self.device) + self.adj = self.adj.to(self.device) + self.train_vec, self.val_vec, self.test_vec, self.pre_time = self.sgc_precompute(self.adj, + adj_dense, + self.train_lst, + self.val_lst, + self.test_lst) + self.nfeat_dim = self.train_vec.size(1) + + def train(self): + for epoch in range(self.max_epoch): + self.model.train() + self.optimizer.zero_grad() + + logits = self.model.forward(self.train_vec) + loss = self.criterion(logits, + self.target[self.train_lst]) + + loss.backward() + self.optimizer.step() + + 
val_desc = self.val(self.val_vec, self.val_lst) + + desc = dict(**{"epoch" : epoch, + "train_loss": loss.item(), + }, **val_desc) + + self.set_description(desc) + + if self.earlystopping(val_desc["val_loss"]): + break + + @th.no_grad() + def val(self, feats, ind, prefix="val"): + self.model.eval() + with th.no_grad(): + logits = self.model.forward(feats) + loss = self.criterion(logits, + self.target[ind]) + acc = accuracy(logits, + self.target[ind]) + f1, precision, recall = macro_f1(logits, + self.target[ind], + num_classes=self.nclass) + + desc = { + f"{prefix}_loss": loss.item(), + "acc" : acc, + "macro_f1" : f1, + "precision" : precision, + "recall" : recall, + } + return desc + + @th.no_grad() + def test(self): + test_vec = self.test_vec.to(self.device) + test_lst = th.tensor(self.test_lst).long().to(self.device) + test_desc = self.val(test_vec, test_lst, prefix="test") + test_desc["train_time"] = self.train_time + test_desc["model_param"] = self.model_param + return test_desc + + +def main(dataset, times): + args = parameter_parser() + args.dataset = dataset + + args.device = th.device('cuda') if th.cuda.is_available() else th.device('cpu') + args.nhid = 200 + args.max_epoch = 200 + args.dropout = 0.5 + args.val_ratio = 0.1 + args.early_stopping = 10 + args.lr = 0.01 + model = SGC + + print(args) + + predata = PrepareData(args) + cudause = CudaUse() + + record = LogResult() + seed_lst = list() + for ind, seed in enumerate(return_seed(times)): + print(f"\n\n==> {ind}, seed:{seed}") + args.seed = seed + seed_lst.append(seed) + + framework = TextSCGTrainer(model=model, args=args, pre_data=predata) + framework.fit() + + if th.cuda.is_available(): + gpu_mem = cudause.gpu_mem_get(_id=0) + record.log_single(key="gpu_mem", value=gpu_mem) + + record.log(framework.test()) + + del framework + gc.collect() + + if th.cuda.is_available(): + th.cuda.empty_cache() + + print("==> seed set:") + print(seed_lst) + record.show_str() + + +if __name__ == '__main__': + # <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + # for d in ["mr", "ohsumed", "R52", "R8", "20ng"]: + # main(d) + main("mr", 1) + # main("ohsumed") + # main("R8", 1) + # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> diff --git a/utils.py b/utils.py index 702680d..6a67ef5 100644 --- a/utils.py +++ b/utils.py @@ -1,238 +1,239 @@ -import argparse -import random -from collections import defaultdict - -import numpy as np -import torch as th -import scipy.sparse as sp - - -def macro_f1(pred, targ, num_classes=None): - pred = th.max(pred, 1)[1] - tp_out = [] - fp_out = [] - fn_out = [] - if num_classes is None: - num_classes = sorted(set(targ.cpu().numpy().tolist())) - else: - num_classes = range(num_classes) - for i in num_classes: - tp = ((pred == i) & (targ == i)).sum().item() # 预测为i,且标签的确为i的 - fp = ((pred == i) & (targ != i)).sum().item() # 预测为i,但标签不是为i的 - fn = ((pred != i) & (targ == i)).sum().item() # 预测不是i,但标签是i的 - tp_out.append(tp) - fp_out.append(fp) - fn_out.append(fn) - - eval_tp = np.array(tp_out) - eval_fp = np.array(fp_out) - eval_fn = np.array(fn_out) - - precision = eval_tp / (eval_tp + eval_fp) - precision[np.isnan(precision)] = 0 - precision = np.mean(precision) - - recall = eval_tp / (eval_tp + eval_fn) - recall[np.isnan(recall)] = 0 - recall = np.mean(recall) - - f1 = 2 * (precision * recall) / (precision + recall) - return f1, precision, recall - - -def accuracy(pred, targ): - pred = th.max(pred, 1)[1] - acc = ((pred == targ).float()).sum().item() / targ.size()[0] - - return acc - - -class CudaUse(object): - def 
__init__(self): - self.cuda_available = th.cuda.is_available() - if self.cuda_available: - from fastai.utils.pynvml_gate import load_pynvml_env - self.pynvml = load_pynvml_env() - - def get_cuda_id(self): - if self.cuda_available: - gpu_mem = sorted(self.gpu_mem_get_all(), key=lambda item: item.free, reverse=True) - low_use_id = gpu_mem[0].id - return th.device(f'cuda:{low_use_id}') - else: - return th.device('cpu') - - def gpu_mem_get_all(self): - "get total, used and free memory (in MBs) for each available gpu" - return list(map(self.gpu_mem_get, range(self.pynvml.nvmlDeviceGetCount()))) - - def gpu_mem_get(self, _id=None): - """get total, used and free memory (in MBs) for gpu `id`. if `id` is not passed, - currently selected torch device is used""" - from collections import namedtuple - GPUMemory = namedtuple('GPUMemory', ['total', 'free', 'used', 'id']) - - if _id is None: - _id = th.cuda.current_device() - try: - handle = self.pynvml.nvmlDeviceGetHandleByIndex(_id) - info = self.pynvml.nvmlDeviceGetMemoryInfo(handle) - # return GPUMemory(*(map(b2mb, [info.total, info.free, info.used])), id=_id) - return b2mb(info.used) - except: - return GPUMemory(0, 0, 0, -1) - - -def read_file(path, mode='r', encoding=None): - if mode not in {"r", "rb"}: - raise ValueError("only read") - return open(path, mode=mode, encoding=encoding) - - -def print_graph_detail(graph): - """ - 格式化显示Graph参数 - :param graph: - :return: - """ - import networkx as nx - dst = {"nodes" : nx.number_of_nodes(graph), - "edges" : nx.number_of_edges(graph), - "selfloops": nx.number_of_selfloops(graph), - "isolates" : nx.number_of_isolates(graph), - "覆盖度" : 1 - nx.number_of_isolates(graph) / nx.number_of_nodes(graph), } - print_table(dst) - - -def print_table(dst): - table_title = list(dst.keys()) - from prettytable import PrettyTable - table = PrettyTable(field_names=table_title, header_style="title", header=True, border=True, - hrules=1, padding_width=2, align="c") - table.float_format = "0.4" - table.add_row([dst[i] for i in table_title]) - print(table) - - -def return_seed(nums=10): - # seed = [47, 17, 1, 3, 87, 300, 77, 23, 13] - seed = random.sample(range(0, 100000), nums) - return seed - - -def preprocess_adj(adj, is_sparse=False): - """Preprocessing of adjacency matrix for simple pygGCN model and conversion to - tuple representation.""" - adj_normalized = normalize_adj(adj + sp.eye(adj.shape[0])) - if is_sparse: - adj_normalized = sparse_mx_to_torch_sparse_tensor(adj_normalized) - return adj_normalized - else: - return th.from_numpy(adj_normalized.A).float() - - -def sparse_mx_to_torch_sparse_tensor(sparse_mx): - """Convert a scipy sparse matrix to a torch sparse tensor.""" - sparse_mx = sparse_mx.tocoo().astype(np.float32) - indices = th.from_numpy( - np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64)) - values = th.from_numpy(sparse_mx.data) - shape = th.Size(sparse_mx.shape) - return th.sparse.FloatTensor(indices, values, shape) - - -def normalize_adj(adj): - """Symmetrically normalize adjacency matrix.""" - adj = sp.coo_matrix(adj) - rowsum = np.array(adj.sum(1)) - d_inv_sqrt = np.power(rowsum, -0.5).flatten() - d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0. 
- d_mat_inv_sqrt = sp.diags(d_inv_sqrt) - return adj.dot(d_mat_inv_sqrt).transpose().dot(d_mat_inv_sqrt).tocoo() - - -class EarlyStopping: - """Early stops the training if validation loss doesn't improve after a given patience.""" - - def __init__(self, patience=7, verbose=False, delta=0): - """ - Args: - patience (int): How long to wait after last time validation loss improved. - Default: 7 - verbose (bool): If True, prints a message for each validation loss improvement. - Default: False - delta (float): Minimum change in the monitored quantity to qualify as an improvement. - Default: 0 - """ - self.patience = patience - self.verbose = verbose - self.counter = 0 - self.best_score = None - self.early_stop = False - self.val_loss_min = np.Inf - self.delta = delta - self.model_path = "hdd_data/prepare_dataset/model/model.pt" - - def __call__(self, val_loss, model=None): - - score = -val_loss - - if self.best_score is None: - self.best_score = score - # self.save_checkpoint(val_loss, model) - elif score < self.best_score + self.delta: - self.counter += 1 - if self.verbose: - print(f'EarlyStopping counter: {self.counter} out of {self.patience}') - if self.counter >= self.patience: - self.early_stop = True - return True - else: - self.best_score = score - # self.save_checkpoint(val_loss, model) - self.counter = 0 - - def save_checkpoint(self, val_loss, model): - '''Saves model when validation loss decrease.''' - if self.verbose: - print( - f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...') - th.save(model.state_dict(), self.model_path) - self.val_loss_min = val_loss - - def load_model(self): - return th.load(self.model_path) - - -def parameter_parser(): - """ - A method to parse up command line parameters. By default it trains on the PubMed yelp_dataset. - The default hyperparameters give a good quality representation without grid search. 
- """ - parser = argparse.ArgumentParser(description="Run .") - - return parser.parse_args() - - -class LogResult: - def __init__(self): - self.result = defaultdict(list) - pass - - def log(self, result: dict): - for key, value in result.items(): - self.result[key].append(value) - - def log_single(self, key, value): - self.result[key].append(value) - - def show_str(self): - print() - string = "" - for key, value_lst in self.result.items(): - value = np.mean(value_lst) - if isinstance(value, int): - string += f"{key}:\n{value}\n{max(value_lst)}\n{min(value_lst)}\n" - else: - string += f"{key}:\n{value:.4f}\n{max(value_lst):.4f}\n{min(value_lst):.4f} \n" - print(string) +import argparse +import random +from collections import defaultdict + +import numpy as np +import torch as th +import scipy.sparse as sp + + +def macro_f1(pred, targ, num_classes=None): + pred = th.max(pred, 1)[1] + tp_out = [] + fp_out = [] + fn_out = [] + if num_classes is None: + num_classes = sorted(set(targ.cpu().numpy().tolist())) + else: + num_classes = range(num_classes) + for i in num_classes: + tp = ((pred == i) & (targ == i)).sum().item() # 预测为i,且标签的确为i的 + fp = ((pred == i) & (targ != i)).sum().item() # 预测为i,但标签不是为i的 + fn = ((pred != i) & (targ == i)).sum().item() # 预测不是i,但标签是i的 + tp_out.append(tp) + fp_out.append(fp) + fn_out.append(fn) + + eval_tp = np.array(tp_out) + eval_fp = np.array(fp_out) + eval_fn = np.array(fn_out) + + precision = eval_tp / (eval_tp + eval_fp) + precision[np.isnan(precision)] = 0 + precision = np.mean(precision) + + recall = eval_tp / (eval_tp + eval_fn) + recall[np.isnan(recall)] = 0 + recall = np.mean(recall) + + f1 = 2 * (precision * recall) / (precision + recall) + return f1, precision, recall + + +def accuracy(pred, targ): + pred = th.max(pred, 1)[1] + acc = ((pred == targ).float()).sum().item() / targ.size()[0] + + return acc + + +class CudaUse(object): + def __init__(self): + self.cuda_available = th.cuda.is_available() + if self.cuda_available: + from fastai.utils.pynvml_gate import load_pynvml_env + self.pynvml = load_pynvml_env() + + def get_cuda_id(self): + if self.cuda_available: + gpu_mem = sorted(self.gpu_mem_get_all(), key=lambda item: item.free, reverse=True) + low_use_id = gpu_mem[0].id + return th.device(f'cuda:{low_use_id}') + else: + return th.device('cpu') + + def gpu_mem_get_all(self): + "get total, used and free memory (in MBs) for each available gpu" + return list(map(self.gpu_mem_get, range(self.pynvml.nvmlDeviceGetCount()))) + + def gpu_mem_get(self, _id=None): + """get total, used and free memory (in MBs) for gpu `id`. 
if `id` is not passed, + currently selected torch device is used""" + from collections import namedtuple + GPUMemory = namedtuple('GPUMemory', ['total', 'free', 'used', 'id']) + + if _id is None: + _id = th.cuda.current_device() + try: + handle = self.pynvml.nvmlDeviceGetHandleByIndex(_id) + info = self.pynvml.nvmlDeviceGetMemoryInfo(handle) + # return GPUMemory(*(map(b2mb, [info.total, info.free, info.used])), id=_id) + return b2mb(info.used) + except: + return GPUMemory(0, 0, 0, -1) + + +def read_file(path, mode='r', encoding=None): + if mode not in {"r", "rb"}: + raise ValueError("only read") + return open(path, mode=mode, encoding=encoding) + + +def print_graph_detail(graph): + """ + 格式化显示Graph参数 + :param graph: + :return: + """ + import networkx as nx + dst = {"nodes" : nx.number_of_nodes(graph), + "edges" : nx.number_of_edges(graph), + "selfloops": nx.number_of_selfloops(graph), + "isolates" : nx.number_of_isolates(graph), + "覆盖度" : 1 - nx.number_of_isolates(graph) / nx.number_of_nodes(graph), } + print_table(dst) + + +def print_table(dst): + table_title = list(dst.keys()) + from prettytable import PrettyTable + table = PrettyTable(field_names=table_title, header_style="title", header=True, border=True, + hrules=1, padding_width=2, align="c") + table.float_format = "0.4" + table.add_row([dst[i] for i in table_title]) + print(table) + + +def return_seed(nums=10): + # seed = [47, 17, 1, 3, 87, 300, 77, 23, 13] + seed = random.sample(range(0, 100000), nums) + return seed + + +def preprocess_adj(adj, is_sparse=False): + """Preprocessing of adjacency matrix for simple pygGCN model and conversion to + tuple representation.""" + adj_normalized = normalize_adj(adj + sp.eye(adj.shape[0])) + if is_sparse: + adj_normalized = sparse_mx_to_torch_sparse_tensor(adj_normalized) + return adj_normalized + else: + return th.from_numpy(adj_normalized.A).float() + + +def sparse_mx_to_torch_sparse_tensor(sparse_mx): + """Convert a scipy sparse matrix to a torch sparse tensor.""" + sparse_mx = sparse_mx.tocoo().astype(np.float32) + indices = th.from_numpy( + np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64)) + values = th.from_numpy(sparse_mx.data) + shape = th.Size(sparse_mx.shape) + return th.sparse.FloatTensor(indices, values, shape) + + +def normalize_adj(adj): + """Symmetrically normalize adjacency matrix.""" + adj = sp.coo_matrix(adj) + rowsum = np.array(adj.sum(1)) + d_inv_sqrt = np.power(rowsum, -0.5).flatten() + d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0. + d_mat_inv_sqrt = sp.diags(d_inv_sqrt) + return adj.dot(d_mat_inv_sqrt).transpose().dot(d_mat_inv_sqrt).tocoo() + + +class EarlyStopping: + """Early stops the training if validation loss doesn't improve after a given patience.""" + + def __init__(self, patience=7, verbose=False, delta=0): + """ + Args: + patience (int): How long to wait after last time validation loss improved. + Default: 7 + verbose (bool): If True, prints a message for each validation loss improvement. + Default: False + delta (float): Minimum change in the monitored quantity to qualify as an improvement. 
+ Default: 0 + """ + self.patience = patience + self.verbose = verbose + self.counter = 0 + self.best_score = None + self.early_stop = False + self.val_loss_min = np.Inf + self.delta = delta + self.model_path = "hdd_data/prepare_dataset/model/model.pt" + + def __call__(self, val_loss, model=None): + + score = -val_loss + + if self.best_score is None: + self.best_score = score + # self.save_checkpoint(val_loss, model) + elif score < self.best_score + self.delta: + self.counter += 1 + if self.verbose: + print(f'EarlyStopping counter: {self.counter} out of {self.patience}') + if self.counter >= self.patience: + self.early_stop = True + return True + else: + self.best_score = score + # self.save_checkpoint(val_loss, model) + self.counter = 0 + + def save_checkpoint(self, val_loss, model): + '''Saves model when validation loss decrease.''' + if self.verbose: + print( + f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...') + th.save(model.state_dict(), self.model_path) + self.val_loss_min = val_loss + + def load_model(self): + return th.load(self.model_path) + + +def parameter_parser(): + """ + A method to parse up command line parameters. By default it trains on the PubMed yelp_dataset. + The default hyperparameters give a good quality representation without grid search. + """ + parser = argparse.ArgumentParser(description="Run .") + + return parser.parse_args() + + +class LogResult: + def __init__(self): + self.result = defaultdict(list) + pass + + def log(self, result: dict): + for key, value in result.items(): + self.result[key].append(value) + + def log_single(self, key, value): + self.result[key].append(value) + + def show_str(self): + print() + string = "" + for key, value_lst in self.result.items(): + value = np.mean(value_lst) + if isinstance(value, int): + string += f"{key}:\n{value}\n{max(value_lst)}\n{min(value_lst)}\n" + else: + # string +=f"{key}:\n{value:.4f}\n{max(value_lst):.4f}\n{min(value_lst):.4f} \n" + print(key," ",":",value,) + print(string) From a249870494b140d339874016454e7216cddf3252 Mon Sep 17 00:00:00 2001 From: 2314 <131743576+gxy2314@users.noreply.github.com> Date: Thu, 26 Dec 2024 14:40:27 +0800 Subject: [PATCH 2/2] resolve training issues with mistakes - Fixed `ModuleNotFoundError: No module named 'fastai.utils'` during training. - Resolved `RuntimeError: indices should be either on CPU or on the same device as the indexed tensor`. - Addressed output errors during result display.
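
Note on the device error listed in both commit messages: "RuntimeError: indices should be either on CPU or on the same device as the indexed tensor" is raised when a tensor on the GPU is indexed with index tensors that still live on the CPU. The patch handles this in trainer.py by moving the index tensors with .to(self.device) before they are used for indexing (see convert_tensor and test). Below is a minimal standalone sketch of that pattern, assuming only PyTorch; logits, target and train_idx are placeholder names for illustration, not identifiers taken from this patch.

    import torch as th

    device = th.device("cuda" if th.cuda.is_available() else "cpu")

    # Model outputs and labels already placed on the target device.
    logits = th.randn(6, 3, device=device)
    target = th.tensor([0, 2, 1, 0, 1, 2]).long().to(device)

    # The index tensor must be moved to the same device before indexing;
    # indexing a CUDA tensor with a CPU LongTensor triggers the RuntimeError
    # quoted in the commit message.
    train_idx = th.tensor([0, 1, 3]).long().to(device)

    loss = th.nn.functional.cross_entropy(logits[train_idx], target[train_idx])
    print(loss.item())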