From 902f7ec5ce0be53e6d93dd540e5edfcfbad117e7 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Fri, 10 Aug 2018 21:01:27 -0300 Subject: [PATCH 01/90] move config to config.py and use it to simplify get_user_ids.py Signed-off-by: Niv Sardi --- config.py | 7 +++++++ get_user_ids.py | 14 +++++++------- 2 files changed, 14 insertions(+), 7 deletions(-) create mode 100644 config.py diff --git a/config.py b/config.py new file mode 100644 index 0000000..ec2c666 --- /dev/null +++ b/config.py @@ -0,0 +1,7 @@ +from random import randint +import json + +def get_config(filename): + with open(filename) as data: + d = json.load(data) + return d[randint(0, d.__len__() - 1)] diff --git a/get_user_ids.py b/get_user_ids.py index 0845fd7..c40f398 100644 --- a/get_user_ids.py +++ b/get_user_ids.py @@ -3,17 +3,17 @@ import csv import sys +from config import get_config -consumer_key = "" -consumer_secret = "" -access_token = "" -access_token_secret = "" +config = "./config.json" -auth = tweepy.OAuthHandler(consumer_key, consumer_secret) -auth.set_access_token(access_token, access_token_secret) -api = tweepy.API(auth) +authdata = get_config(config) + +auth = tweepy.OAuthHandler(authdata['consumer_key'], authdata['consumer_secret']) +auth.set_access_token(authdata['access_token'], authdata['access_token_secret']) +api = tweepy.API(auth) def get_user_ids(): handles = [] From 9073397d7257b44ad6b91dcc3180d3d7a05c5c4f Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Fri, 10 Aug 2018 21:02:56 -0300 Subject: [PATCH 02/90] use config.py in streaming.py and use getopt to pass args --- streaming.py | 53 ++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 41 insertions(+), 12 deletions(-) diff --git a/streaming.py b/streaming.py index 2b5477c..7d5de11 100644 --- a/streaming.py +++ b/streaming.py @@ -1,14 +1,48 @@ +import sys +import re +from getopt import getopt, GetoptError + from tweepy.streaming import StreamListener from tweepy import OAuthHandler from tweepy import Stream from save_to_db import save_to_db +from config import get_config + +r = re.compile("\s+") +db = "mysql" +ids = [] +track = [] +argv = sys.argv +config_file = 0 + +if ((sys.argv[0]).find(".py") != -1): + argv = sys.argv[1:] -# Authentication details. -consumer_key = "" -consumer_secret = "" -access_token = "" -access_token_secret = "" +def usage(): + print "usage: [-c|--config config.json] [-f|--file id.csv] [-i|--ids id1,id2,id3...] [-t|--track hash1,hash2,hash3...] [-d|--db]" +try: + opts, args = getopt(argv, "c:f:i:t:D:", ["config", "file", "ids", "track"]) +except GetoptError: + usage() + sys.exit(2) +for opt, arg in opts: + if opt in ("-c", "--config"): + config_file = arg + if opt in ("-D", "--db"): + db = arg + if opt in ("-i", "--ids"): + ids += r.sub(",", arg).split(',') + if opt in ("-t", "--track"): + track += r.sub(",", arg).split(',') + + if opt in ("-f", "--file"): + with open(arg) as f: + for row in f: + ids.append(row) + +config = get_config(config_file) +ids += args # This is the listener, resposible for receiving data class StdOutListener(StreamListener): @@ -24,13 +58,8 @@ def on_error(self, status): if __name__ == "__main__": l = StdOutListener() - auth = OAuthHandler(consumer_key, consumer_secret) - auth.set_access_token(access_token, access_token_secret) - - ids = [] - with open("ids.csv") as f: - for row in f: - ids.append(row) + auth = OAuthHandler(config['consumer_key'], config['consumer_secret']) + auth.set_access_token(config['access_token'], config['access_token_secret']) stream = Stream(auth, l) stream.filter(ids) From a564e321937d955adf36ad851bd7e0de74cb4849 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Fri, 10 Aug 2018 21:06:12 -0300 Subject: [PATCH 03/90] rename save_to_db to db_mysql and port functions from get_user_ids Signed-off-by: Niv Sardi --- save_to_db.py => db_mysql.py | 39 ++++++++++++++++++++++++++++++ screenshot.py | 46 ++++-------------------------------- streaming.py | 6 ++++- 3 files changed, 48 insertions(+), 43 deletions(-) rename save_to_db.py => db_mysql.py (50%) diff --git a/save_to_db.py b/db_mysql.py similarity index 50% rename from save_to_db.py rename to db_mysql.py index 3b61dce..56dbb42 100644 --- a/save_to_db.py +++ b/db_mysql.py @@ -8,6 +8,45 @@ db="", charset="utf8") +def writeSuccess(path): + cur = db.cursor() + try: + cur.execute("""UPDATE Tweets \ + SET Screenshot=1 \ + WHERE Tweet_Id=%s""", [path]) + db.commit() + print "Screenshot OK. Tweet id ", path + except MySQLdb.Error, e: + try: + print "MySQL Error [%d]: %s" % (e.args[0], e.args[1]) + except IndexError: + print "MySQL Error: %s" % str(e) + + print "Error", e.args[0], e.args[1] + print "Warning:", path, "not saved to database" + return True + +def markDeleted(path): + cur = db.cursor() + try: + cur.execute("""UPDATE Tweets \ + SET Deleted=1 \ + WHERE Tweet_Id=%s""", [path]) + db.commit() + print "Tweet marked as deleted ", path + except MySQLdb.Error, e: + try: + print "MySQL Error [%d]: %s" % (e.args[0], e.args[1]) + except IndexError: + print "MySQL Error: %s" % str(e) + + print "Error", e.args[0], e.args[1] + print "Warning:", path, "not saved to database" + return True + +def getLogs(): + cur = db.cursor() + return cur.execute("SELECT Url, Tweet_Id FROM Tweets WHERE Screenshot=0 AND Deleted=0 ") def save_to_db(author, text, url, id_str): cur = db.cursor() diff --git a/screenshot.py b/screenshot.py index 9c8b088..3e8158d 100644 --- a/screenshot.py +++ b/screenshot.py @@ -1,14 +1,9 @@ # Run me with 'nosetests screenshot.py --with-save-baseline --nocapture' -import MySQLdb from needle.cases import NeedleTestCase from needle.driver import NeedlePhantomJS -db = MySQLdb.connect(host="", - user="", - passwd="", - db="") - +from db_mysql import db class captureTweetScreenshots(NeedleTestCase): @@ -20,47 +15,14 @@ def test_masthead(self): self.list_to_screenshot() def writeSuccess(self, path): - - cur = db.cursor() - try: - cur.execute("""UPDATE Tweets \ - SET Screenshot=1 \ - WHERE Tweet_Id=%s""", [path]) - db.commit() - print "Screenshot OK. Tweet id ", path - except MySQLdb.Error, e: - try: - print "MySQL Error [%d]: %s" % (e.args[0], e.args[1]) - except IndexError: - print "MySQL Error: %s" % str(e) - - print "Error", e.args[0], e.args[1] - print "Warning:", path, "not saved to database" - return True + return db.writeSuccess(path) def markDeleted(self, path): - - cur = db.cursor() - try: - cur.execute("""UPDATE Tweets \ - SET Deleted=1 \ - WHERE Tweet_Id=%s""", [path]) - db.commit() - print "Tweet marked as deleted ", path - except MySQLdb.Error, e: - try: - print "MySQL Error [%d]: %s" % (e.args[0], e.args[1]) - except IndexError: - print "MySQL Error: %s" % str(e) - - print "Error", e.args[0], e.args[1] - print "Warning:", path, "not saved to database" - return True + return db.markDeleted(path) def list_to_screenshot(self): logFile = open('logfile.txt', 'w') - cur = db.cursor() - cur.execute("SELECT Url, Tweet_Id FROM Tweets WHERE Screenshot=0 AND Deleted=0 ") + cur = db.getLogs() for (Url, Tweet_Id) in cur: try: self.driver.get(Url) diff --git a/streaming.py b/streaming.py index 7d5de11..ca6a4e3 100644 --- a/streaming.py +++ b/streaming.py @@ -5,7 +5,6 @@ from tweepy.streaming import StreamListener from tweepy import OAuthHandler from tweepy import Stream -from save_to_db import save_to_db from config import get_config r = re.compile("\s+") @@ -44,6 +43,11 @@ def usage(): config = get_config(config_file) ids += args +if db == "mysql": + from db_mysql import save_to_db +else: + print "ERROR could not find db driver for ", db + sys.exit(-2) # This is the listener, resposible for receiving data class StdOutListener(StreamListener): From 88067bf3ae4066593a2c2e51ac5271f716f0501b Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Fri, 10 Aug 2018 21:06:56 -0300 Subject: [PATCH 04/90] make get_user_ids use getopts allow to pass users from file or from command line --- get_user_ids.py | 49 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 34 insertions(+), 15 deletions(-) diff --git a/get_user_ids.py b/get_user_ids.py index c40f398..2ae1f07 100644 --- a/get_user_ids.py +++ b/get_user_ids.py @@ -1,3 +1,4 @@ +from getopt import getopt, GetoptError import tweepy import time import csv @@ -7,8 +8,31 @@ config = "./config.json" +if ((sys.argv[0]).find(".py") != -1): + argv = sys.argv[1:] + +def usage(): + print "usage: [-c|--config config.json] [-f|--file users.csv] [user1 [user2 [user3] ...]]" + +try: + opts, args = getopt(argv, "c:f:", ["config", "file"]) +except GetoptError: + usage() + sys.exit(2) +for opt, arg in opts: + if opt in ("-c", "--config"): + config = arg + if opt in ("-f", "--file"): + with open(arg, "rb") as csvfile: + reader = csv.reader(csvfile, delimiter=',', quotechar='|') + for row in reader: + for elem in row: + users.extend(elem.strip().split(',')) + +users += args authdata = get_config(config) +print "looking for", users auth = tweepy.OAuthHandler(authdata['consumer_key'], authdata['consumer_secret']) auth.set_access_token(authdata['access_token'], authdata['access_token_secret']) @@ -17,20 +41,15 @@ def get_user_ids(): handles = [] - with open("list.csv", "rb") as csvfile: - reader = csv.reader(csvfile, delimiter=',', quotechar='|') - for row in reader: - for elem in row: - handles.extend(elem.strip().split(',')) - - for handle in handles: - try: - u = api.get_user(handle[1:-1]) - time.sleep(6) - print u._json['id'] - sys.stderr.write(str(u._json['id']) + "\n") - except Exception, e: - print e - + for screen_name in users: + try: + u = api.get_user(screen_name) + print screen_name, u._json['id'] + handles.append(str(u._json['id'])) + except Exception, e: + print 'ERROR', e, authdata + + sys.stderr.write(' '.join(handles) + "\n") + return handles if __name__ == '__main__': get_user_ids() From 6c5f48435402c0326a4659f5c8fa6ccb210bd32c Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Fri, 10 Aug 2018 21:07:45 -0300 Subject: [PATCH 05/90] add sqlite driver Signed-off-by: Niv Sardi --- db_sqlite.py | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++ streaming.py | 2 ++ 2 files changed, 59 insertions(+) create mode 100644 db_sqlite.py diff --git a/db_sqlite.py b/db_sqlite.py new file mode 100644 index 0000000..0b31f6c --- /dev/null +++ b/db_sqlite.py @@ -0,0 +1,57 @@ +import sqlite3 + +db = sqlite3.connect('twitter.db') + +def writeSuccess(path): + cur = db.cursor() + try: + cur.execute("""UPDATE Tweets \ + SET Screenshot=1 \ + WHERE Tweet_Id='%s'""" % [path]) + db.commit() + print "Screenshot OK. Tweet id ", path + except sqlite3.Error, e: + print "Error", e + print "Warning:", path, "not saved to database" + return True + +def markDeleted(path): + cur = db.cursor() + try: + cur.execute("""UPDATE Tweets \ + SET Deleted=1 \ + WHERE Tweet_Id='%s'""" % [path]) + db.commit() + print "Tweet marked as deleted ", path + except sqlite3.Error, e: + print "Error", e + print "Warning:", path, "not saved to database" + return True + +def getLogs(): + cur = db.cursor() + return cur.execute("SELECT Url, Tweet_Id FROM Tweets WHERE Screenshot=0 AND Deleted=0 ") + +def save_to_db(author, text, url, id_str): + cur = db.cursor() + + cur.execute("CREATE TABLE IF NOT EXISTS Tweets (Id INTEGER PRIMARY KEY, \ + Author VARCHAR(255), \ + Text VARCHAR(255), \ + Url VARCHAR(255), \ + Tweet_Id VARCHAR(255), \ + Screenshot INTEGER, \ + Deleted INTEGER)") + + try: + cur.execute("""INSERT INTO Tweets(Author, Text, Url, Tweet_Id, Screenshot, Deleted) + VALUES ('%s', '%s', '%s', '%s', '%s', '%s')""" % (author, text, url, id_str, 0, 0)) + db.commit() + #print "Wrote to database:", author, id_str + except sqlite3.Error, e: + print "Error", e + db.rollback() + print "ERROR writing database" + +if __name__ == '__main__': + save_to_db('xaiki', 'blah blah', 'url', 'id') diff --git a/streaming.py b/streaming.py index ca6a4e3..ed83b23 100644 --- a/streaming.py +++ b/streaming.py @@ -45,6 +45,8 @@ def usage(): if db == "mysql": from db_mysql import save_to_db +elif db == "sqlite": + from db_sqlite import save_to_db else: print "ERROR could not find db driver for ", db sys.exit(-2) From beb3b6678a19de12e2c7468545d3bb96fdff2b55 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Fri, 10 Aug 2018 21:08:56 -0300 Subject: [PATCH 06/90] add elasticsearch driver remove dead code from db_mysql Signed-off-by: Niv Sardi --- db_elasticsearch.py | 13 +++++++++++++ db_mysql.py | 9 --------- streaming.py | 2 ++ 3 files changed, 15 insertions(+), 9 deletions(-) create mode 100644 db_elasticsearch.py diff --git a/db_elasticsearch.py b/db_elasticsearch.py new file mode 100644 index 0000000..e049a61 --- /dev/null +++ b/db_elasticsearch.py @@ -0,0 +1,13 @@ +import elasticsearch + +es = elasticsearch.Elasticsearch() + +def save_to_db(author, text, url, id_str): + es.index(index="tweets", doc_type="tweet", id=id, body={ + "Author": author, + "Text": text.encode('utf-8'), + "Url": url, + "Tweet_Id": id_str + }) + + diff --git a/db_mysql.py b/db_mysql.py index 56dbb42..e834743 100644 --- a/db_mysql.py +++ b/db_mysql.py @@ -1,5 +1,4 @@ import MySQLdb -# import elasticsearch db = MySQLdb.connect(host="", @@ -69,11 +68,3 @@ def save_to_db(author, text, url, id_str): print "Error", e.args[0], e.args[1] db.rollback() print "ERROR writing database" - - # es = elasticsearch.Elasticsearch() - # es.index(index="tweets", doc_type="tweet", id=id, body={ - # "Author": author, - # "Text": text.encode('utf-8'), - # "Url": url, - # "Tweet_Id": id_str - # }) diff --git a/streaming.py b/streaming.py index ed83b23..d3a4d31 100644 --- a/streaming.py +++ b/streaming.py @@ -47,6 +47,8 @@ def usage(): from db_mysql import save_to_db elif db == "sqlite": from db_sqlite import save_to_db +elif db == "elasticsearch": + from db_elasticsearch import save_to_db else: print "ERROR could not find db driver for ", db sys.exit(-2) From 55c033238c94fa10be61a2ea89acc2889a1b8912 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Fri, 10 Aug 2018 21:09:35 -0300 Subject: [PATCH 07/90] Allow following users and hashtags (track) --- streaming.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/streaming.py b/streaming.py index d3a4d31..ff8ed3f 100644 --- a/streaming.py +++ b/streaming.py @@ -70,4 +70,4 @@ def on_error(self, status): auth.set_access_token(config['access_token'], config['access_token_secret']) stream = Stream(auth, l) - stream.filter(ids) + stream.filter(follow=ids, track=track) From ee9e8f08a0496121fa7ab32b8bc0a10a04868995 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Fri, 10 Aug 2018 21:10:18 -0300 Subject: [PATCH 08/90] add requirements.txt Signed-off-by: Niv Sardi --- requirements.txt | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..2475d22 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +tweepy +needle From e8eb572f78f315f63d7b7d846fd66c7a698e4d05 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Fri, 10 Aug 2018 21:10:30 -0300 Subject: [PATCH 09/90] documentation update in README.md Signed-off-by: Niv Sardi --- README.md | 50 ++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 42 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index d70e4ea..6949990 100644 --- a/README.md +++ b/README.md @@ -2,22 +2,56 @@ This is a collection of tools to monitor deleted tweets, automate screenshoting, and archiving. -* `streaming.py` and `save_to_db.py` work together to grab a real-time streamed timeline from Twitter and save all the results in a database. +* `streaming.py` and `db_{mysql,sqlite}.py` work together to grab a real-time streamed timeline from Twitter and save all the results in a database. * All the tweets in the database are then screenshot by `screenshot.py` * Finally, the `monitoring.py` worker crawls through the database and checks if the tweets have been deleted. * I included `get_user_ids.py`, as the Twitter API often requires the ID, and not the screen name (eg not "@basilesimon"). ## Dependencies and install * `git clone` this repo -* `wget https://raw.githubusercontent.com/pypa/pip/master/contrib/get-pip.py` then `sudo python get-pip.py` -* `pip install tweepy` -* `pip install MySQL-python` (but you might need to `apt-get install build-essential python-dev libmysqlclient-dev`. I read it's easy to install on Max OS, with Homebrew) -* `pip install needle` -* `apt-get install mysql-server nodejs-legacy nodejs npm` +* `wget https://raw.githubusercontent.com/pypa/pip/master/contrib/get-pip.py` +then `sudo python get-pip.py` +* pip install -r requirements.txt + +# Configuration +you should put your credentials in a json file, we'll pick up a random +entry, the default file is `config.json` but you can change that with command +line arguments + +it should look like this: +```json +[ + { + "consumer_key" : "XXXXXXXXXXXXXXXXXXXXXXXXX", + "consumer_secret" : "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", + "access_token": "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", + "access_token_secret" : "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" + }, + { + "consumer_key" : "YYYYYYYYYYYYYYYYYYYYYYYYY", + "consumer_secret" : "YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY", + "access_token": "YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY", + "access_token_secret" : "YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY" + }, + { + "consumer_key" : "ZZZZZZZZZZZZZZZZZZZZZZZZZ", + "consumer_secret" : "ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ", + "access_token": "ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ", + "access_token_secret" : "ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ" + } +] +``` + + +# for Mysql +* `pip install MySQL-python` (but you might need to `apt-get install + build-essential python-dev libmysqlclient-dev`. I read it's easy to install + on Max OS, with Homebrew) +* `apt-get install mysql-server + +* apt-get install nodejs-legacy nodejs npm` * `sudo apt-get install build-essential chrpath git-core libssl-dev libfontconfig1-dev libxft-dev` * `sudo npm -g install phantomjs` -* You will need a comma-separated list of user IDs, or a list of keywords you want to track. See all the other options in [the Docs](https://dev.twitter.com/streaming/reference/post/statuses/filter). -* Obviously, you will also need your developer access keys and things. Pop them in the placeholders accordingly in each file. ### Comma-separated list of user IDs I use the wonderful [t from sferik](https://github.com/sferik/t), a command line tool for twitter shenanigans. From 12ba35bb6578c76ec4b314f6099faffbf32b2eca Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Fri, 17 Aug 2018 13:10:55 -0300 Subject: [PATCH 10/90] move db files to their own directory Signed-off-by: Niv Sardi --- DB/__init__.py | 0 DB/generic.py | 13 ++++++ DB/mysql.py | 81 ++++++++++++++++++++++++++++++++++ DB/pynx.py | 103 ++++++++++++++++++++++++++++++++++++++++++++ DB/sqlite.py | 70 ++++++++++++++++++++++++++++++ db_elasticsearch.py | 13 ------ db_mysql.py | 70 ------------------------------ db_sqlite.py | 57 ------------------------ 8 files changed, 267 insertions(+), 140 deletions(-) create mode 100644 DB/__init__.py create mode 100644 DB/generic.py create mode 100644 DB/mysql.py create mode 100644 DB/pynx.py create mode 100644 DB/sqlite.py delete mode 100644 db_elasticsearch.py delete mode 100644 db_mysql.py delete mode 100644 db_sqlite.py diff --git a/DB/__init__.py b/DB/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/DB/generic.py b/DB/generic.py new file mode 100644 index 0000000..a9e513d --- /dev/null +++ b/DB/generic.py @@ -0,0 +1,13 @@ +class DB: + def __init__(self): + self.name = "Generic DB Driver" + def getTweets(self): + print "NOT IMPLEMENTED" + def writeSuccess(self, path): + print "NOT IMPLEMENTED" + def markDeleted(self, path): + print "NOT IMPLEMENTED" + def getLogs(self): + print "NOT IMPLEMENTED" + def save(self, url, status): + print "NOT IMPLEMENTED" diff --git a/DB/mysql.py b/DB/mysql.py new file mode 100644 index 0000000..c1d8514 --- /dev/null +++ b/DB/mysql.py @@ -0,0 +1,81 @@ +import MySQLdb +import generic + +class MySQLDriver(generic.DB): + def __init__(self): + super(self) + self.name = "MySQL DB Driver" + self.db = MySQLdb.connect(host="", + user="", + passwd="", + db="", + charset="utf8") + + def getTweets(): + self.db.cursor() + return cur.execute("""SELECT * \ + FROM Tweets \ + WHERE Deleted=0""") + + def writeSuccess(path): + cur = self.db.cursor() + try: + cur.execute("""UPDATE Tweets \ + SET Screenshot=1 \ + WHERE Tweet_Id=%s""", [path]) + self.db.commit() + print "Screenshot OK. Tweet id ", path + except MySQLdb.Error, e: + try: + print "MySQL Error [%d]: %s" % (e.args[0], e.args[1]) + except IndexError: + print "MySQL Error: %s" % str(e) + + print "Error", e.args[0], e.args[1] + print "Warning:", path, "not saved to database" + return True + + def markDeleted(path): + cur = self.db.cursor() + try: + cur.execute("""UPDATE Tweets \ + SET Deleted=1 \ + WHERE Tweet_Id=%s""", [path]) + self.db.commit() + print "Tweet marked as deleted ", path + except MySQLdb.Error, e: + try: + print "MySQL Error [%d]: %s" % (e.args[0], e.args[1]) + except IndexError: + print "MySQL Error: %s" % str(e) + + print "Error", e.args[0], e.args[1] + print "Warning:", path, "not saved to database" + return True + + def getLogs(): + cur = self.db.cursor() + return cur.execute("SELECT Url, Tweet_Id FROM Tweets WHERE Screenshot=0 AND Deleted=0 ") + + def save(url, status): + (author, text, id_str) = (status.user.screen_name, status.text, status.id_str) + cur = db.cursor() + + cur.execute("CREATE TABLE IF NOT EXISTS Tweets(Id INT PRIMARY KEY AUTO_INCREMENT, \ + Author VARCHAR(255), \ + Text VARCHAR(255), \ + Url VARCHAR(255), \ + Tweet_Id VARCHAR(255), \ + Screenshot INT, \ + Deleted INT)") + + try: + cur.execute("""INSERT INTO Tweets(Author, Text, Url, Tweet_Id, Screenshot, Deleted) + VALUES (%s, %s, %s, %s, %s, %s)""", + (author, text, url, id_str, 0, 0)) + self.db.commit() + print "Wrote to database:", author, id_str + except MySQLdb.Error, e: + print "Error", e.args[0], e.args[1] + self.db.rollback() + print "ERROR writing database" diff --git a/DB/pynx.py b/DB/pynx.py new file mode 100644 index 0000000..9fb71e7 --- /dev/null +++ b/DB/pynx.py @@ -0,0 +1,103 @@ +import networkx as nx +import unicodedata +import json +import re + +from . import generic + +hashre = re.compile(r'(#\w+)') +userre = re.compile(r'(@\w+)') + +def normalize(input_str): + return unicodedata.normalize('NFKD', input_str).encode('ASCII', 'ignore').lower() + +def add_node(G, node, attr = {}): + try: + G[node]['weight'] += 1 + except KeyError: + G.add_node(node, weight = 1) + +def add_edge(G, n, p): + try: + G.edges[n, p]['weight'] += 1 + except KeyError: + G.add_edge(n, p, weight = 1) + +def add_tags(G, text): + tags = hashre.findall(text) + while len(tags) > 1: + t = normalize(tags.pop()) + add_node(G, t) + for u in tags: + u = normalize(u) + add_node(G, u) + add_edge(G, t, u) + return G + +def add_users(G, text, status): + users = set(userre.findall(text)) + if status.in_reply_to_screen_name: users.add("@%s" % status.in_reply_to_screen_name) + try: + users.append("@%s" % status.retweeted_status.user.screen_name) + except AttributeError: + pass + u = "@%s" % status.user.screen_name + add_node(G, u) + for v in users: + add_edge(G, u, v) + +class Driver(generic.DB): + def __init__(self, filename = "graph.gexf"): + generic.DB.__init__(self) + + self.name = "NetworkX DB Driver" + self.filename = filename + self. type = filename.split('.')[-1] or 'gexf' + self._user_graph = 'user-%s' % filename + self._hash_graph = 'hash-%s' % filename + self._write = getattr(nx, 'write_%s' % self.type) + self._read = getattr(nx, 'read_%s' % self.type) + + self.G = self._open_graph(self._user_graph) + self.H = self._open_graph(self._hash_graph) + print 'graphs opened', self.G.nodes(), self.H.nodes() + + def _open_graph(self, filename): + try: + return self._read(filename) + except IOError: + return nx.Graph() + + def getTweets(self): + return [n for n in self.G.nodes()] + + def markDeleted(self, id): + self.G.nodes[id]['deleted'] = True + + def writeSuccess(self, path): + print "NOT IMPLEMENTED" + + def getLogs(self): + print "NOT IMPLEMENTED" + + def _write_all(self): + self._write(self.H, self._hash_graph) + self._write(self.G, self._user_graph) + + def close(self): + self._write_all() + + def save(self, url, status): + try: + text = status.extended_tweet.text + except AttributeError: + text = status.text + + add_tags(self.H, text) + add_users(self.G, text, status) + print 'H', self.H.nodes() + self._write_all() + +if __name__ == '__main__': + G = nx.Graph() + add_users(G, 'RT @test blah blah #gnu @other', {}) diff --git a/DB/sqlite.py b/DB/sqlite.py new file mode 100644 index 0000000..4aaf50c --- /dev/null +++ b/DB/sqlite.py @@ -0,0 +1,70 @@ +import sqlite3 +import generic + +class SQLiteDriver(generic.DB): + def __init__(self, filename = 'twitter.db'): + super(self) + self.db = sqlite3.connect(filename) + + + def getTweets(self): + self.db.cursor() + return cur.execute("""SELECT * \ + FROM Tweets \ + WHERE Deleted=0""") + + def _commit(self, query): + cur = self.db.cursor() + try: + cur.execute(query) + self.db.commit() + except sqlite3.Error, e: + print "Error", e + return False + return True + + def writeSuccess(self, path): + if (self._commit("""UPDATE Tweets \ + SET Screenshot=1 \ + WHERE Tweet_Id='%s'""")): + print "Screenshot OK. Tweet id ", path + return True + print "Warning:", path, "not saved to database" + return False + + def markDeleted(self, path): + if (self._commit("""UPDATE Tweets \ + SET Deleted=1 \ + WHERE Tweet_Id='%s'""" % [path])): + print "Tweet marked as deleted ", path + return True + print "Warning:", path, "not saved to database" + return False + + def getLogs(self, ): + cur = db.cursor() + return cur.execute("SELECT Url, Tweet_Id FROM Tweets WHERE Screenshot=0 AND Deleted=0 ") + + def save(self, url, status): + (author, text, id_str) = (status.user.screen_name, status.text, status.id_str) + cur = db.cursor() + + cur.execute("CREATE TABLE IF NOT EXISTS Tweets (Id INTEGER PRIMARY KEY, \ + Author VARCHAR(255), \ + Text VARCHAR(255), \ + Url VARCHAR(255), \ + Tweet_Id VARCHAR(255), \ + Screenshot INTEGER, \ + Deleted INTEGER)") + + try: + cur.execute(""" + INSERT INTO Tweets(Author, Text, Url, Tweet_Id, Screenshot, Deleted) + VALUES ('%s', '%s', '%s', '%s', '%s', '%s') + """ % (author, text, url, id_str, 0, 0)) + db.commit() + #print "Wrote to database:", author, id_str + except sqlite3.Error, e: + print "Error", e + db.rollback() + print "ERROR writing database" diff --git a/db_elasticsearch.py b/db_elasticsearch.py deleted file mode 100644 index e049a61..0000000 --- a/db_elasticsearch.py +++ /dev/null @@ -1,13 +0,0 @@ -import elasticsearch - -es = elasticsearch.Elasticsearch() - -def save_to_db(author, text, url, id_str): - es.index(index="tweets", doc_type="tweet", id=id, body={ - "Author": author, - "Text": text.encode('utf-8'), - "Url": url, - "Tweet_Id": id_str - }) - - diff --git a/db_mysql.py b/db_mysql.py deleted file mode 100644 index e834743..0000000 --- a/db_mysql.py +++ /dev/null @@ -1,70 +0,0 @@ -import MySQLdb - - -db = MySQLdb.connect(host="", - user="", - passwd="", - db="", - charset="utf8") - -def writeSuccess(path): - cur = db.cursor() - try: - cur.execute("""UPDATE Tweets \ - SET Screenshot=1 \ - WHERE Tweet_Id=%s""", [path]) - db.commit() - print "Screenshot OK. Tweet id ", path - except MySQLdb.Error, e: - try: - print "MySQL Error [%d]: %s" % (e.args[0], e.args[1]) - except IndexError: - print "MySQL Error: %s" % str(e) - - print "Error", e.args[0], e.args[1] - print "Warning:", path, "not saved to database" - return True - -def markDeleted(path): - cur = db.cursor() - try: - cur.execute("""UPDATE Tweets \ - SET Deleted=1 \ - WHERE Tweet_Id=%s""", [path]) - db.commit() - print "Tweet marked as deleted ", path - except MySQLdb.Error, e: - try: - print "MySQL Error [%d]: %s" % (e.args[0], e.args[1]) - except IndexError: - print "MySQL Error: %s" % str(e) - - print "Error", e.args[0], e.args[1] - print "Warning:", path, "not saved to database" - return True - -def getLogs(): - cur = db.cursor() - return cur.execute("SELECT Url, Tweet_Id FROM Tweets WHERE Screenshot=0 AND Deleted=0 ") - -def save_to_db(author, text, url, id_str): - cur = db.cursor() - - cur.execute("CREATE TABLE IF NOT EXISTS Tweets(Id INT PRIMARY KEY AUTO_INCREMENT, \ - Author VARCHAR(255), \ - Text VARCHAR(255), \ - Url VARCHAR(255), \ - Tweet_Id VARCHAR(255), \ - Screenshot INT, \ - Deleted INT)") - - try: - cur.execute("""INSERT INTO Tweets(Author, Text, Url, Tweet_Id, Screenshot, Deleted) - VALUES (%s, %s, %s, %s, %s, %s)""", - (author, text, url, id_str, 0, 0)) - db.commit() - print "Wrote to database:", author, id_str - except MySQLdb.Error, e: - print "Error", e.args[0], e.args[1] - db.rollback() - print "ERROR writing database" diff --git a/db_sqlite.py b/db_sqlite.py deleted file mode 100644 index 0b31f6c..0000000 --- a/db_sqlite.py +++ /dev/null @@ -1,57 +0,0 @@ -import sqlite3 - -db = sqlite3.connect('twitter.db') - -def writeSuccess(path): - cur = db.cursor() - try: - cur.execute("""UPDATE Tweets \ - SET Screenshot=1 \ - WHERE Tweet_Id='%s'""" % [path]) - db.commit() - print "Screenshot OK. Tweet id ", path - except sqlite3.Error, e: - print "Error", e - print "Warning:", path, "not saved to database" - return True - -def markDeleted(path): - cur = db.cursor() - try: - cur.execute("""UPDATE Tweets \ - SET Deleted=1 \ - WHERE Tweet_Id='%s'""" % [path]) - db.commit() - print "Tweet marked as deleted ", path - except sqlite3.Error, e: - print "Error", e - print "Warning:", path, "not saved to database" - return True - -def getLogs(): - cur = db.cursor() - return cur.execute("SELECT Url, Tweet_Id FROM Tweets WHERE Screenshot=0 AND Deleted=0 ") - -def save_to_db(author, text, url, id_str): - cur = db.cursor() - - cur.execute("CREATE TABLE IF NOT EXISTS Tweets (Id INTEGER PRIMARY KEY, \ - Author VARCHAR(255), \ - Text VARCHAR(255), \ - Url VARCHAR(255), \ - Tweet_Id VARCHAR(255), \ - Screenshot INTEGER, \ - Deleted INTEGER)") - - try: - cur.execute("""INSERT INTO Tweets(Author, Text, Url, Tweet_Id, Screenshot, Deleted) - VALUES ('%s', '%s', '%s', '%s', '%s', '%s')""" % (author, text, url, id_str, 0, 0)) - db.commit() - #print "Wrote to database:", author, id_str - except sqlite3.Error, e: - print "Error", e - db.rollback() - print "ERROR writing database" - -if __name__ == '__main__': - save_to_db('xaiki', 'blah blah', 'url', 'id') From 9c73a88f5c4658f6c8b1c501d92682d61592a450 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Fri, 17 Aug 2018 13:13:10 -0300 Subject: [PATCH 11/90] port streaming to new DB code, add signal handler to properly write files on ^C --- streaming.py | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/streaming.py b/streaming.py index ff8ed3f..8628bec 100644 --- a/streaming.py +++ b/streaming.py @@ -1,3 +1,4 @@ +import signal import sys import re from getopt import getopt, GetoptError @@ -8,7 +9,7 @@ from config import get_config r = re.compile("\s+") -db = "mysql" +db_driver = "mysql" ids = [] track = [] argv = sys.argv @@ -29,7 +30,7 @@ def usage(): if opt in ("-c", "--config"): config_file = arg if opt in ("-D", "--db"): - db = arg + db_driver = arg if opt in ("-i", "--ids"): ids += r.sub(",", arg).split(',') if opt in ("-t", "--track"): @@ -43,14 +44,20 @@ def usage(): config = get_config(config_file) ids += args -if db == "mysql": - from db_mysql import save_to_db -elif db == "sqlite": - from db_sqlite import save_to_db -elif db == "elasticsearch": - from db_elasticsearch import save_to_db +if db_driver == "mysql": + from DB.mysql import Driver + filename = filename or "mysql://" +elif db_driver == "sqlite": + from DB.sqlite import Driver + filename = filename or "twitter.sqlite" +elif db_driver == "elasticsearch": + from DB.elasticsearch import Driver + filename = filename or "ec://" +elif db_driver == "pynx": + from DB.pynx import Driver + filename = filename or "graph.gexf" else: - print "ERROR could not find db driver for ", db + print "ERROR could not find db driver for ", db_driver sys.exit(-2) # This is the listener, resposible for receiving data class StdOutListener(StreamListener): @@ -59,7 +66,7 @@ def on_status(self, status): tweet_url = "http://twitter.com/" + status.user.screen_name + "/status/" + status.id_str print "TWEET", status.text print "URL", tweet_url - save_to_db(status.user.screen_name, status.text, tweet_url, status.id_str) + db.save(tweet_url, status) def on_error(self, status): print status @@ -69,5 +76,12 @@ def on_error(self, status): auth = OAuthHandler(config['consumer_key'], config['consumer_secret']) auth.set_access_token(config['access_token'], config['access_token_secret']) + db = Driver(filename) + def signal_handler(sig, frame): + db.close() + sys.exit(0) + + signal.signal(signal.SIGINT, signal_handler) + stream = Stream(auth, l) stream.filter(follow=ids, track=track) From 5e7b7206343eed94b30e8e0fc97d6dca3a2d3fee Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Fri, 17 Aug 2018 13:14:18 -0300 Subject: [PATCH 12/90] detect connection errors and retry --- streaming.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/streaming.py b/streaming.py index 8628bec..e0e3cd6 100644 --- a/streaming.py +++ b/streaming.py @@ -3,6 +3,7 @@ import re from getopt import getopt, GetoptError +from urllib3.exceptions import ProtocolError from tweepy.streaming import StreamListener from tweepy import OAuthHandler from tweepy import Stream @@ -84,4 +85,9 @@ def signal_handler(sig, frame): signal.signal(signal.SIGINT, signal_handler) stream = Stream(auth, l) - stream.filter(follow=ids, track=track) + print "STREAM",ids, track + while True: + try: + stream.filter(follow=ids, track=track) + except ProtocolError: + pass From cabf63f14f1fdeaad6176062db30e68be618946e Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Fri, 17 Aug 2018 13:15:08 -0300 Subject: [PATCH 13/90] -f is for config file --- streaming.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/streaming.py b/streaming.py index e0e3cd6..a89f811 100644 --- a/streaming.py +++ b/streaming.py @@ -15,6 +15,7 @@ track = [] argv = sys.argv config_file = 0 +filename = None if ((sys.argv[0]).find(".py") != -1): argv = sys.argv[1:] @@ -38,11 +39,9 @@ def usage(): track += r.sub(",", arg).split(',') if opt in ("-f", "--file"): - with open(arg) as f: - for row in f: - ids.append(row) + filename=arg -config = get_config(config_file) + config = get_config(config_file) ids += args if db_driver == "mysql": From ed32799230439f66d5d3ffd5fe46c38a18c5573e Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Sat, 18 Aug 2018 14:27:59 -0300 Subject: [PATCH 14/90] PYNX: normalize users too --- DB/pynx.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DB/pynx.py b/DB/pynx.py index 9fb71e7..7b74374 100644 --- a/DB/pynx.py +++ b/DB/pynx.py @@ -41,10 +41,10 @@ def add_users(G, text, status): users.append("@%s" % status.retweeted_status.user.screen_name) except AttributeError: pass - u = "@%s" % status.user.screen_name + u = normalize("@%s" % status.user.screen_name) add_node(G, u) for v in users: - add_edge(G, u, v) + add_edge(G, u, normalize(v)) class Driver(generic.DB): def __init__(self, filename = "graph.gexf"): From d4161e9d76f4ebe3e6b6d0d08202af6ad887225f Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Sat, 18 Aug 2018 14:28:47 -0300 Subject: [PATCH 15/90] move arg parsing to config.py and use it in streaming.py --- config.py | 116 +++++++++++++++++++++++++++++++++++++++++++++++++-- streaming.py | 58 ++++---------------------- 2 files changed, 121 insertions(+), 53 deletions(-) diff --git a/config.py b/config.py index ec2c666..f7bbf54 100644 --- a/config.py +++ b/config.py @@ -1,7 +1,117 @@ -from random import randint +import re +import sys import json +from getopt import getopt, GetoptError +from random import randint + +r = re.compile("\s+") +filename = None + def get_config(filename): + d = load_json(filename) + return d[randint(0, d.__len__() - 1)] + +def o2u(option): + args = "-%s|--%s" % (option['short'].replace(':', ''), option['long']) + usage = option['usage'] % args + return "\t[%s]\t%s\n" % (usage, option['doc']) + +def usage(name, options): + print "usage: %s" %name + return reduce(lambda acc, cur: acc + o2u(cur), options, "") + +def identity(a): + return a + +def load_json(filename): with open(filename) as data: - d = json.load(data) - return d[randint(0, d.__len__() - 1)] + return json.load(data) + +def load_row_file(file): + ret = [] + with open(file) as f: + for row in f: + ret.append(row) + return ret + +def parse_db(arg): + try: + db_driver, filename = arg.split(':') + except ValueError: + db_driver = arg + filename = None + finally: + if db_driver == "mysql": + from DB.mysql import Driver + filename = filename or "mysql://" + elif db_driver == "sqlite": + from DB.sqlite import Driver + filename = filename or "twitter.sqlite" + elif db_driver == "elasticsearch": + from DB.elasticsearch import Driver + filename = filename or "ec://" + elif db_driver == "pynx": + from DB.pynx import Driver + filename = filename or "graph.gexf" + else: + print "ERROR could not find db driver for ", db_driver + sys.exit(-2) + return Driver(filename) + +def parse_comas(arg): + return r.sub(",", arg).split(',') + +def make_short(o): + if o.has_key('parse'): return o['short'] + ':' + return o['short'] + +def make_long(o): + if o.has_key('parse'): return o['long'] + '=' + return o['long'] + +def parse_args(options): + argv = sys.argv + + if ((sys.argv[0]).find(".py") != -1): + argv = sys.argv[1:] + + shorts = "".join(map(make_short, options)) + longs = map(make_long, options) + sopthash = dict(map(lambda o: (o['short'], o), options)) + lopthash = dict(map(lambda o: (o['long'], o), options)) + parsed = {} + + def usage(): + print "usage: %s" % "\n".join(map(o2u, options)) + + def get_key(opt): + try: + return sopthash[opt[1:]] + except KeyError: + return lopthash[opt[2:]] + + try: + opts, args = getopt(argv, shorts, longs) + except GetoptError: + usage() + sys.exit(2) + for opt, arg in opts: + try: + k = get_key(opt) + parsed.update({k['long']: k['parse'](arg)}) + except KeyError: + print "couldn't parse arg: %s, %s" % (opt, arg) + + print "parsed: %s" % (parsed) + return parsed + +CONFIG_FILE = {'long': 'config', 'short': 'c', 'usage': '%s config.json', 'doc': 'config file', 'parse': load_json} +IDS = {'long': 'ids', 'short': 'i', 'usage': '%s "id1,id2,id3"', 'doc': 'twitter user ids', 'parse': parse_comas} +USERS = {'long': 'user', 'short': 'u', 'usage': '%s "user1,user2,usr3"', 'doc': 'twitter usernames', 'parse': parse_comas} +TERMS = {'long': 'track', 'short': 't', 'usage': '%s "term1,term2,term3"', 'doc': 'terms to track', 'parse': parse_comas} +DBS = {'long': 'database', 'short': 'D', 'usage': '%s [mysql|sqlite|elasticsearch]', 'doc': 'database system to use', 'parse': parse_db} +options = [CONFIG_FILE, IDS, USERS, TERMS, DBS] + +if __name__ == '__main__': + parse_args(options) diff --git a/streaming.py b/streaming.py index a89f811..f6ca780 100644 --- a/streaming.py +++ b/streaming.py @@ -7,58 +7,11 @@ from tweepy.streaming import StreamListener from tweepy import OAuthHandler from tweepy import Stream -from config import get_config -r = re.compile("\s+") -db_driver = "mysql" -ids = [] -track = [] -argv = sys.argv -config_file = 0 -filename = None - -if ((sys.argv[0]).find(".py") != -1): - argv = sys.argv[1:] - -def usage(): - print "usage: [-c|--config config.json] [-f|--file id.csv] [-i|--ids id1,id2,id3...] [-t|--track hash1,hash2,hash3...] [-d|--db]" - -try: - opts, args = getopt(argv, "c:f:i:t:D:", ["config", "file", "ids", "track"]) -except GetoptError: - usage() - sys.exit(2) -for opt, arg in opts: - if opt in ("-c", "--config"): - config_file = arg - if opt in ("-D", "--db"): - db_driver = arg - if opt in ("-i", "--ids"): - ids += r.sub(",", arg).split(',') - if opt in ("-t", "--track"): - track += r.sub(",", arg).split(',') +import config as c - if opt in ("-f", "--file"): - filename=arg - - config = get_config(config_file) -ids += args +r = re.compile("\s+") -if db_driver == "mysql": - from DB.mysql import Driver - filename = filename or "mysql://" -elif db_driver == "sqlite": - from DB.sqlite import Driver - filename = filename or "twitter.sqlite" -elif db_driver == "elasticsearch": - from DB.elasticsearch import Driver - filename = filename or "ec://" -elif db_driver == "pynx": - from DB.pynx import Driver - filename = filename or "graph.gexf" -else: - print "ERROR could not find db driver for ", db_driver - sys.exit(-2) # This is the listener, resposible for receiving data class StdOutListener(StreamListener): @@ -72,11 +25,16 @@ def on_error(self, status): print status if __name__ == "__main__": + opts = c.parse_args([c.CONFIG_FILE, c.IDS, c.TERMS, c.DBS]) + db = opts['database'] + config = opts['config'][0] + ids = opts['ids'] + track = opts['track'] + l = StdOutListener() auth = OAuthHandler(config['consumer_key'], config['consumer_secret']) auth.set_access_token(config['access_token'], config['access_token_secret']) - db = Driver(filename) def signal_handler(sig, frame): db.close() sys.exit(0) From 08d2f8b473d5f7a8c150337b74863519e01d313d Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Sat, 18 Aug 2018 14:30:38 -0300 Subject: [PATCH 16/90] ignore .pyc files Signed-off-by: Niv Sardi --- .gitignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0d20b64 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +*.pyc From 30b2af3d39e5aa15296427b4f8ceb3c91c3b52bd Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Sat, 18 Aug 2018 14:46:57 -0300 Subject: [PATCH 17/90] config: unify load_* function's naming: use filename for all --- config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config.py b/config.py index f7bbf54..5a6f912 100644 --- a/config.py +++ b/config.py @@ -28,9 +28,9 @@ def load_json(filename): with open(filename) as data: return json.load(data) -def load_row_file(file): +def load_row_file(filename): ret = [] - with open(file) as f: + with open(filename) as f: for row in f: ret.append(row) return ret From a5280dbdd4dda008dcc63fc3c5ccdce196dcd0f0 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Sat, 18 Aug 2018 14:47:34 -0300 Subject: [PATCH 18/90] config: implement load_csv and introduce CSV_FILE config option --- config.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/config.py b/config.py index 5a6f912..6847090 100644 --- a/config.py +++ b/config.py @@ -35,6 +35,16 @@ def load_row_file(filename): ret.append(row) return ret +def load_csv(filename): + ret = [] + with open(filename, "rb") as csvfile: + reader = csv.reader(csvfile, delimiter=',', quotechar='|') + for row in reader: + for elem in row: + ret.extend(elem.strip().split(',')) + return ret + + def parse_db(arg): try: db_driver, filename = arg.split(':') @@ -111,6 +121,8 @@ def get_key(opt): USERS = {'long': 'user', 'short': 'u', 'usage': '%s "user1,user2,usr3"', 'doc': 'twitter usernames', 'parse': parse_comas} TERMS = {'long': 'track', 'short': 't', 'usage': '%s "term1,term2,term3"', 'doc': 'terms to track', 'parse': parse_comas} DBS = {'long': 'database', 'short': 'D', 'usage': '%s [mysql|sqlite|elasticsearch]', 'doc': 'database system to use', 'parse': parse_db} +CSV_FILE = {'long': 'csv', 'short': 'f', 'usage': '%s file.csv', 'doc': 'load data from a csv file', 'parse': load_csv} + options = [CONFIG_FILE, IDS, USERS, TERMS, DBS] if __name__ == '__main__': From 5e74f4eb15a0dea9ceacca0a367dc839e49a545c Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Sat, 18 Aug 2018 14:48:45 -0300 Subject: [PATCH 19/90] config: rename user option to users --- config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config.py b/config.py index 6847090..3711846 100644 --- a/config.py +++ b/config.py @@ -118,7 +118,7 @@ def get_key(opt): CONFIG_FILE = {'long': 'config', 'short': 'c', 'usage': '%s config.json', 'doc': 'config file', 'parse': load_json} IDS = {'long': 'ids', 'short': 'i', 'usage': '%s "id1,id2,id3"', 'doc': 'twitter user ids', 'parse': parse_comas} -USERS = {'long': 'user', 'short': 'u', 'usage': '%s "user1,user2,usr3"', 'doc': 'twitter usernames', 'parse': parse_comas} +USERS = {'long': 'users', 'short': 'u', 'usage': '%s "user1,user2,usr3"', 'doc': 'twitter usernames', 'parse': parse_comas} TERMS = {'long': 'track', 'short': 't', 'usage': '%s "term1,term2,term3"', 'doc': 'terms to track', 'parse': parse_comas} DBS = {'long': 'database', 'short': 'D', 'usage': '%s [mysql|sqlite|elasticsearch]', 'doc': 'database system to use', 'parse': parse_db} CSV_FILE = {'long': 'csv', 'short': 'f', 'usage': '%s file.csv', 'doc': 'load data from a csv file', 'parse': load_csv} From da0fe9913ab9ed0654c0f34d2bb36d92b5ca6ec5 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Sat, 18 Aug 2018 14:49:09 -0300 Subject: [PATCH 20/90] get_user_ids: use new config code --- get_user_ids.py | 35 ++++++++--------------------------- 1 file changed, 8 insertions(+), 27 deletions(-) diff --git a/get_user_ids.py b/get_user_ids.py index 2ae1f07..b462b65 100644 --- a/get_user_ids.py +++ b/get_user_ids.py @@ -1,37 +1,18 @@ -from getopt import getopt, GetoptError import tweepy import time import csv import sys +import config as c -from config import get_config +opts = c.parse_args([c.CONFIG_FILE, c.CSV_FILE, c.USERS]) -config = "./config.json" +authdata = opts['config'][0] +users = None +try: + users = opts['users'] +except KeyError: + users = opts['csv'] -if ((sys.argv[0]).find(".py") != -1): - argv = sys.argv[1:] - -def usage(): - print "usage: [-c|--config config.json] [-f|--file users.csv] [user1 [user2 [user3] ...]]" - -try: - opts, args = getopt(argv, "c:f:", ["config", "file"]) -except GetoptError: - usage() - sys.exit(2) -for opt, arg in opts: - if opt in ("-c", "--config"): - config = arg - if opt in ("-f", "--file"): - with open(arg, "rb") as csvfile: - reader = csv.reader(csvfile, delimiter=',', quotechar='|') - for row in reader: - for elem in row: - users.extend(elem.strip().split(',')) - -users += args - -authdata = get_config(config) print "looking for", users auth = tweepy.OAuthHandler(authdata['consumer_key'], authdata['consumer_secret']) From f4c94a59fd3e129e37f8b440f377defe738919a0 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Sat, 18 Aug 2018 14:50:41 -0300 Subject: [PATCH 21/90] port monitoring to new config system --- monitoring.py | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/monitoring.py b/monitoring.py index 0afeaee..b312a8f 100644 --- a/monitoring.py +++ b/monitoring.py @@ -1,15 +1,12 @@ -import MySQLdb +import sys import requests +import config as c +opts = c.parse_args([c.DBS]) +db = opts['database'] -db = MySQLdb.connect(host="", - user="", - passwd="", - db="", - charset="utf8") list_of_tweets = [] - def query(url): r = requests.get(url) if r.status_code != 200: @@ -19,11 +16,7 @@ def query(url): def read_database(db): - cur = db.cursor() - cur.execute("""SELECT * \ - FROM Tweets \ - WHERE Deleted=0""") - + cur = db.getTweets() for tweet in cur: list_of_tweets.append(tweet) print tweet @@ -33,11 +26,8 @@ def read_database(db): def check_tweet(): for tweet in read_database(db): if query(tweet[3]) is True: - cur = db.cursor() - cur.execute("""UPDATE Tweets \ - SET Deleted=1 \ - WHERE Tweet_Id=%s""", [tweet[4]]) - db.commit() + db.markDeleted(tweet[4]) + print "tweet deleted, id is", tweet[4] print "url is", tweet[3] From af8759b79d14db6d54cd1762148e70cbbac2d344 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Sat, 18 Aug 2018 14:51:59 -0300 Subject: [PATCH 22/90] port screenshot code to new config API --- screenshot.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/screenshot.py b/screenshot.py index 3e8158d..baff45c 100644 --- a/screenshot.py +++ b/screenshot.py @@ -3,7 +3,10 @@ from needle.cases import NeedleTestCase from needle.driver import NeedlePhantomJS -from db_mysql import db +import config as c + +opts = c.parse_args([c.DBS]) +db = opts['database'] class captureTweetScreenshots(NeedleTestCase): From 372320d04d562b5c84bcf34e104eaca0cce15b59 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Mon, 14 Jan 2019 10:44:48 -0500 Subject: [PATCH 23/90] fix sqlite driver Signed-off-by: Niv Sardi --- DB/sqlite.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/DB/sqlite.py b/DB/sqlite.py index 4aaf50c..647195a 100644 --- a/DB/sqlite.py +++ b/DB/sqlite.py @@ -1,9 +1,9 @@ import sqlite3 import generic -class SQLiteDriver(generic.DB): +class Driver(generic.DB): def __init__(self, filename = 'twitter.db'): - super(self) + generic.DB.__init__(self) self.db = sqlite3.connect(filename) @@ -42,12 +42,12 @@ def markDeleted(self, path): return False def getLogs(self, ): - cur = db.cursor() + cur = self.db.cursor() return cur.execute("SELECT Url, Tweet_Id FROM Tweets WHERE Screenshot=0 AND Deleted=0 ") def save(self, url, status): (author, text, id_str) = (status.user.screen_name, status.text, status.id_str) - cur = db.cursor() + cur = self.db.cursor() cur.execute("CREATE TABLE IF NOT EXISTS Tweets (Id INTEGER PRIMARY KEY, \ Author VARCHAR(255), \ @@ -62,9 +62,9 @@ def save(self, url, status): INSERT INTO Tweets(Author, Text, Url, Tweet_Id, Screenshot, Deleted) VALUES ('%s', '%s', '%s', '%s', '%s', '%s') """ % (author, text, url, id_str, 0, 0)) - db.commit() + self.db.commit() #print "Wrote to database:", author, id_str except sqlite3.Error, e: print "Error", e - db.rollback() + self.db.rollback() print "ERROR writing database" From e3207c7c4bf2acbf77fe6c653b6c1e94f3e65e3f Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Mon, 14 Jan 2019 10:45:01 -0500 Subject: [PATCH 24/90] allow for empty ids or tracks Signed-off-by: Niv Sardi --- streaming.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/streaming.py b/streaming.py index f6ca780..d46bfbc 100644 --- a/streaming.py +++ b/streaming.py @@ -28,8 +28,14 @@ def on_error(self, status): opts = c.parse_args([c.CONFIG_FILE, c.IDS, c.TERMS, c.DBS]) db = opts['database'] config = opts['config'][0] - ids = opts['ids'] - track = opts['track'] + try: + ids = opts['ids'] + except KeyError: + ids = [] + try: + track = opts['track'] + except KeyError: + track = [] l = StdOutListener() auth = OAuthHandler(config['consumer_key'], config['consumer_secret']) From 0b87ae4ee54fdf0cd02568e27636e4912235649b Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Mon, 29 Apr 2019 23:47:03 -0300 Subject: [PATCH 25/90] sqlite fix Signed-off-by: Niv Sardi --- DB/sqlite.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DB/sqlite.py b/DB/sqlite.py index 647195a..68426e1 100644 --- a/DB/sqlite.py +++ b/DB/sqlite.py @@ -8,7 +8,7 @@ def __init__(self, filename = 'twitter.db'): def getTweets(self): - self.db.cursor() + cur = self.db.cursor() return cur.execute("""SELECT * \ FROM Tweets \ WHERE Deleted=0""") From 9a84af6a017e07990e815d8f76f9b513408ccc58 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Tue, 30 Apr 2019 02:55:43 -0300 Subject: [PATCH 26/90] switch config to argparse Signed-off-by: Niv Sardi --- config.py | 211 ++++++++++++++++++++++++-------------------------- monitoring.py | 2 +- screenshot.py | 2 +- streaming.py | 15 ++-- 4 files changed, 108 insertions(+), 122 deletions(-) diff --git a/config.py b/config.py index 3711846..9114ec5 100644 --- a/config.py +++ b/config.py @@ -2,128 +2,119 @@ import sys import json -from getopt import getopt, GetoptError +import argparse from random import randint + +class LoadJSONAction(argparse.Action): + def __call__(self, parser, namespace, filename, option_string=None): + with open(filename) as data: + setattr(namespace, self.dest, json.load(data)) + +class LoadRowFileAction(argparse.Action): + def __call__(self, parser, namespace, filename, option_string=None): + ret = [] + with open(filename) as f: + for row in f: + ret.append(row) + setattr(namespace, self.dest, ret) + +class LoadCSVAction(argparse.Action): + def __call__(self, parser, namespace, filename, option_string=None): + ret = [] + with open(filename, "rb") as csvfile: + reader = csv.reader(csvfile, delimiter=',', quotechar='|') + for row in reader: + for elem in row: + ret.extend(elem.strip().split(',')) + setattr(namespace, self.dest, ret) + +class LoadDBDriverAction(argparse.Action): + def __call__(self, parser, namespace, arg, option_string=None): + try: + db_driver, filename = arg.split(':') + except ValueError: + db_driver = arg + filename = None + finally: + if db_driver == "mysql": + from DB.mysql import Driver + filename = filename or "mysql://" + elif db_driver == "sqlite": + from DB.sqlite import Driver + filename = filename or "twitter.sqlite" + elif db_driver == "elasticsearch": + from DB.elasticsearch import Driver + filename = filename or "ec://" + elif db_driver == "pynx": + from DB.pynx import Driver + filename = filename or "graph.gexf" + else: + print "ERROR could not find db driver for ", db_driver + sys.exit(-2) + setattr(namespace, self.dest, Driver(filename)) + +class ParseComasAction(argparse.Action): + def __call__(self, parser, namespace, values, option_string=None): + setattr(namespace, self.dest, r.sub(",", values).split(',')) + +CONFIG_FILE = { + 'flags': '-c, --config', + 'dest': 'config', + 'help': 'config file', + 'action': LoadJSONAction +} +IDS = { + 'flags': '-i, --ids', + 'dest': 'ids', + 'help': 'twitter user ids, as a comma-separated list', + 'action': ParseComasAction +} +USERS = { + 'flags': '-u, --users', + 'dest': 'users', + 'help': 'twitter usernames, as a comma-separated list', + 'action': ParseComasAction +} +TERMS = { + 'flags': '-t, --track', + 'dest': 'track', + 'help': 'terms to track, as a comma-separated list', + 'action': ParseComasAction +} +DBS = { + 'flags': '-D, --database', + 'dest': 'db', + 'help': 'database system to use (mysql, sqlite, elasticsearch)', + 'default': 'sqlite', + 'action': LoadDBDriverAction +} +CSV_FILE = { + 'flags': '-f, --csv', + 'dest': 'csv', + 'help': 'load data from a csv file', + 'action': LoadRowFileAction +} + +options = [CONFIG_FILE, IDS, USERS, TERMS, DBS] + r = re.compile("\s+") filename = None -def get_config(filename): - d = load_json(filename) - return d[randint(0, d.__len__() - 1)] - -def o2u(option): - args = "-%s|--%s" % (option['short'].replace(':', ''), option['long']) - usage = option['usage'] % args - return "\t[%s]\t%s\n" % (usage, option['doc']) - -def usage(name, options): - print "usage: %s" %name - return reduce(lambda acc, cur: acc + o2u(cur), options, "") - -def identity(a): - return a - -def load_json(filename): - with open(filename) as data: - return json.load(data) - -def load_row_file(filename): - ret = [] - with open(filename) as f: - for row in f: - ret.append(row) - return ret - -def load_csv(filename): - ret = [] - with open(filename, "rb") as csvfile: - reader = csv.reader(csvfile, delimiter=',', quotechar='|') - for row in reader: - for elem in row: - ret.extend(elem.strip().split(',')) - return ret - - -def parse_db(arg): - try: - db_driver, filename = arg.split(':') - except ValueError: - db_driver = arg - filename = None - finally: - if db_driver == "mysql": - from DB.mysql import Driver - filename = filename or "mysql://" - elif db_driver == "sqlite": - from DB.sqlite import Driver - filename = filename or "twitter.sqlite" - elif db_driver == "elasticsearch": - from DB.elasticsearch import Driver - filename = filename or "ec://" - elif db_driver == "pynx": - from DB.pynx import Driver - filename = filename or "graph.gexf" - else: - print "ERROR could not find db driver for ", db_driver - sys.exit(-2) - return Driver(filename) - -def parse_comas(arg): - return r.sub(",", arg).split(',') - -def make_short(o): - if o.has_key('parse'): return o['short'] + ':' - return o['short'] - -def make_long(o): - if o.has_key('parse'): return o['long'] + '=' - return o['long'] - def parse_args(options): argv = sys.argv if ((sys.argv[0]).find(".py") != -1): argv = sys.argv[1:] - shorts = "".join(map(make_short, options)) - longs = map(make_long, options) - sopthash = dict(map(lambda o: (o['short'], o), options)) - lopthash = dict(map(lambda o: (o['long'], o), options)) - parsed = {} - - def usage(): - print "usage: %s" % "\n".join(map(o2u, options)) - - def get_key(opt): - try: - return sopthash[opt[1:]] - except KeyError: - return lopthash[opt[2:]] - - try: - opts, args = getopt(argv, shorts, longs) - except GetoptError: - usage() - sys.exit(2) - for opt, arg in opts: - try: - k = get_key(opt) - parsed.update({k['long']: k['parse'](arg)}) - except KeyError: - print "couldn't parse arg: %s, %s" % (opt, arg) - - print "parsed: %s" % (parsed) - return parsed - -CONFIG_FILE = {'long': 'config', 'short': 'c', 'usage': '%s config.json', 'doc': 'config file', 'parse': load_json} -IDS = {'long': 'ids', 'short': 'i', 'usage': '%s "id1,id2,id3"', 'doc': 'twitter user ids', 'parse': parse_comas} -USERS = {'long': 'users', 'short': 'u', 'usage': '%s "user1,user2,usr3"', 'doc': 'twitter usernames', 'parse': parse_comas} -TERMS = {'long': 'track', 'short': 't', 'usage': '%s "term1,term2,term3"', 'doc': 'terms to track', 'parse': parse_comas} -DBS = {'long': 'database', 'short': 'D', 'usage': '%s [mysql|sqlite|elasticsearch]', 'doc': 'database system to use', 'parse': parse_db} -CSV_FILE = {'long': 'csv', 'short': 'f', 'usage': '%s file.csv', 'doc': 'load data from a csv file', 'parse': load_csv} + parser = argparse.ArgumentParser(description='Twitter Tools: query twitter from the commandline') -options = [CONFIG_FILE, IDS, USERS, TERMS, DBS] + def add_argument(o): + flags = o.pop('flags') + parser.add_argument(flags, **o) + + map(add_argument, options) + return parser.parse_args() if __name__ == '__main__': parse_args(options) diff --git a/monitoring.py b/monitoring.py index b312a8f..778214e 100644 --- a/monitoring.py +++ b/monitoring.py @@ -3,7 +3,7 @@ import config as c opts = c.parse_args([c.DBS]) -db = opts['database'] +db = opts.db list_of_tweets = [] diff --git a/screenshot.py b/screenshot.py index baff45c..bf24789 100644 --- a/screenshot.py +++ b/screenshot.py @@ -6,7 +6,7 @@ import config as c opts = c.parse_args([c.DBS]) -db = opts['database'] +db = opts.db class captureTweetScreenshots(NeedleTestCase): diff --git a/streaming.py b/streaming.py index d46bfbc..623c5a9 100644 --- a/streaming.py +++ b/streaming.py @@ -26,16 +26,11 @@ def on_error(self, status): if __name__ == "__main__": opts = c.parse_args([c.CONFIG_FILE, c.IDS, c.TERMS, c.DBS]) - db = opts['database'] - config = opts['config'][0] - try: - ids = opts['ids'] - except KeyError: - ids = [] - try: - track = opts['track'] - except KeyError: - track = [] + + db = opts.db + config = opts.config[0] + ids = opts.ids or [] + track = opts.track or [] l = StdOutListener() auth = OAuthHandler(config['consumer_key'], config['consumer_secret']) From 713e65d108805eef397d7ae445dde4446682689f Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Tue, 30 Apr 2019 03:13:39 -0300 Subject: [PATCH 27/90] pep8 --- DB/generic.py | 5 +++ DB/mysql.py | 43 ++++++++++++------- DB/pynx.py | 48 ++++++++++++--------- DB/sqlite.py | 46 +++++++++++++------- config.py | 111 ++++++++++++++++++++++++++++++------------------ get_user_ids.py | 42 ++++++++++-------- monitoring.py | 10 ++--- screenshot.py | 12 +++--- streaming.py | 57 ++++++++++++++++--------- 9 files changed, 234 insertions(+), 140 deletions(-) diff --git a/DB/generic.py b/DB/generic.py index a9e513d..f12fd13 100644 --- a/DB/generic.py +++ b/DB/generic.py @@ -1,13 +1,18 @@ class DB: def __init__(self): self.name = "Generic DB Driver" + def getTweets(self): print "NOT IMPLEMENTED" + def writeSuccess(self, path): print "NOT IMPLEMENTED" + def markDeleted(self, path): print "NOT IMPLEMENTED" + def getLogs(self): print "NOT IMPLEMENTED" + def save(self, url, status): print "NOT IMPLEMENTED" diff --git a/DB/mysql.py b/DB/mysql.py index c1d8514..eed2e3c 100644 --- a/DB/mysql.py +++ b/DB/mysql.py @@ -1,28 +1,30 @@ import MySQLdb import generic + class MySQLDriver(generic.DB): def __init__(self): super(self) self.name = "MySQL DB Driver" - self.db = MySQLdb.connect(host="", - user="", - passwd="", - db="", - charset="utf8") + self.db = MySQLdb.connect(host="", user="", passwd="", db="", charset="utf8") def getTweets(): self.db.cursor() - return cur.execute("""SELECT * \ + return cur.execute( + """SELECT * \ FROM Tweets \ - WHERE Deleted=0""") + WHERE Deleted=0""" + ) def writeSuccess(path): cur = self.db.cursor() try: - cur.execute("""UPDATE Tweets \ + cur.execute( + """UPDATE Tweets \ SET Screenshot=1 \ - WHERE Tweet_Id=%s""", [path]) + WHERE Tweet_Id=%s""", + [path], + ) self.db.commit() print "Screenshot OK. Tweet id ", path except MySQLdb.Error, e: @@ -38,9 +40,12 @@ def writeSuccess(path): def markDeleted(path): cur = self.db.cursor() try: - cur.execute("""UPDATE Tweets \ + cur.execute( + """UPDATE Tweets \ SET Deleted=1 \ - WHERE Tweet_Id=%s""", [path]) + WHERE Tweet_Id=%s""", + [path], + ) self.db.commit() print "Tweet marked as deleted ", path except MySQLdb.Error, e: @@ -55,24 +60,30 @@ def markDeleted(path): def getLogs(): cur = self.db.cursor() - return cur.execute("SELECT Url, Tweet_Id FROM Tweets WHERE Screenshot=0 AND Deleted=0 ") + return cur.execute( + "SELECT Url, Tweet_Id FROM Tweets WHERE Screenshot=0 AND Deleted=0 " + ) def save(url, status): (author, text, id_str) = (status.user.screen_name, status.text, status.id_str) cur = db.cursor() - cur.execute("CREATE TABLE IF NOT EXISTS Tweets(Id INT PRIMARY KEY AUTO_INCREMENT, \ + cur.execute( + "CREATE TABLE IF NOT EXISTS Tweets(Id INT PRIMARY KEY AUTO_INCREMENT, \ Author VARCHAR(255), \ Text VARCHAR(255), \ Url VARCHAR(255), \ Tweet_Id VARCHAR(255), \ Screenshot INT, \ - Deleted INT)") + Deleted INT)" + ) try: - cur.execute("""INSERT INTO Tweets(Author, Text, Url, Tweet_Id, Screenshot, Deleted) + cur.execute( + """INSERT INTO Tweets(Author, Text, Url, Tweet_Id, Screenshot, Deleted) VALUES (%s, %s, %s, %s, %s, %s)""", - (author, text, url, id_str, 0, 0)) + (author, text, url, id_str, 0, 0), + ) self.db.commit() print "Wrote to database:", author, id_str except MySQLdb.Error, e: diff --git a/DB/pynx.py b/DB/pynx.py index 7b74374..9df97b6 100644 --- a/DB/pynx.py +++ b/DB/pynx.py @@ -5,23 +5,27 @@ from . import generic -hashre = re.compile(r'(#\w+)') -userre = re.compile(r'(@\w+)') +hashre = re.compile(r"(#\w+)") +userre = re.compile(r"(@\w+)") + def normalize(input_str): - return unicodedata.normalize('NFKD', input_str).encode('ASCII', 'ignore').lower() + return unicodedata.normalize("NFKD", input_str).encode("ASCII", "ignore").lower() + -def add_node(G, node, attr = {}): +def add_node(G, node, attr={}): try: - G[node]['weight'] += 1 + G[node]["weight"] += 1 except KeyError: - G.add_node(node, weight = 1) + G.add_node(node, weight=1) + def add_edge(G, n, p): try: - G.edges[n, p]['weight'] += 1 + G.edges[n, p]["weight"] += 1 except KeyError: - G.add_edge(n, p, weight = 1) + G.add_edge(n, p, weight=1) + def add_tags(G, text): tags = hashre.findall(text) @@ -34,9 +38,11 @@ def add_tags(G, text): add_edge(G, t, u) return G + def add_users(G, text, status): users = set(userre.findall(text)) - if status.in_reply_to_screen_name: users.add("@%s" % status.in_reply_to_screen_name) + if status.in_reply_to_screen_name: + users.add("@%s" % status.in_reply_to_screen_name) try: users.append("@%s" % status.retweeted_status.user.screen_name) except AttributeError: @@ -46,21 +52,22 @@ def add_users(G, text, status): for v in users: add_edge(G, u, normalize(v)) + class Driver(generic.DB): - def __init__(self, filename = "graph.gexf"): + def __init__(self, filename="graph.gexf"): generic.DB.__init__(self) self.name = "NetworkX DB Driver" self.filename = filename - self. type = filename.split('.')[-1] or 'gexf' - self._user_graph = 'user-%s' % filename - self._hash_graph = 'hash-%s' % filename - self._write = getattr(nx, 'write_%s' % self.type) - self._read = getattr(nx, 'read_%s' % self.type) + self.type = filename.split(".")[-1] or "gexf" + self._user_graph = "user-%s" % filename + self._hash_graph = "hash-%s" % filename + self._write = getattr(nx, "write_%s" % self.type) + self._read = getattr(nx, "read_%s" % self.type) self.G = self._open_graph(self._user_graph) self.H = self._open_graph(self._hash_graph) - print 'graphs opened', self.G.nodes(), self.H.nodes() + print "graphs opened", self.G.nodes(), self.H.nodes() def _open_graph(self, filename): try: @@ -72,7 +79,7 @@ def getTweets(self): return [n for n in self.G.nodes()] def markDeleted(self, id): - self.G.nodes[id]['deleted'] = True + self.G.nodes[id]["deleted"] = True def writeSuccess(self, path): print "NOT IMPLEMENTED" @@ -95,9 +102,10 @@ def save(self, url, status): add_tags(self.H, text) add_users(self.G, text, status) - print 'H', self.H.nodes() + print "H", self.H.nodes() self._write_all() -if __name__ == '__main__': + +if __name__ == "__main__": G = nx.Graph() - add_users(G, 'RT @test blah blah #gnu @other', {}) + add_users(G, "RT @test blah blah #gnu @other", {}) diff --git a/DB/sqlite.py b/DB/sqlite.py index 68426e1..adf8f21 100644 --- a/DB/sqlite.py +++ b/DB/sqlite.py @@ -1,17 +1,19 @@ import sqlite3 import generic + class Driver(generic.DB): - def __init__(self, filename = 'twitter.db'): - generic.DB.__init__(self) + def __init__(self, filename="twitter.db"): + generic.DB.__init__(self) self.db = sqlite3.connect(filename) - def getTweets(self): cur = self.db.cursor() - return cur.execute("""SELECT * \ + return cur.execute( + """SELECT * \ FROM Tweets \ - WHERE Deleted=0""") + WHERE Deleted=0""" + ) def _commit(self, query): cur = self.db.cursor() @@ -24,46 +26,58 @@ def _commit(self, query): return True def writeSuccess(self, path): - if (self._commit("""UPDATE Tweets \ + if self._commit( + """UPDATE Tweets \ SET Screenshot=1 \ - WHERE Tweet_Id='%s'""")): + WHERE Tweet_Id='%s'""" + ): print "Screenshot OK. Tweet id ", path return True print "Warning:", path, "not saved to database" return False def markDeleted(self, path): - if (self._commit("""UPDATE Tweets \ + if self._commit( + """UPDATE Tweets \ SET Deleted=1 \ - WHERE Tweet_Id='%s'""" % [path])): + WHERE Tweet_Id='%s'""" + % [path] + ): print "Tweet marked as deleted ", path return True print "Warning:", path, "not saved to database" return False - def getLogs(self, ): + def getLogs(self,): cur = self.db.cursor() - return cur.execute("SELECT Url, Tweet_Id FROM Tweets WHERE Screenshot=0 AND Deleted=0 ") + return cur.execute( + "SELECT Url, Tweet_Id FROM Tweets WHERE Screenshot=0 AND Deleted=0 " + ) def save(self, url, status): (author, text, id_str) = (status.user.screen_name, status.text, status.id_str) cur = self.db.cursor() - cur.execute("CREATE TABLE IF NOT EXISTS Tweets (Id INTEGER PRIMARY KEY, \ + cur.execute( + "CREATE TABLE IF NOT EXISTS Tweets (Id INTEGER PRIMARY KEY, \ Author VARCHAR(255), \ Text VARCHAR(255), \ Url VARCHAR(255), \ Tweet_Id VARCHAR(255), \ Screenshot INTEGER, \ - Deleted INTEGER)") + Deleted INTEGER)" + ) try: - cur.execute(""" + cur.execute( + """ INSERT INTO Tweets(Author, Text, Url, Tweet_Id, Screenshot, Deleted) VALUES ('%s', '%s', '%s', '%s', '%s', '%s') - """ % (author, text, url, id_str, 0, 0)) + """ + % (author, text, url, id_str, 0, 0) + ) self.db.commit() - #print "Wrote to database:", author, id_str + # print "Wrote to database:", author, id_str except sqlite3.Error, e: print "Error", e self.db.rollback() diff --git a/config.py b/config.py index 9114ec5..d192fda 100644 --- a/config.py +++ b/config.py @@ -1,17 +1,26 @@ import re import sys import json +import csv import argparse -from random import randint class LoadJSONAction(argparse.Action): + """ + load a json file and put it in an opt + """ + def __call__(self, parser, namespace, filename, option_string=None): with open(filename) as data: setattr(namespace, self.dest, json.load(data)) + class LoadRowFileAction(argparse.Action): + """ + load a file line by line into an opt + """ + def __call__(self, parser, namespace, filename, option_string=None): ret = [] with open(filename) as f: @@ -19,102 +28,120 @@ def __call__(self, parser, namespace, filename, option_string=None): ret.append(row) setattr(namespace, self.dest, ret) + class LoadCSVAction(argparse.Action): + """ + load a csv file and put it in an opt + """ + def __call__(self, parser, namespace, filename, option_string=None): ret = [] with open(filename, "rb") as csvfile: - reader = csv.reader(csvfile, delimiter=',', quotechar='|') + reader = csv.reader(csvfile, delimiter=",", quotechar="|") for row in reader: for elem in row: - ret.extend(elem.strip().split(',')) + ret.extend(elem.strip().split(",")) setattr(namespace, self.dest, ret) + class LoadDBDriverAction(argparse.Action): + """ + load a db driver by name + """ + def __call__(self, parser, namespace, arg, option_string=None): - try: - db_driver, filename = arg.split(':') + try: + db_driver, filename = arg.split(":") except ValueError: db_driver = arg filename = None finally: if db_driver == "mysql": from DB.mysql import Driver + filename = filename or "mysql://" elif db_driver == "sqlite": from DB.sqlite import Driver + filename = filename or "twitter.sqlite" elif db_driver == "elasticsearch": from DB.elasticsearch import Driver + filename = filename or "ec://" elif db_driver == "pynx": from DB.pynx import Driver + filename = filename or "graph.gexf" else: - print "ERROR could not find db driver for ", db_driver + print("ERROR could not find db driver for ", db_driver) sys.exit(-2) setattr(namespace, self.dest, Driver(filename)) + class ParseComasAction(argparse.Action): + """ + Parse a coma separated arg into an array + """ + def __call__(self, parser, namespace, values, option_string=None): - setattr(namespace, self.dest, r.sub(",", values).split(',')) + setattr(namespace, self.dest, r.sub(",", values).split(",")) + CONFIG_FILE = { - 'flags': '-c, --config', - 'dest': 'config', - 'help': 'config file', - 'action': LoadJSONAction + "flags": "-c, --config", + "dest": "config", + "help": "config file", + "action": LoadJSONAction, } IDS = { - 'flags': '-i, --ids', - 'dest': 'ids', - 'help': 'twitter user ids, as a comma-separated list', - 'action': ParseComasAction + "flags": "-i, --ids", + "dest": "ids", + "help": "twitter user ids, as a comma-separated list", + "action": ParseComasAction, } USERS = { - 'flags': '-u, --users', - 'dest': 'users', - 'help': 'twitter usernames, as a comma-separated list', - 'action': ParseComasAction + "flags": "-u, --users", + "dest": "users", + "help": "twitter usernames, as a comma-separated list", + "action": ParseComasAction, } TERMS = { - 'flags': '-t, --track', - 'dest': 'track', - 'help': 'terms to track, as a comma-separated list', - 'action': ParseComasAction + "flags": "-t, --track", + "dest": "track", + "help": "terms to track, as a comma-separated list", + "action": ParseComasAction, } DBS = { - 'flags': '-D, --database', - 'dest': 'db', - 'help': 'database system to use (mysql, sqlite, elasticsearch)', - 'default': 'sqlite', - 'action': LoadDBDriverAction + "flags": "-D, --database", + "dest": "db", + "help": "database system to use (mysql, sqlite, elasticsearch)", + "default": "sqlite", + "action": LoadDBDriverAction, } CSV_FILE = { - 'flags': '-f, --csv', - 'dest': 'csv', - 'help': 'load data from a csv file', - 'action': LoadRowFileAction + "flags": "-f, --csv", + "dest": "csv", + "help": "load data from a csv file", + "action": LoadRowFileAction, } options = [CONFIG_FILE, IDS, USERS, TERMS, DBS] -r = re.compile("\s+") -filename = None +r = re.compile(r"\s+") -def parse_args(options): - argv = sys.argv - - if ((sys.argv[0]).find(".py") != -1): - argv = sys.argv[1:] - parser = argparse.ArgumentParser(description='Twitter Tools: query twitter from the commandline') +def parse_args(options): + parser = argparse.ArgumentParser( + description="Twitter Tools: query twitter from the commandline" + ) def add_argument(o): - flags = o.pop('flags') + flags = o.pop("flags") parser.add_argument(flags, **o) map(add_argument, options) return parser.parse_args() -if __name__ == '__main__': + +if __name__ == "__main__": parse_args(options) diff --git a/get_user_ids.py b/get_user_ids.py index b462b65..c1953ed 100644 --- a/get_user_ids.py +++ b/get_user_ids.py @@ -4,33 +4,41 @@ import sys import config as c -opts = c.parse_args([c.CONFIG_FILE, c.CSV_FILE, c.USERS]) -authdata = opts['config'][0] -users = None -try: - users = opts['users'] -except KeyError: - users = opts['csv'] +def twitter_login(): + opts = c.parse_args([c.CONFIG_FILE, c.CSV_FILE, c.USERS]) -print "looking for", users + authdata = opts["config"][0] + users = None + try: + users = opts["users"] + except KeyError: + users = opts["csv"] -auth = tweepy.OAuthHandler(authdata['consumer_key'], authdata['consumer_secret']) -auth.set_access_token(authdata['access_token'], authdata['access_token_secret']) + print ("looking for", users) -api = tweepy.API(auth) + auth = tweepy.OAuthHandler(authdata["consumer_key"], authdata["consumer_secret"]) + auth.set_access_token(authdata["access_token"], authdata["access_token_secret"]) -def get_user_ids(): + return tweepy.API(auth) + + +API = twitter_login() + + +def get_user_ids(api=API): handles = [] for screen_name in users: try: u = api.get_user(screen_name) - print screen_name, u._json['id'] - handles.append(str(u._json['id'])) + print screen_name, u._json["id"] + handles.append(str(u._json["id"])) except Exception, e: - print 'ERROR', e, authdata + print "ERROR", e, authdata - sys.stderr.write(' '.join(handles) + "\n") + sys.stderr.write(" ".join(handles) + "\n") return handles -if __name__ == '__main__': + + +if __name__ == "__main__": get_user_ids() diff --git a/monitoring.py b/monitoring.py index 778214e..a8082ac 100644 --- a/monitoring.py +++ b/monitoring.py @@ -1,4 +1,3 @@ -import sys import requests import config as c @@ -7,19 +6,20 @@ list_of_tweets = [] + def query(url): r = requests.get(url) if r.status_code != 200: return True else: - print "Tweet still exists" + print("Tweet still exists") def read_database(db): cur = db.getTweets() for tweet in cur: list_of_tweets.append(tweet) - print tweet + print(tweet) return list_of_tweets @@ -28,8 +28,8 @@ def check_tweet(): if query(tweet[3]) is True: db.markDeleted(tweet[4]) - print "tweet deleted, id is", tweet[4] - print "url is", tweet[3] + print("tweet deleted, id is", tweet[4]) + print("url is", tweet[3]) if __name__ == "__main__": diff --git a/screenshot.py b/screenshot.py index bf24789..5fc2f95 100644 --- a/screenshot.py +++ b/screenshot.py @@ -8,8 +8,8 @@ opts = c.parse_args([c.DBS]) db = opts.db -class captureTweetScreenshots(NeedleTestCase): +class captureTweetScreenshots(NeedleTestCase): @classmethod def get_web_driver(cls): return NeedlePhantomJS() @@ -24,20 +24,20 @@ def markDeleted(self, path): return db.markDeleted(path) def list_to_screenshot(self): - logFile = open('logfile.txt', 'w') + logFile = open("logfile.txt", "w") cur = db.getLogs() for (Url, Tweet_Id) in cur: try: self.driver.get(Url) except: - print "Url doesnt exist ", Url + print("Url doesnt exist ", Url) logFile.write("Url doesnt exist \n") continue try: - self.assertScreenshot('.tweet', Tweet_Id) + self.assertScreenshot(".tweet", Tweet_Id) except: - print "Tweet deleted ", Url + print("Tweet deleted ", Url) self.markDeleted(Tweet_Id) message = "Tweet deleted %s \n" % Url logFile.write(message) @@ -46,5 +46,7 @@ def list_to_screenshot(self): message = "Tweet screenshotted %s \n" % Url logFile.write(message) logFile.close() + + # if __name__ == '__main__': # list_to_screenshot(db) diff --git a/streaming.py b/streaming.py index 623c5a9..c665927 100644 --- a/streaming.py +++ b/streaming.py @@ -1,7 +1,9 @@ +""" +stream tweets to database driver and stdout +""" + import signal import sys -import re -from getopt import getopt, GetoptError from urllib3.exceptions import ProtocolError from tweepy.streaming import StreamListener @@ -10,42 +12,59 @@ import config as c -r = re.compile("\s+") - -# This is the listener, resposible for receiving data class StdOutListener(StreamListener): + """ + listener, resposible for receiving data + """ + def __init__(self, database): + super(self) + self.database = database def on_status(self, status): - tweet_url = "http://twitter.com/" + status.user.screen_name + "/status/" + status.id_str - print "TWEET", status.text - print "URL", tweet_url - db.save(tweet_url, status) + """ + a twitter status has been recieved + """ + tweet_url = ( + "http://twitter.com/" + status.user.screen_name + "/status/" + status.id_str + ) + print("TWEET", status.text) + print("URL", tweet_url) + self.database.save(tweet_url, status) def on_error(self, status): - print status + """ + error handler + """ + print(status) -if __name__ == "__main__": +def run(): + """ + main entry point + """ opts = c.parse_args([c.CONFIG_FILE, c.IDS, c.TERMS, c.DBS]) - db = opts.db + database = opts.db config = opts.config[0] ids = opts.ids or [] track = opts.track or [] - l = StdOutListener() - auth = OAuthHandler(config['consumer_key'], config['consumer_secret']) - auth.set_access_token(config['access_token'], config['access_token_secret']) + stdout = StdOutListener(database) + auth = OAuthHandler(config["consumer_key"], config["consumer_secret"]) + auth.set_access_token(config["access_token"], config["access_token_secret"]) - def signal_handler(sig, frame): - db.close() + def signal_handler(): + database.close() sys.exit(0) signal.signal(signal.SIGINT, signal_handler) - stream = Stream(auth, l) - print "STREAM",ids, track + stream = Stream(auth, stdout) + print("STREAM", ids, track) while True: try: stream.filter(follow=ids, track=track) except ProtocolError: pass + +if __name__ == "__main__": + run() From cd6a1faba9d3c45473cbde4fd75385e49e4a45fb Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Fri, 28 Aug 2020 03:41:42 -0300 Subject: [PATCH 28/90] port DB to python3 Signed-off-by: Niv Sardi --- DB/generic.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/DB/generic.py b/DB/generic.py index f12fd13..7070a0c 100644 --- a/DB/generic.py +++ b/DB/generic.py @@ -3,16 +3,16 @@ def __init__(self): self.name = "Generic DB Driver" def getTweets(self): - print "NOT IMPLEMENTED" + print("NOT IMPLEMENTED") def writeSuccess(self, path): - print "NOT IMPLEMENTED" + print("NOT IMPLEMENTED") def markDeleted(self, path): - print "NOT IMPLEMENTED" + print("NOT IMPLEMENTED") def getLogs(self): - print "NOT IMPLEMENTED" + print("NOT IMPLEMENTED") def save(self, url, status): - print "NOT IMPLEMENTED" + print("NOT IMPLEMENTED") From 28d5ec38c2a1788d03363d589382f45bc96e8e1c Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Fri, 28 Aug 2020 03:42:08 -0300 Subject: [PATCH 29/90] db:sqlite: fix import Signed-off-by: Niv Sardi --- DB/sqlite.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/DB/sqlite.py b/DB/sqlite.py index adf8f21..ec97a7f 100644 --- a/DB/sqlite.py +++ b/DB/sqlite.py @@ -1,6 +1,5 @@ import sqlite3 -import generic - +from DB import generic class Driver(generic.DB): def __init__(self, filename="twitter.db"): From 8792ef8f95bf941759aeaac91e972583a9c9d6a2 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Fri, 28 Aug 2020 03:42:31 -0300 Subject: [PATCH 30/90] db:sqlite: only create table once Signed-off-by: Niv Sardi --- DB/sqlite.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/DB/sqlite.py b/DB/sqlite.py index ec97a7f..c4c4b02 100644 --- a/DB/sqlite.py +++ b/DB/sqlite.py @@ -6,6 +6,19 @@ def __init__(self, filename="twitter.db"): generic.DB.__init__(self) self.db = sqlite3.connect(filename) + cur = self.db.cursor() + + cur.execute( + "CREATE TABLE IF NOT EXISTS Tweets (Id INTEGER PRIMARY KEY, \ + Author VARCHAR(255), \ + Text VARCHAR(255), \ + Url VARCHAR(255), \ + Tweet_Id VARCHAR(255), \ + Screenshot INTEGER, \ + Deleted INTEGER)" + ) + + def getTweets(self): cur = self.db.cursor() return cur.execute( @@ -57,16 +70,6 @@ def save(self, url, status): (author, text, id_str) = (status.user.screen_name, status.text, status.id_str) cur = self.db.cursor() - cur.execute( - "CREATE TABLE IF NOT EXISTS Tweets (Id INTEGER PRIMARY KEY, \ - Author VARCHAR(255), \ - Text VARCHAR(255), \ - Url VARCHAR(255), \ - Tweet_Id VARCHAR(255), \ - Screenshot INTEGER, \ - Deleted INTEGER)" - ) - try: cur.execute( """ From 44a54782bd7d62bae54c5dab4736e69bb3089d48 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Fri, 28 Aug 2020 03:42:56 -0300 Subject: [PATCH 31/90] db:sqlite: port to python3 Signed-off-by: Niv Sardi --- DB/sqlite.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/DB/sqlite.py b/DB/sqlite.py index c4c4b02..1752d2a 100644 --- a/DB/sqlite.py +++ b/DB/sqlite.py @@ -32,8 +32,8 @@ def _commit(self, query): try: cur.execute(query) self.db.commit() - except sqlite3.Error, e: - print "Error", e + except sqlite3.Error as e: + print("Error", e) return False return True @@ -43,9 +43,9 @@ def writeSuccess(self, path): SET Screenshot=1 \ WHERE Tweet_Id='%s'""" ): - print "Screenshot OK. Tweet id ", path + print("Screenshot OK. Tweet id ", path) return True - print "Warning:", path, "not saved to database" + print("Warning:", path, "not saved to database") return False def markDeleted(self, path): @@ -55,9 +55,9 @@ def markDeleted(self, path): WHERE Tweet_Id='%s'""" % [path] ): - print "Tweet marked as deleted ", path + print("Tweet marked as deleted ", path) return True - print "Warning:", path, "not saved to database" + print("Warning:", path, "not saved to database") return False def getLogs(self,): @@ -80,7 +80,7 @@ def save(self, url, status): ) self.db.commit() # print "Wrote to database:", author, id_str - except sqlite3.Error, e: - print "Error", e + except sqlite3.Error as e: + print("Error", e, c) self.db.rollback() - print "ERROR writing database" + print("ERROR writing database") From a1314bb47848a9839505142e594ec0ae68609ff9 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Fri, 28 Aug 2020 03:43:15 -0300 Subject: [PATCH 32/90] db:sqlite: sanitize text input by passing to sqlite direct python objects Signed-off-by: Niv Sardi --- DB/sqlite.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/DB/sqlite.py b/DB/sqlite.py index 1752d2a..8e8bbe6 100644 --- a/DB/sqlite.py +++ b/DB/sqlite.py @@ -71,13 +71,11 @@ def save(self, url, status): cur = self.db.cursor() try: - cur.execute( - """ + c = """ INSERT INTO Tweets(Author, Text, Url, Tweet_Id, Screenshot, Deleted) - VALUES ('%s', '%s', '%s', '%s', '%s', '%s') + VALUES (?, ?, ?, ?, ?, ?) """ - % (author, text, url, id_str, 0, 0) - ) + cur.execute(c, (author, text, url, id_str, 0, 0)) self.db.commit() # print "Wrote to database:", author, id_str except sqlite3.Error as e: From e161c653cb6c18a832302e00eb2e752d103200ef Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Fri, 28 Aug 2020 03:44:32 -0300 Subject: [PATCH 33/90] config: process default config and dbs, add last config term as cmdline arg to make @ladatano happy Signed-off-by: Niv Sardi --- config.py | 96 ++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 66 insertions(+), 30 deletions(-) diff --git a/config.py b/config.py index d192fda..5f89a5f 100644 --- a/config.py +++ b/config.py @@ -1,10 +1,13 @@ import re +import os import sys import json import csv import argparse +def flatten(lists): + return [i for l in lists for i in l] class LoadJSONAction(argparse.Action): """ @@ -44,39 +47,42 @@ def __call__(self, parser, namespace, filename, option_string=None): setattr(namespace, self.dest, ret) -class LoadDBDriverAction(argparse.Action): - """ - load a db driver by name - """ +def load_db_driver(arg): + try: + db_driver, filename = arg.split(":") + except ValueError: + db_driver = arg + filename = None + finally: + if db_driver == "mysql": + from DB.mysql import Driver - def __call__(self, parser, namespace, arg, option_string=None): - try: - db_driver, filename = arg.split(":") - except ValueError: - db_driver = arg - filename = None - finally: - if db_driver == "mysql": - from DB.mysql import Driver + filename = filename or "mysql://" + elif db_driver == "sqlite": + from DB.sqlite import Driver + + filename = filename or "twitter.sqlite" + elif db_driver == "elasticsearch": + from DB.elasticsearch import Driver - filename = filename or "mysql://" - elif db_driver == "sqlite": - from DB.sqlite import Driver + filename = filename or "ec://" + elif db_driver == "pynx": + from DB.pynx import Driver - filename = filename or "twitter.sqlite" - elif db_driver == "elasticsearch": - from DB.elasticsearch import Driver + filename = filename or "graph.gexf" + else: + print("ERROR could not find db driver for ", db_driver) + sys.exit(-2) - filename = filename or "ec://" - elif db_driver == "pynx": - from DB.pynx import Driver + return Driver(filename) - filename = filename or "graph.gexf" - else: - print("ERROR could not find db driver for ", db_driver) - sys.exit(-2) - setattr(namespace, self.dest, Driver(filename)) +class LoadDBDriverAction(argparse.Action): + """ + load a db driver by name + """ + def __call__(self, parser, namespace, arg, option_string=None): + setattr(namespace, self.dest, load_db_driver(arg)) class ParseComasAction(argparse.Action): """ @@ -84,15 +90,34 @@ class ParseComasAction(argparse.Action): """ def __call__(self, parser, namespace, values, option_string=None): - setattr(namespace, self.dest, r.sub(",", values).split(",")) + setattr(namespace, self.dest, flatten([v.split(",") for v in values])) + +def load_config(paths): + def try_load_json(j): + try: + with open(j) as data: + return json.load(data) + except FileNotFoundError: + return None + except Exception as e: + print(e, "is your config file well formated ?") + raise e + for p in paths: + c = try_load_json(os.path.expanduser(p)) + print(p, c) + if c: return c + + return [] CONFIG_FILE = { "flags": "-c, --config", "dest": "config", "help": "config file", "action": LoadJSONAction, + "default": load_config(["./config.json", "../config.json", "~/.config/twitter-tools/config.json"]) } + IDS = { "flags": "-i, --ids", "dest": "ids", @@ -102,12 +127,14 @@ def __call__(self, parser, namespace, values, option_string=None): USERS = { "flags": "-u, --users", "dest": "users", + "nargs": "+", "help": "twitter usernames, as a comma-separated list", "action": ParseComasAction, } TERMS = { "flags": "-t, --track", "dest": "track", + "nargs": "+", "help": "terms to track, as a comma-separated list", "action": ParseComasAction, } @@ -139,9 +166,18 @@ def add_argument(o): flags = o.pop("flags") parser.add_argument(flags, **o) - map(add_argument, options) - return parser.parse_args() + last = options.pop() + [add_argument(o) for o in options] + + last["flags"] = last["dest"] + del last["dest"] + + add_argument(last) + opts = parser.parse_args() + if opts.db == DBS["default"]: + opts.db = load_db_driver(DBS["default"]) + return opts if __name__ == "__main__": parse_args(options) From d2746c00ddbf71c73d7274134d4e77e615b030ac Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Fri, 28 Aug 2020 03:44:58 -0300 Subject: [PATCH 34/90] streaming: fix code and add error handeling Signed-off-by: Niv Sardi --- streaming.py | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/streaming.py b/streaming.py index c665927..2924c9c 100644 --- a/streaming.py +++ b/streaming.py @@ -5,10 +5,9 @@ import signal import sys +import tweepy from urllib3.exceptions import ProtocolError from tweepy.streaming import StreamListener -from tweepy import OAuthHandler -from tweepy import Stream import config as c @@ -17,13 +16,14 @@ class StdOutListener(StreamListener): listener, resposible for receiving data """ def __init__(self, database): - super(self) + super(StdOutListener, self).__init__() self.database = database def on_status(self, status): """ a twitter status has been recieved """ + tweet_url = ( "http://twitter.com/" + status.user.screen_name + "/status/" + status.id_str ) @@ -35,34 +35,48 @@ def on_error(self, status): """ error handler """ - print(status) + print("error", status) def run(): """ main entry point """ - opts = c.parse_args([c.CONFIG_FILE, c.IDS, c.TERMS, c.DBS]) + opts = c.parse_args([c.CONFIG_FILE, c.IDS, c.DBS, c.TERMS]) database = opts.db config = opts.config[0] - ids = opts.ids or [] - track = opts.track or [] - stdout = StdOutListener(database) - auth = OAuthHandler(config["consumer_key"], config["consumer_secret"]) + stream_config = { + "follow": opts.ids or None, + "track": opts.track or None + + } + + listener = StdOutListener(database) + + auth = tweepy.OAuthHandler(config["consumer_key"], config["consumer_secret"]) auth.set_access_token(config["access_token"], config["access_token_secret"]) + + api = tweepy.API(auth) + # test authentication + try: + api.verify_credentials() + print("Authentication OK") + except: + print("Error during authentication") + def signal_handler(): database.close() sys.exit(0) signal.signal(signal.SIGINT, signal_handler) - stream = Stream(auth, stdout) - print("STREAM", ids, track) + stream = tweepy.Stream(auth = auth, listener = listener) + print("STREAM", stream_config) while True: try: - stream.filter(follow=ids, track=track) + stream.filter(**stream_config) except ProtocolError: pass From 7a7164519ded9083842cb970eaba0eaea57a2398 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Fri, 28 Aug 2020 03:50:27 -0300 Subject: [PATCH 35/90] 2to3 Signed-off-by: Niv Sardi --- DB/mysql.py | 34 +++++++++++++++++----------------- DB/pynx.py | 8 ++++---- DB/sqlite.py | 14 +++++++------- 3 files changed, 28 insertions(+), 28 deletions(-) diff --git a/DB/mysql.py b/DB/mysql.py index eed2e3c..67e423f 100644 --- a/DB/mysql.py +++ b/DB/mysql.py @@ -1,5 +1,5 @@ import MySQLdb -import generic +from . import generic class MySQLDriver(generic.DB): @@ -26,15 +26,15 @@ def writeSuccess(path): [path], ) self.db.commit() - print "Screenshot OK. Tweet id ", path - except MySQLdb.Error, e: + print(("Screenshot OK. Tweet id ", path)) + except MySQLdb.Error as e: try: - print "MySQL Error [%d]: %s" % (e.args[0], e.args[1]) + print(("MySQL Error [%d]: %s" % (e.args[0], e.args[1]))) except IndexError: - print "MySQL Error: %s" % str(e) + print(("MySQL Error: %s" % str(e))) - print "Error", e.args[0], e.args[1] - print "Warning:", path, "not saved to database" + print(("Error", e.args[0], e.args[1])) + print(("Warning:", path, "not saved to database")) return True def markDeleted(path): @@ -47,15 +47,15 @@ def markDeleted(path): [path], ) self.db.commit() - print "Tweet marked as deleted ", path - except MySQLdb.Error, e: + print(("Tweet marked as deleted ", path)) + except MySQLdb.Error as e: try: - print "MySQL Error [%d]: %s" % (e.args[0], e.args[1]) + print(("MySQL Error [%d]: %s" % (e.args[0], e.args[1]))) except IndexError: - print "MySQL Error: %s" % str(e) + print(("MySQL Error: %s" % str(e))) - print "Error", e.args[0], e.args[1] - print "Warning:", path, "not saved to database" + print(("Error", e.args[0], e.args[1])) + print(("Warning:", path, "not saved to database")) return True def getLogs(): @@ -85,8 +85,8 @@ def save(url, status): (author, text, url, id_str, 0, 0), ) self.db.commit() - print "Wrote to database:", author, id_str - except MySQLdb.Error, e: - print "Error", e.args[0], e.args[1] + print(("Wrote to database:", author, id_str)) + except MySQLdb.Error as e: + print(("Error", e.args[0], e.args[1])) self.db.rollback() - print "ERROR writing database" + print("ERROR writing database") diff --git a/DB/pynx.py b/DB/pynx.py index 9df97b6..682ab66 100644 --- a/DB/pynx.py +++ b/DB/pynx.py @@ -67,7 +67,7 @@ def __init__(self, filename="graph.gexf"): self.G = self._open_graph(self._user_graph) self.H = self._open_graph(self._hash_graph) - print "graphs opened", self.G.nodes(), self.H.nodes() + print("graphs opened", self.G.nodes(), self.H.nodes()) def _open_graph(self, filename): try: @@ -82,10 +82,10 @@ def markDeleted(self, id): self.G.nodes[id]["deleted"] = True def writeSuccess(self, path): - print "NOT IMPLEMENTED" + print("NOT IMPLEMENTED") def getLogs(self): - print "NOT IMPLEMENTED" + print("NOT IMPLEMENTED") def _write_all(self): self._write(self.H, self._hash_graph) @@ -102,7 +102,7 @@ def save(self, url, status): add_tags(self.H, text) add_users(self.G, text, status) - print "H", self.H.nodes() + print("H", self.H.nodes()) self._write_all() diff --git a/DB/sqlite.py b/DB/sqlite.py index 8e8bbe6..c90201f 100644 --- a/DB/sqlite.py +++ b/DB/sqlite.py @@ -1,5 +1,5 @@ import sqlite3 -from DB import generic +from . import generic class Driver(generic.DB): def __init__(self, filename="twitter.db"): @@ -33,7 +33,7 @@ def _commit(self, query): cur.execute(query) self.db.commit() except sqlite3.Error as e: - print("Error", e) + print(("Error", e)) return False return True @@ -43,9 +43,9 @@ def writeSuccess(self, path): SET Screenshot=1 \ WHERE Tweet_Id='%s'""" ): - print("Screenshot OK. Tweet id ", path) + print(("Screenshot OK. Tweet id ", path)) return True - print("Warning:", path, "not saved to database") + print(("Warning:", path, "not saved to database")) return False def markDeleted(self, path): @@ -55,9 +55,9 @@ def markDeleted(self, path): WHERE Tweet_Id='%s'""" % [path] ): - print("Tweet marked as deleted ", path) + print(("Tweet marked as deleted ", path)) return True - print("Warning:", path, "not saved to database") + print(("Warning:", path, "not saved to database")) return False def getLogs(self,): @@ -79,6 +79,6 @@ def save(self, url, status): self.db.commit() # print "Wrote to database:", author, id_str except sqlite3.Error as e: - print("Error", e, c) + print(("Error", e, c)) self.db.rollback() print("ERROR writing database") From 63e5673cc3e1e0da9bda3bc099b7e812508ddbf1 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Fri, 28 Aug 2020 03:51:18 -0300 Subject: [PATCH 36/90] 2to3 Signed-off-by: Niv Sardi --- config.py | 6 +++--- get_user_ids.py | 8 ++++---- monitoring.py | 4 ++-- screenshot.py | 4 ++-- streaming.py | 8 ++++---- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/config.py b/config.py index 5f89a5f..7b6d0a2 100644 --- a/config.py +++ b/config.py @@ -71,7 +71,7 @@ def load_db_driver(arg): filename = filename or "graph.gexf" else: - print("ERROR could not find db driver for ", db_driver) + print(("ERROR could not find db driver for ", db_driver)) sys.exit(-2) return Driver(filename) @@ -100,12 +100,12 @@ def try_load_json(j): except FileNotFoundError: return None except Exception as e: - print(e, "is your config file well formated ?") + print((e, "is your config file well formated ?")) raise e for p in paths: c = try_load_json(os.path.expanduser(p)) - print(p, c) + print((p, c)) if c: return c return [] diff --git a/get_user_ids.py b/get_user_ids.py index c1953ed..c8ebf4d 100644 --- a/get_user_ids.py +++ b/get_user_ids.py @@ -15,7 +15,7 @@ def twitter_login(): except KeyError: users = opts["csv"] - print ("looking for", users) + print(("looking for", users)) auth = tweepy.OAuthHandler(authdata["consumer_key"], authdata["consumer_secret"]) auth.set_access_token(authdata["access_token"], authdata["access_token_secret"]) @@ -31,10 +31,10 @@ def get_user_ids(api=API): for screen_name in users: try: u = api.get_user(screen_name) - print screen_name, u._json["id"] + print(screen_name, u._json["id"]) handles.append(str(u._json["id"])) - except Exception, e: - print "ERROR", e, authdata + except Exception as e: + print("ERROR", e, authdata) sys.stderr.write(" ".join(handles) + "\n") return handles diff --git a/monitoring.py b/monitoring.py index a8082ac..369e4a1 100644 --- a/monitoring.py +++ b/monitoring.py @@ -28,8 +28,8 @@ def check_tweet(): if query(tweet[3]) is True: db.markDeleted(tweet[4]) - print("tweet deleted, id is", tweet[4]) - print("url is", tweet[3]) + print(("tweet deleted, id is", tweet[4])) + print(("url is", tweet[3])) if __name__ == "__main__": diff --git a/screenshot.py b/screenshot.py index 5fc2f95..05db4b5 100644 --- a/screenshot.py +++ b/screenshot.py @@ -30,14 +30,14 @@ def list_to_screenshot(self): try: self.driver.get(Url) except: - print("Url doesnt exist ", Url) + print(("Url doesnt exist ", Url)) logFile.write("Url doesnt exist \n") continue try: self.assertScreenshot(".tweet", Tweet_Id) except: - print("Tweet deleted ", Url) + print(("Tweet deleted ", Url)) self.markDeleted(Tweet_Id) message = "Tweet deleted %s \n" % Url logFile.write(message) diff --git a/streaming.py b/streaming.py index 2924c9c..435a2d1 100644 --- a/streaming.py +++ b/streaming.py @@ -27,15 +27,15 @@ def on_status(self, status): tweet_url = ( "http://twitter.com/" + status.user.screen_name + "/status/" + status.id_str ) - print("TWEET", status.text) - print("URL", tweet_url) + print(("TWEET", status.text)) + print(("URL", tweet_url)) self.database.save(tweet_url, status) def on_error(self, status): """ error handler """ - print("error", status) + print(("error", status)) def run(): """ @@ -73,7 +73,7 @@ def signal_handler(): signal.signal(signal.SIGINT, signal_handler) stream = tweepy.Stream(auth = auth, listener = listener) - print("STREAM", stream_config) + print(("STREAM", stream_config)) while True: try: stream.filter(**stream_config) From 742c03186da05e2ebc173f6726f986bbf13a2ece Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Fri, 28 Aug 2020 04:35:18 -0300 Subject: [PATCH 37/90] add little config tool Signed-off-by: Niv Sardi --- requirements.txt | 1 + setup.py | 85 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+) create mode 100644 setup.py diff --git a/requirements.txt b/requirements.txt index 2475d22..7b054b3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ tweepy needle +pyinquirer diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..4c7e29c --- /dev/null +++ b/setup.py @@ -0,0 +1,85 @@ +#!/usr/bin/env python3 + +# -*- coding: utf-8 -*- +""" +* ask the user for it's twitter credentials +""" +from __future__ import print_function, unicode_literals + +import os +import sys +import json +import pathlib + +from PyInquirer import prompt, print_json +from config import load_config + +CONFIG_PATH = "~/.config/twitter-tools/config.json" +config = load_config([CONFIG_PATH]) + +if config: + answers = prompt([ + { + 'type': 'confirm', + 'name': 'confirm', + 'message': f"""a config file has already been found at {CONFIG_PATH}, +with the following configs: {[c["name"] for c in config]}, +continue ?""" + } + ]) + + if not answers["confirm"]: + sys.exit(-2) +else: + config = [] + +def validate_length(n): + def validate(l): + if len(l) == n: return True + if len(l) < n: return f"your input too short, (it's {len(l)}, i expect {n})" + if len(l) > n: return f"your input too long, (it's {len(l)}, i expect {n})" + + return validate + +questions = [ + { + 'type': 'input', + 'message': 'creds name', + 'name': 'name', + 'default': 'twitter-tools' + }, + { + 'type': 'password', + 'message': 'Enter your consumer key', + 'name': 'consumer_key', + 'validate': validate_length(25) + }, + { + 'type': 'password', + 'message': 'Enter your consumer key secret', + 'name': 'consumer_secret', + 'validate': validate_length(45) + }, + { + 'type': 'password', + 'message': 'Enter your access token', + 'name': 'access_token_key', + 'validate': validate_length(50) + }, + { + 'type': 'password', + 'message': 'Enter your access token secret', + 'name': 'access_token_secret', + 'validate': validate_length(45) + } +] + +answers = prompt(questions) +config.append(answers) + +dirname = os.path.dirname(os.path.expanduser(CONFIG_PATH)) +if not os.path.exists(dirname): + os.mkdirs(dirname, parents = True, exist_ok = True) + +with open(os.path.expanduser(CONFIG_PATH), "w") as f: + f.write(json.dumps(config, indent=4)) From 397ae3504533151253f02739fc3730d472f13d15 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Fri, 28 Aug 2020 04:42:45 -0300 Subject: [PATCH 38/90] update README Signed-off-by: Niv Sardi --- README.md | 16 ++++++++++------ config-screenshot.png | Bin 0 -> 32725 bytes 2 files changed, 10 insertions(+), 6 deletions(-) create mode 100644 config-screenshot.png diff --git a/README.md b/README.md index 6949990..0bd94ad 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ This is a collection of tools to monitor deleted tweets, automate screenshoting, and archiving. -* `streaming.py` and `db_{mysql,sqlite}.py` work together to grab a real-time streamed timeline from Twitter and save all the results in a database. +* `streaming.py` and `DB modules` work together to grab a real-time streamed timeline from Twitter and save all the results in a database, we currently support `SQLITE, MySQL and python networkx` but you can easily implement your own driver * All the tweets in the database are then screenshot by `screenshot.py` * Finally, the `monitoring.py` worker crawls through the database and checks if the tweets have been deleted. * I included `get_user_ids.py`, as the Twitter API often requires the ID, and not the screen name (eg not "@basilesimon"). @@ -14,9 +14,13 @@ then `sudo python get-pip.py` * pip install -r requirements.txt # Configuration -you should put your credentials in a json file, we'll pick up a random -entry, the default file is `config.json` but you can change that with command -line arguments +there is a nifty tool that will generate a config file in the default location (`~/.config/twitter-tools/config.json`), just run `python3 ./setup.py` and you'll be prompted. +[./config-screenshot.png] + + +we'll pick up the first entry, not that we'll look for `./config.json` and +`../config.json` too, of course you can specify any file with the command +line. it should look like this: ```json @@ -47,9 +51,9 @@ it should look like this: * `pip install MySQL-python` (but you might need to `apt-get install build-essential python-dev libmysqlclient-dev`. I read it's easy to install on Max OS, with Homebrew) -* `apt-get install mysql-server +* `apt-get install mysql-server` -* apt-get install nodejs-legacy nodejs npm` +* `apt-get install nodejs-legacy nodejs npm` * `sudo apt-get install build-essential chrpath git-core libssl-dev libfontconfig1-dev libxft-dev` * `sudo npm -g install phantomjs` diff --git a/config-screenshot.png b/config-screenshot.png new file mode 100644 index 0000000000000000000000000000000000000000..7227e0e359161e313a51bf15f2948df4cd6d3887 GIT binary patch literal 32725 zcmagF1ymf}wk_NNLU580+=9EiClK5_xCeK4O&~zf#$AFt1a}YaPUG(G&MUrq&V6^h zanJuV22!J&s;<4)nrqIvRs|`@iK8OnA%Q?3R7r_1U=Rof3jAGx01y0(U!7nCe!)2i zODZ7%U!Dktzkp+0M^QD$uQtYxF5m2pKql5URz?gC2KGiq)()mNj>oXAg20;?Ufv{P zZ}iR4%*Ohol9`neNX*{sBP;jE&rT*ES(#axKQgoPvT^XTvwi$5`;mc}4cC!G3n$u;F5)kRNv>ak78X0!RA86)abFZqu@B%$A7&$h+Ak2 z)jBDN7FAc%_P?dB;6fxJA^8y@c{M!0e%(Wct}ndgwMqfUo$eFmaa_OcIdbheQornS zbe%#skircO_;N(Y@VPAF+_;0fp!Q;55nB5(zR%5|=RVwl)<5{Rpi&|n$3MOwBNf`V zJI(v#wIhv!l|{A)d+zfyzCW4?VogcsjI!E7g2O<$w0v)4KDP%#xJLbEeTK5;n-w{* zG3mly(d@|7Ffp`NY|$4gW%TtF%{lJKA*J7cA#)o@3-OeK29&;*nB?~rPQfflmS#F%2HS6oC zipNBu)w;wbi*wp6WXjk{GVpO%JeXfK%DW|u?re}qAhx`G_r}$AtkA;Xx=)0ln0s7A zcX%8WiV(atY&Kvv;dJs4$K?@ASMBp^us@%wx2LUum)Kq&o_Dx5I~N?ED*Ndh$)Tj= zO>CULa={KB3(Rsf)vwtW*zctYI7!{%T{v&Qi)MW;omtmZ{JfJUKrKTFDyGFmji#J< zM;s_xB4*8=X=X|8K7h4BiE$V4R*peqG7rtGe<0}g&H`j<&l9%ZGZB~Yt7`&7S^UjY zP7WL)kME#+jjXgvNk^s%MvDWLRbL+t3~Xfb(BD$cz})h3RowN768?J&o>wjQv_(Vo zkNyOH<*F6lr&m{NW8y@anH9nDI!B9?Pm~xVn}oR?10h1$VRj4-KR$hYi=r8WU9=LJ zVfW?Dmy@yf5s#Gg=#-(qb>&L6)An_1)ZTc+?>oga!G-QTuRt@oBN-Jr0iChpK_|!} zIZChIuI!Fn=a#jJ?0!xzO$`_}7WVSRA>^!(r@Hq%9&QFdEU}6}eJt zegm=(BAbG=ni3WfG0htH~6 zN>L=pP+z*kAR(6*3YZed{u_@t$$qUAiJMDgGs73agG{S<%B6h5#}e!O8+(0#y39_r9MzIZxie2W0*JAhrw166!l4Mhs;H04yZz2i;| zU<5VsH$@B>hOb*2Q_1C&Hb<__VF(Nd-od@HEw1^gO@*#k303ID6dL0uhJ}4z_O$s&B#9lI`yCr4ufOE!`VFlQ z8|T5mHrP zSC_foXCHje&zRxi6oky) zixCsYq^80V3rNlWfK`i?qQ^tk$+O5$Y6V%zgp#2fd;)=5^7;|_N8Tid+4{o_4+_%p z@deFRBLlA-X1$M0U;pi5aCMr&!1B(&{nLZVv#2$zfpP(L^QDT$_&9=qEvUZ0BpIFv zIR8>}xIuDe-cXO&tm~OK{^}1T=J_`BPH_eU;b53z&Ob0J_=KrgMDEJaJU@o$zMEA6 zm$yV%{m0KIA9zcKjR=w9V1BHO=LAMSUj6WDe(!0sW|)^q9?~2wB3yr zV6YeH;`8T^(cct{c)WXDJhZv)ZZpix_w0sii%q;5jgF1Yb7PxeaNS;0@_(2$DJzM< zG9hCWMpMDpD2dHmiZ5^cb6g*+sk-!cb`a(1;zrlh+B#B;_yrQRh%VfDCF}}4j7U{a z%#L`a_T*O&S0zDuO+9fi8zy*qatrXi zNtOxlF{9C$no^rG-^-PI4TY-fK1Z#)R#lG+ntzzHn z@ljMfK8F{vK}@aMZRMoB!3Np)@YJI$x%amHdW$RO7d!KWtb2IK=V&@N88nzK6rcBe zMv93!RjMTN5x|*vL~BHBnnFH&;on@lP^Bh6*)G028GL*kSx;N~=f20n5!Fc@T6tPc zi+szHS9*87h=bCpvifRG(sG}y8U?Vp$#?-o7N%E0W;KMnhr6;pNC84GfNfY`>aS@G z5`~8x(Z)25GyqEM&YXP4V>$XYvl9bG_J+N8DLh9^vf7Q0Z^8n>UF~ONRl9ryGx{)O zW)szoAowZe*~gi5~-Gn-g>I~8+$T&&ThA9IbVzp z5#9%T(X!h2r59!O1{R1{Z1xq?fqoR9>wH;0f`DaRFeJ6#R{`;^VYug1=1;j!&#bH4 z_;`$_M1FpZp)oZuaOZ}yo+s8S+Se~i3iV)3Fl>+sstC>*8Vng8r{VuVX_a#)l+O42cv3D~( z;pF4-r&7su*iY6*KghmhXfFqiS$2pN=x=6=vRh~tSlFzbB))bS9n&phfZd;X zs3jFBu%E;dY?x{NqfEG(SHX|jJ9$EGj4qE78A5IslmZ_8Xlz~^a4mdZJX0mr)1PD^zY&p77$0Nk}H|w>Zn4ub491B zENg4Z-*q?uaq`ZgGL9-VQrtuqgg(>=(MDlnY7uB~_q5(&u*|5W5dnADUBHTD)Wdu-BA-5?F#vE_@m* z?#(EEaK6wwNQHAouT#uu-|YT2yz;agV7ROlf+30-r=S}P&&2oXT`@QG!k)v7-BokN z#R7L6L)s-kWjB!Wj5C-9nlk^6^{lW@alL3^LRx-m{k7ka1kIn?io z(++PywW?CBC_I=o=|ho%vSer4VS5HV+wWk(8u@x8SSS0{aw|8bP!5 zgvuU#+T|=&*?AK@YX>mU-otqW9!CU!9eI-j1jooofd0ZDBQJu}H*k=n#pY)dO;!sa zQwgbG-g=$&VN+vDfxkx2)*ioY_1GABxM_LgdRbk|qwYN2H?2l*$|%eo1pqPvJTbKX z-muk6Pq&o?irpZ8)gKgsaEZE=Q*wMU3%Su}S=hj}MtDSkb0*R$Fj4v$tJ}0HX`y&fiZ_C1D2tH&4 zL}e@q)3Mlna9OSn_U#~DHPl_398L-9n@^s-+-Itx{^z;~0l18TAv>5zI#L8f0Vlof z5hlIrY^33}>*6|=EO4DMQOZT8lCsUS`Y3puiJ=$+abq#D9HvHEN4&tNh57NDNec~| zWx$XC3BfV#%W4T3acRFZp`cW*e>$k$m!H!{n zQ?uf%<$~JRp{iW5he2aJ0sdJ+f{un?67;OsM_4CwMI&qb>qAK#&ue={4hTp{D?4Kb zO;6p8j*iZ+n0=NAj_SPp*VBLnOmjWsi9c7Q{y?wZ6oJc@ySBY8ZXiO&=W@6iQjPy7 z<(B_E3&Q^i3?|j}EKftk=zd2)&>Q&xqvLg`jRFsAF#h#+sLV&s$%z5|=}r;E&(H6= zUTvaC8@q5_sXH+~PFw%PP*A`wmm+boH3X!XFUdy`Kq@b0bl0Jyqb6!y1)?D*g{731 z&)#iB?%ys>KQE3;-RDOF66zr&7~q<1IW_r-(SGXdK~i~HY@ArdD!%9Eh`jc2ph~gM zpjf(~xdzPT3f3MkLNbi}zC@9=K;rLHVts|m;TBF%=%kSe?Avk!zE`4M0{xc#{h1UD zsCQB7lvy-v(Q#Eu^l3WzbMuIO0-?IQh~n>ErWTD1S(CcM$o~8W`H#)6g9j2=*z*M+cK1qHD~2t&q3(2Z2Fe~600ZEpJw1&Ef!WsA_%tQO1Wtf9>- z>|8L${*yIy*tLg@6?F~4JgApd`TT0w@A}?7gVw6G^Giy-7?}U+t<;M4;LJ}4t7NRy zl`(l!Vi?TN-O2c9it@nwrx224Ocrvurrf&ErRMdkxi-LOX&v3LZbt3Vswk;wsfpfy zJ6+r7api_|cT-kWu&USF7p!ZDieln(Sy4Rilq2_V-@7aiXX2N2nle{-%;p+leJ>V zTJ}kwMa4HA#FIDiBmhY@6DLxx89L@GdTW#m}cf9e0J_Qf^(Ie`WTe|%B4@DqD`Sj4WL3(dRJH#SYpWZ9NVG+^cczFri zw`>)8zo*RIHpZx2*N>l9t|5X!W2PH>MJ8D)MIIZKR|BY?7pHla4|9UBxWw4V->1ws zPgXratgPe153+Ao+MYUL3mcU)K01!*@Z;k}B+Ix?OFWf+3~X@lSodGOt23CkwcQ|2%7E{gr11k$_^3aRp?(eZ z{%u*}iq6hg5%nAbgwGgMCqg>D;vRhM_4Tft?4xHTuWOD98x4V6s_1^FruM5tZ^y20 zWj|!Wvc0hoDMuwFw>&69MBp%!#e$!tzWyeaT6Ajchl?`i=iA#V${dv7y5YG=?c2Hu z&X-DIYcw$c>wVwWLev8ms%#NrP6(~xv@R{&n46m*xW3#>j$l4}eAIqVT{=L7RnEr^ zH<9=UjZPu!^q_I!_iv&ef*`;aIGP~dM>x8+Vj+U^;$?3|zu}TBPzP)4n{6l0g zg(Y?{RH^0Gm@3gEd2kXmZzxIpd2Te^Y7!I@VUM3O4N3xPs1BrTAGwk z`T}qnflW@-;X8N}tG!as1vC7Y+E`byyJ{%ylz*=;E`d;uhx#=sSz>QT&r=$CT~ou= zY|<3%^;NS1*gM=|T-K$kN>+ciO40eL??x^sswAV?1h$%SxKvuyO|AvW|ItU2*r{pM zlb4ql+GM0zcHc1k@M*W(nD06CG4eqes1Cbk1wVhny~D)(Y-Gey?S#rWfc%jw*@HV- z;Pvjh7EshG1drm)-v}H_=6A>g$uJePJiTx4KTr2)M%oN46zNnf zZ7(Rh6tEi^y;D&730~%d6V8}mIg6Gb75(xX74n-$AT_s`{{7SAFfb#= zi1X|^;55c3Sm5B^dNgH^G#O$c_r4)g-W*$+wwRuK`ysTj`H|m6iyNyNuSm*YxUBa1 z<`yfJOiT|2VaLVCCb7#`)rkX8(QsSD85>vgV*!Ed9(x~J|o)-)S&}m={VO?da({ z*yNq;pCD8u;{k^&eGg=luI9fc_i0YfkIP{teb@`94iJ%y%^ffxm>Uj*Bj|~!nNAu? zocOK%1lw`EKFP1ow<4nWlV%f4LC%Lu+q0+Zak0_>J^3O!XD~oU+M(t&Zx5=mSsp4l zL(02lXJ@alm=VmCPam9+e@gQ;5bce?z`*bi3=H=0xr>lKBjUDA=!>pMU^9VdK2dmQ z;>yX%O@QU3i9ck}t*3~tL;X`ZE7y9C-4_W7ScWkk((rr+0hewC^J-UB6|j&I($W+M zbtCWn;6PS&*zl~{gIwAtYxW;gbKAw|w+P%S5{p&4uG4AVl{Mkvkz`~30q<)(u3t}; z3jzCMMj-Um#5N?Wu6~1!$c?G$LMsWguQX0Bc~<{$=22osKImfkuyG>FI#1P-J7#VfFJYg!vQy|6zt`ZswF32t|{Sh489>jM^!zcPl2@!L-{#AY7FI`E*z1-Z-G{|j5 zg-FTZu7a+Rh(wv61pL2_3o3MWVSR~a}{yIsl!ZN<7_wj~wzwdpM|^K}ohMRILsBi9y87QaV3>$OC7l+B zk*<84eDqjCd^ePyUx-(i1>kF`q50N^o^s)@-JYO>To0e)zvzO00@<}*#Vp&xCgqBC zxHLRv9q6x0B{$He(~bR8*j9A3lKP&c!AOgAvAi*bPZ+wS4+7rgdu$#S_?y8Y^kI6` ze+rcVYtR&r59@htZJ;BFw&C8o&&=uDw<;99`^JzkIwwa}!7i62=sNf2v5!j#%0D3g zVAt2jcy&og&%=n92XFT(P0tk^9WA12Nxu{u`;&kuh5I67%ij4Zz@DWkGQ zvHjQMuIPXClSDfp8UIVw0LLo?wO*%+|5j~pXWz-y{3Cc?UNZi$syOCTH!G}#^fNG! z)otAM;qfuQsF8cV4AtbtL(t=Ppb1Zm&m8n!?Npe9$WR){4WqnY6Ial7z$@B(&`oKH z!bv#}(bfO^_se-Fv1fcjf`k$+W)CF(tf%K`j!ag6jryp?0`s-M=*A`?Va=JLiTq}d z5Pu_?uz>-a(0!K;yzI0%vD^3T>@R5^Ps(TngM?M~3tbBPCLd)I`P0&uT;$Uay)(4> z=f6npVq^QYTQdD%517X}Vq_Fe41|sE;P%Nt;YyD0GhFbRp#`B43knyFvFC4U(^6v+ z;dMmky?Yz`Gcb=>O#97c&b=wWo!ddfM}LYC?5GH4-*F71n7BB_<$DN!nM}C-nte%t-1$P&>hPpmZyawRZ@>B&U_HQ6$yl z-}eu9OY!2<5mK2$4nNY{y*EDw#itw$4mw(^xuUX+MY`U*PArW{T3ZVqm)+u|${j~3 zl$Vdux4ONf>C1|w+fY)TI)QHM+MSHP18vyzNMkV>P8XZq+~1Ckj`phVb5$A*aEcQx zena_R1ttjn`A9W8mz?M5c8bXdWpurI;K9wIZv6@~oE?Y>{?iQyfsM=a>&>*rp5tHCD@g{g_;esxxg&$3ANI&5IO}44mi=qC<$w`Ecdz?PLd3BPe%EI5Dhz6&Zml@JUImN}k@jC7psX7ghbrWcfB6LgM zE~pO1BjlW%NX_kS6Y{NXeHHeSyfLKmhK6uLbsm_Ah;}ilg?h_RodO#P0({BD3toJ{ zUhqHM3&CDpd0cLqoB*v#2;?z#y+>NRa6UiM?%|+?_$}Uh86EFeHcjSBX7L=b~2ON}^d~xnANjEfEJ*_mJ#LvF-N?&k(54LVp_RoW2irtq(Mw7o+^F1Jf`-%y@wytAujikJjM zJjD6XAGhQM&Y}i3i@mpv!Z8WAgQ(ISXe(nXvgaU^CO;}qM{&WMk^1=HafxpqM^eu( zL+IMK?nz1cV_F5yT_Fxa_1CX7=I01|z!wLMk0@O32aKfAeZTq>{3Mq@Ta;L!A68A0 z3k&=94lo%m-VHfcxkRw>B^wQ3HO9!Bnj%01&pxV4^<+u?dlo=F(Q()D17`dh@ybYD zIc_*RT^!T5)6D(T6NRq|YBCbG%r1*=bd|<(`?0zJtN^($S~SICw&rSYUqLv7M!-GT zEvvb^vjg{$iz_`Wxu)VdCD8c)O5^i>&uzk5R%VYIkLH4~QsGXLXcFFs(M z(6ZWlkF-5r)w%74EjSMh!S){q9BWBydhppNHpQpq%yy6GaM@l?RhpSwe>*X&n-FN&(f`gzT7&r&TRiXfv#$AuLPN$5={Ej zwRHS(^ZV|TmJ70+X4=9+1ax!}#^d^O)h|CEwM5-=$P;s7D5ku%ys%Pi_#Fe3+_Gl_ z8t)p5{=L^D;_)gyz=?0&&!^9GBQs50c1`M5)o!SPT~7CeP5Yr3<5>wG@_8RDwm)OD zl?8bY5i9BLnF;JL=;A*q^5`41Jv<+pp|3-dmq+dDqTs*1Ku(aM1YjXD@*Bw2&CPDe zw_dG8g~;Q9NlA5v)u{iAxBKz6IuPaaHOEJh%X3Uzf360?b{u)*m^5WPwGPx1xh!9O ze3<mbtpO)pgK7=H@=_})Em3feC%np^4;X5CZl?-2$N|%+ z2M%upg@sW9p`lU~EB7Zx`3vKY8rM+YmG%s8{nCPEGl#!&)Rtj~AX+cJr)@4j2j zJEP{y?Q>cJYbZn^AmG3JbcYQX1%sM4t@3)`TPNIa?_3)Vg&r?I({S%&mxd#Pxzh*1 zbJF(BDYIe*U)caBK|)Ch4my#i$C{+pd?yk=@ampkUrY=S-tUKi2m1Kj9Q^mPbS9+4 z;+|rRwl(~+yv)obzd$YYf%^Xrkp3c^=rk!)Q|5_0S>vT9f+eqO*kYA?u2ovvB8oox z9C{iv_%r$)3x57q!^=JN$brql zM%ug)_Th^IIRG9Anmf7lKRk^Ud?@vJ4Yb)?J?_XE1pJMpZOAWVQWUrZ1xvkZ_(_&Z ziwXIz^jV>*c^V}ug+&&xeuW2r%1}_y)ZFHF*-;?kOb#0zeap|0L{#Rsw}|t|V$#LgK6bqRfK6;N@a$aIL^`MK?8@kIMR;Im$(c;C zKbpw@Eu$Tzq@*%7JB!lx!>_(MrKUvFg_X;Maw1oDYht8Df9+>E5Y#otIBHFc8Eoi5 zzP_ZPVq`=_L~yTPvyyW5n>7my!)RBew;%cadnf0mk^H_Dd9eVW;LTAhdg9>G<3fc! zO`ya1n7NZ-7-6Sw+HEVqxivIc`}%^KZpQIvElYjg-q;g&GJO%kj8*#5)U&O??!>PgrxcY8IL*k#SOLcmLe8NV9V{*Q>T@9n9;vIu2-xxuzhI1$6&p84c?Ys~=R zt-WjO19TYtQdFHq^az_}lSDgeWrrP)lZH)!Tea~ z(AgdbVvWTVZbdOsQ4TV(FO@sp`zL2-q|ZAnNy(bosv88uhg6H`p$^0Zk27?mW=TXw zsP@GC#M}574Uq$eUTJT(S@|rOOzK<)4~TQ5`sm7B}e@g}(|$p6DPHm3M4lmK@mqw|(LTPjNTJs z{*Ynp)MGx`v#^0>He0)SwW+M5BWZ6>3gFjlivhZEC2pdOn4uvz2S>Hi%%py~8Uivh z1uw7f-kwG`gc$jghzObc<0yRV%+!4CP!Iw-6+8RSfB-R8wuwNcfR@pHC(zBEdzKQd z2R966>{Vc)Sc&mK+#x#$7hfp_CFSVsWN2fAIev@aQcO%*cJ>81XhxR{qN&1YWNDma zNHveicOWkWj>zmRPu!K;EU+H*s>Gz-i~uv+Utnh;(7qOmGS(<_}avs2<35bC{fZ;=7Ya~ z4?&1L6FeH#JQF?9bAll+eoEQSkV-IIE!{AU_}B>>Sx`Kb6C@T4UN~w&QYUXgxBJcw-}obsQZ1>SiR*;*FBA&r5yKpZC$+K<4IqJA316hlgD@ z-j5D)sfF1kC1E42YVtrhG&&y|++FnXKW-e;aI5@8;bCD>balAsZm-4& z$bAJ3jR2KAgER8RPP;qD)cFD5r%STeG%q|BxX>nM?BB3on=|N7F=uDATU&z%m4g5f z|Ixpdb@4$MQV{9GSLRw1U0wESM^xGP1G^l3C(*6~!r;>lA7XSZnLze*hyW z-q2Yk?t>ennC_AM3z`K#0{NFC=#o->ziD|s>f;md%O-#x;7uumGf-!TtHux)vS-M* zBwg6_c#0NGq(I5QQ1Y-mU;|dx?qr~guh56SuQ|5$Du4G$TN@T4aE0RLCVqN#O3J{2 zTOudF6bU1q<(!ndwp|v_ZAbg-6XvXw$0qhi;mx=7xqeZmZ|B6H#T+(K$R8XUp$J( zy&i|A9=0c{tDGDoI2fY=27Li9kj&|`uHu$6C-(Upi%N3XE;L%*_0e+@@1B0-E=+Sl z2a9_K@*xHAkNC@)&N!*$)AT3v;iA5ZrO%8V?~U8vL_8KHHC_|7nd048%wI3v|N8zN z1jz1sH}Tt*&PwFO?AdMwC`2hc6!96LXfLiND{ItP76!C>{xa~jgv~#h%7BGz(|j%n zY@eZVt^>f_LPzlUK0@yefNKHZb9-!}`>pZ2OHceh4x_Tr@LbWFOsGTODaLFuXU~Do zb*6X$t2B+YR8q##!0^V4tT^m}ax-n#ljSTjsV2L;hVrbsEPH2=ijEvjL-O(-!;D0} zPJBOd!LAq#iVpk?cbXGcTZ^qxGJHB`@$|s6P;UOZCpi54CjHqtG6zG85rB}Vhnr3y zh*!si1Vzovpj}v!wzziBOg_SZ~J(fN+h6PZqK*CIyylz))1H1 zv$fa1qxoLLY`dDXzjRb*XHD0{{sMVMNnrUyQ_(G-TU})s5WU(e;Xjp9!d6uqyi1iB zx1C+$m+c4obEq+qz~DpTUYfG}hI};6Pe2in%87b@xE=`E)AO^fSQNIhB6RmlV=(|L z(8jW}bDj@z@!i~!HuGX@>Eurlz2suC3LVuzzZ)=Bz*r;UJAe{B#!g+H5sx(7eL|-# zq)Hp_(AYR&lPJXWCRWP7&omPN{9~ZIc+^EM)M`8Hxu*Ry^iQ|` z#RXn9d}-qUWBwe8y(7YWd{YqA*(s99Z^r-hT2u)majByJeD9OW%$}rql%(}+Q!XR z6CM9uFDxLE&Hx%5=Bnvc^$ z1gHB9SzlZ!P#`*SMm0p6bb$iGcjk6iARZJH4s*a~`Eca|K~*Da-}KY$6wS;eRXbnW ze|WsyUQ9d`Ws;@^tO#;I4{Ty87cc+u7r4*1@&EeuYv&!R=Is2)6%TkEVPU|(YS&gg zZ{{3Wg0szd-D|hUtsJh3t>&3TR_G-Yna&=Hv+j{GceufD1ns%P+(@itk#9zOV%Xz) zGsN{VY)XVaRcxefPjv77(-sL=ih$K{=fX1@bySUJN~P@ zW!`XsW`1>GWk3Y{2|bk~t>a|RJ3A8A8uqZOt0V+{Oe<3}OI>=*67OeIjc7g~#nJ2Z zhT?ubJwMMX&xlNIygKWeLsg?67h;%u4E~d>`*_v^640sb+q8dqXR-Rk$@O_wMn+`W zv<*ObtfB@YgDWd^ymoQt_~w}E7FR9RhOu>vO*($s!M%BNHC~zC%5=Tp^~P+sfPIn+ zu&w1(O6!6e8sNR%o;w+*7&tkTHo2HcDcsoY{Zk4H@dN=&1&xT8wBQJA6gQp=1~m22 zf3}X6$y(i1H{TMx?+wD(JP@+&rC6*F`3~|)b6>j#O2Nrlkxe;J!BpsV2VJmqM1cAW z;lCrNj1KybPTQ~!6AkYJ=kongiI&q8JuPja+ggDMV0SpuqytpB)Y&C~Ze7hKzi0lq z^xN=3;@@tu3@#kkyIpJmgcv197DMrHdq$4#UuBTAZ){?pbSD?>?v29?--j0&9NVtg z*C^XUtN@m9;@7Ae8Sz#eT7YzQD6~*SK*QR&kBj||sDC-lt*NIzec>KdVs#j`S-)4P z?s?o;TXDMo`~r-GeAk&{roDLa?MKb`Zu=$I6f@Ev>)vjxu72y?6vz@IB|X@t?WcQ{ zpP#P}_!b+)0@Egxhlhlg3stee?!xGCCIb!z-OC^Y7gww59e#@ggVo_-uaP~aEP=e7 zT#ez6MfK_HR2sU0{#XB}-Ms6Jf?mf_fQZN(2`K~-5pAoK z&&Fta(2Oyb;|aUucqh# ziVe8V5yiza4UW9Q!P2MbCohwYeNOR!U&a}mNlfFU+i(G2+TzYBJH$||-S}exkP76X zpGvNFVx7AO3p`C$IYRNd#FEAtzn2a(0=ji{-{T2nZDqAS)H<&l!r1D@0+`11YPK9e z5X9@aAxHNS@g>G3eUXrO>vrp3tl63veuZXlY*KkZKv<`502+vA*eWbCwkvG%rAm89 z&hpag%M&7OC|XQi-Q$KoS)5nZXU4nPO4goCD-|zMgdM+F7)o_n5H?Z81|Ay-!sxc! z=P^jFY*{ZH+^$S-Uy-Vsa;Sehf71s!BBh~um#P8*+G<8zOnb4d9}*WYL-)t`W`XU| zwOMjsVh1~Mvy!9*1Q>4(CrSd2K}ku;3wxHZjgY==8=b3MIs04yEj5@k5F{j){!e2+5+a(u*wu$B4F(`(-~e7J9~Pjh^F*hrE07G>>;)Up z%S-$kP((`1ud*`qWWX*xStSeK*U{0@s?=iygj)~MGxF3_HQ>BNrZ3%ly`V?UVI#%N zGA_8kkh)A?#(nQ~?R|amqdlxe>%uR&JcHYcE;NDr+j~~K4mC?oe2Bm)3UGp#4#L*P zpl!YNf3Ucs_8CQuZI@oKqWWk|F%m>D>yyAV;}te#E?S_Zb1`z`UG0sbTe3X4UF{bb z{>SZ{v$Vk+tMz&D5un^n*H7XCSW_jst%(r9CnmgVJYrp9U}ERIt?Y<`jch2g8xud- z#R;viFN=n*Xr(VN-C?k;AxYk+E^9Go1&Dz^-#U$#o>wq>%E4xktLV&a>uIamL2+Uq zDR+0I*2haGV6_u#m%NFqH4dwe>u4Q(uGXzfzi0}?|CkfAv}8v_B>SUL!haS>Wol`; z^+R{@g+vRSxlmTp8I*wwHU^U{K=dk_5Uw6V!qj4&M-uz`6ae%t)O8DZAIu*qef?nT zoi@QOYK>|6y%GS_$=FIThsh2xR(C1<6C#LLGol4Fk~g<^Q7I{3EHv>!o14mh;GKf6 zo1m(y8kpZ>u4qy`kFl6QBS(g^b6_B(cFD7R#_Qecw-{BW2GM|>d<4Mhuda3i+(Q`K z|LG0se0$}mt|FS#ti1?5a&ig-Ihq}!M_UVs|Ad0o2Xo|Elxvhsrj!!K2sfI@1j3jYIXOC3)I$==OG``J-}n8H zF@EN;SuYOdL@bPq%5ANnr9F_V;+tPI-uIZU<|FTN9xgs;OKz=gAzz#dOj~_mZA30` zTCVZJ&Xb>E^_nvTgQF6-NkW1!DqN3lXSi+uxOi^ZlCYB}{Ql$vA|>rOJ-y1znpm;{ zS_1#URO0+28wQ~3i9!L;wlds{kvxDlE?YU_qQH;K-*+0aU-NDkrg?;O)DZdHuX%cH zgV*hAr;w{rLv9ab>}&mw&;p7cdyQDPgWzDeepji{%cLauYaF&WIO(3R_z8&=C@Fk( zm}AkT##~SIqQ{ha&6-%e+Kc)*D*@QM%lAvP zJ)ugDN^q?8D1Pm2MP<#XZP`KCHO+ImvYLQ<19Y7P{~p#dt?!Hp^S(JTaLsq5&SBEs z;od4tV`CeqVPGKR;dxiB5eBu)&B?(mQK70ou5(C9PiBkWNuWkThVWo9H73(dspL!+ zwPYZE&)<;%raqbR!a}8FY}MsdyO-{XJtsFGn5Y6ewqAF>G5QZ$T|5)8m_#!a-eTgS z&0_bKG+6O~=4+3)l-yo}02vPW@a%XT*Ux8ElhYgu>grw7HqH&&C&&y%zzS+NUa@%H z8Kwc0^8*2$a6n2z0yRIs(!%{j-~wflN|DiH0E3p#B0rJa&1bnZYBE5te}DFDs1ct_ zO5$y3sB-o2I8D}lP67N0GYgCLbEau&DY6`TU_gOyzF9~^L!)nK$Yh}o#ryf>5wIqr z$Ha^CKTO*!3=XPKNXSaA+4y*c0u}qEK8-MK0L{0$Y!jD{qC()kmjB0Dx%WG{*lo}; z%acp~vX&3Wj&~{QR1v?}^y{YX9njF;tjE?_XOb7_-;h)vOyfwksW}85Uc`&;`>9c4 z{4~YGUS0LazAH-Sw$9vuN-hTQO0A7Yl7<;-={Li;R2x%9{Pz^h)xmdX{^17T%Y`AQ znISTOpaMV*nyvN_!V2liX;~k(^X~Xr9*Q6TV@e;FJ#byBTAAjuu(&w4DKo0>UHQXZ zdpaOsyB1OsVGHkV@jMn|Au%Q?MJWD5$N}hQ!EAOt6g~2Km2(7@W{V-qpskUCRlu4c zth8> zV1tqx=I;RwE1TSE*Vy#@up{Jox6H(X8y1QiE%nub&8D?6cSfHd=wJb+v>d>LgRHEM>@H@B%!7C9TC)h^L>ieEBVWJfUOgG9N+?@+8(A zW~6+KOiSxE>)vkaOu7oTg&5i_907*A|i#gp$F1to!xtIRIchQ zC(%=@8&@YCcw~tz9|j+mckU$MXLato>QC>mwy}yyPf>ROG?s;nrg(JdM$&}>`BX|) z)W+Y)kHK?~5B&`aAz0N8|3svSy{90-&|q>5pt6mnA1+v8+>4=}955 zU=gF`Q-5a;lOPftEQyIT=lS+fPE*tS#04U3q(8?P4y_2Wm9g4m zU;OL;vM!`oEfEM_mP*RsH%94RJfokkFre8F_2dq}h;W&cImP4=^5>Qgoh6z9$B{C0 z#1LB1^)mnt@T<*CEw_?woKFCi=hwjP%xz0CqYaFqXl*Xm>I&z8zp*B9D=YT#9T#Ub z>D$&|U?ITc{||@4+E_37`+s{EzUQPVmj51UX&XYB_lK;mW@P73PArViZExoQPfGmb zYxu7A|4HqWlOgC@bSYW8+uzc*RQ$K+0TYjNmUsZTxleZ&#A9Yo3tl55fekmG+m}Zm z{>$oZH^nl(zP_e57AE)Mi!YWP(DbQ|Js*q$Z{={FkF_(WsSzJ@KMxlggc6Un@AGB4 zZ6=%6#!~$=VE5@G4o;^trN{QLz(T_w5ipluAHop)6%2k~4|v=-C}K#+Awawv1H8B1 ze3-sA?tE<;XHW%Kx|`$erp$X z+3oA5ySYXj6nHVpzo)b5-un{AyPJJU*E(-jR@(W~yq+)tXh`lPtSz(6rlY3c+cf2) zLtcTJ5K-~-$MmG%;P-?RZs$@gf9qTG2gV`8YM0buzYAyR0f&PLESN$rxPQX>#f;qbT2Cpc5SiOAgTH=;^9K$)dVqL&%wzm_Y)&;gyVzozn0ZLKmJa1 z_A_m(L_|+Y{f-ZEZjS1hSmM9fr4|?bcRc(cK}SVh1s*CVr=yEzObyRm13$n%;Bs+x zMw77`DLC98lHE*q_Yd zY0o_wxrIIxe+sm%E{u8h!{K3VZR9Yob+uO|&ZE_p>5~=YwHx0Y9|RkEn1Re9wOtDr zu$M^xD->;WGMkLsm0jJED@a?*VStJDeufoiOR`FQlsuM)?1cat10|v_RhSpEceq774iCF)^X_s}?qN3VZW6 zCfyGY>bL%(z#2%z-IMSkLuSL<9i^+=JanuQd%Z-2RrBz`JU}&l+A*!>D8@r3;sml} zL^HfhlPKm?ps`fz$^%H^KOn$}VMx0UbZ;4@l+~`vlu) zg#DeymGAKIeDlQo%;OYpz|?}Uo*qd4aq~5B z>sk#nm`~+QQAQa1h^L&9s)wChYoB_mH%=H0Sx=_aW*^(yb*{q^CckQJcLn(BRfyM| zp4Z2Ey3CoODc_tOcYZtIUKKL6|CcGNMMK>#2y5P1O(T<7e0*((kO zE>88lyp$R!uDtyC?30#AA1#R>d?kYw_er7+R6$iSK$?t4O{%e9+p6NRe7M-?qG{n{ zZJNO-6g#)Xs1P!Moxik%rSuj?_&fP0HJG1&DBuDP5EBpjWa9b;k0B&k?*>zTy!x;V z+3$B^anNxX-gXd7*K%{%Vl#2}wz6`IkBh^qoyWk$eD(d*GjhJnDKJO7^9S;eK|+nr zBVR$)!O7(2o8_#Gc!A~Fy}FD+lODf%g{<|X435xXGkpzD!ily0n=*O$$TXi#FC>zm zZ-o(@nu3F+enB8)6h;Jz>QDe`KZuf$l7^n0N%i;B?H(MEv$Mnbk)%{qNFCJ~aD%3% zC^a+}HNj&`M|B#dm*bh#NY3|{>gl8IW(9#d$8`;aXJP0hSn)Bizt1m$iHqo@IgRa_ zy1J(Xb91|yB$t_t^UE&F|5M#rheh3P{eBEYLB&8iB&3lB$x)D!lJ0Jh?i>|GX#wdH zkS^&CsUZjH7(%)m28KNMJp0-2yU%;}e)c)n^`3KG$G?8c%s9We*Zp0e^<9f%9h=gk z@$n$frrn?I3ZWmA##iPiqfIO_(|+zA^Fo=cO|L9OV?lNzcORX(dfsBZM)JLbK=dZn zhbjp@OQaj8!@%_i{u*UEzWPn-%AVvx~~c?qquq$$ z#t_JFJg%S>!lAC+0Z2Tue3GYiHzg3aye4757ITX}YX_*Ss`7NlG!APg&A0mcq|V3)VqTWn0oF5D9aO3$kAR>cAi@F!nNJ@xS~AdZlQ=p$ zCW)hZh?Rk^ab5B|L7AfREg|>>$;!GWc4JimpLbrL9BxZW)<%K+m%~HJv9f2)P2^k2 z8Z|ggVt(oXRE6u)cv%yZEvH(2$n>;UU46Y`)!x8Y(#7%KYYc^h9OfE#B5w>2PNb&h zPsj8*o(gb*^L#Lv6~bRR`t4}zduCfgId(I8IrT@vnB(70%rAo5Gk;P6b7R4yhf^lPp&B$5JF+Vd=$-0`HTcdG1O}X%CknIp|2pn=lf- zG~sp<@^!qA4z<17wSISH9{QXH zSMf8O{Po?ofNu#?!P`m7k1i5(cg#(kBk{+?j^U=GcMU1|gq2J*YFteDyW z-=^tm#K&L~ovy93(*=^C50O$|`cV220E-%y)i2u*8+YCisE_J zActe?Ccychd8|jSWUtlI-wv^uO@RupdXxP|N^dD+Ul~*V^=am8`?2f!5<|He8^Ogn z%C>0OYJ<#kj{Q@R*Y3&*l@NPPAQZvmXbZKc`XV`s=di9}#R<_P5p0ZAX|FS+JGr63 zSadWSRA(@aqG*La2MEslq1VJCU&>s&P3ixRJXMEevXz-xV zwY^BaP#ftbbJPt2`VilX(ofY#$^2!N<)yHhD-hqZ6$%Qi6Pjgf7~1X<&kq}J$Wb{r zKi@k?IpsBeLCC7rr0XV^nOPAEh5|TjSWujw!hf%?&#N`o$i_L+e{u>n?q0;Q7(Q6_dxMNaGsvrAIy3bGH_nb;WP2eY6D+o zm2L6B`|HDY-}$If!#w`z_}fiRP9bMP8Hs9n{sl3G@t0Pw7#kbcyDo{(VCIhuSAXqT z+TIlB5jgW6I+ql;@w#xbVho$s3`HQ@f2KXB_N?6RnIZ&t#F06@osB-K#C=FSm&C{2 zPXBt`$I5V`{`V=By#?o(H9XgD@4KU@h<_5I`&K_i3bCcBV4g5uK;E0oLk@+KqtWIb zyNitj{r&GG(y6HDy3{u+Y>=HbFyB2@vAVjOch|a;80Xm&dWA|iu(Y(1JAZV?` zUf2oR1tmnq{8ne6Hxhc=x%%qgcB*vGH=aJCk#J^Px)XjMLMGz;m94@zpw|r0RosNl zwZMp8{Wr$@gveqImdVCrk&`wF!ODz`%>|gq1%6Ty+m-sO&s+pj;IJd+2wR1XiZE77 z!J9a1Eq-_`MuSV-)HKu@j+Z{yu>=G8)qS>W9DNIzNW9%SSffEG0^7e>f@({UW$_dz zma1p_kzK33HmMt4fnnzglb%#io0*%+6c!=5xU0Man`59ChnvQohbMeG!^J};;DT~( zyFQdF(28yl#CNJCfThR}z?{CGN~-UUPW5X{)Xs$OrF?33caaRz`l*e4AJ;8W=Gxh-TOH)pPUB9iRvzO%gDSsd$KmVGuvoi#Y7-C?~t1}Wg zHIT1_gX+TA7z4iei=xrerKCn(RQ%45*YEvKA+L3m0y`HpT0EaEkC}>tR!T#E`a7nb z5h_cF1aV*AE33679lgS{t}~F_TrzbE?Viv}?-gfXgiV0hryuV9!XG|3v&CP%TEix{ zQl?exszH9f`NOE*IT=7^)x-Mis)dr}qL}wqlJ}?BI+|n0Bu>@@t7tNbl}q^`=$Jy2 z8HQa$R))azs;NtNgelVaq)cxlb2IDI)oZdJJ#30r@J*I%{8kiwXY<)$%*;8nyo=Y3 z>zX?QV~n}O4$}*n=#3C4Zv-xpUT6y-L#+T*jk4(V#GTU7e4S7c_Ixn`Q{puA;df=WBK%6tz; z8izg64V;`uii(QTdmoB#6LZG)r%P~p4oO|?oiN>9)@krZKB_(MQg8C}^E*C*`v#^d zc1(D^eMm2V|MzyE7#^!cUiN7rWyqfD?x^yN-S_veB!3D#9&7&fnE;iz*|HhltCQW| zZ>h{aa$G&ua;RvBnuCSuU2I7iuP?f#zbTzQ-qu!7C#pMy$ zqR?u|j-|_N8|7ea2)#Tpxwne;Da`4`?EO1J=B7&Op8nA_fqB#5tKZt)b@={0n1+j) zG;cUCjp60X4~lxei+STSDmqGc-y%k14i03ktQu>OqB=S-S)TIaRq{67?^IVQDJdtA zNMfSV%MzbMuqD2k^*|6LV;gKG60`Ui+;53|P?Xzo!Ry~)j~^nctxalI0q?qU*I*uji2`B5k6UCi za!VT-i;Q4T5aECD%5+}cdOh-XuL^fn<;X>{s`Tj4QsMr?sjR3gKM4>CXR(~wci)2j zq-Rr0HSX!sT{!Hf?f!a`w)M8_@YHTy)MN?7vHL3IxbqmtSFrxW*DH7aCW2;OAgBe| z%P8y9=PV1Kt}$K~3{-5J%zeK_dz-k&x`-t!KR>af!+vj1zIdtrFc+n$YhyIi;73az zwzx=`vbeCy#bEI{Ft8O42W9?~Td!VqF)%%);-!m+Ud)c-KZ@eC>1ypp9nOCuczFN5 zykYA7(zfO?j?m`Hw6wL{lOvlznJ!>LZWtf znbBgiXBBqLu-Mh($8*bYKYpjxKz9YW(qQsm>gKA{<*Xdl?i0V+GZSyDZDH^Dhl>}% z0XA0+R5PzUSIrFSUyw|13wjP^&WlN&m5qY(^y=l06W(f~G$ZI|%Z{4QxWZj&U2Xk& z9;!=dV;ykFUF-8#E=KugzZTu~MQg!eijtQY8G zgO>JQZf+h9$CnQRZutSFWeP_KBY?ZSz8k$yNc8LnbXV-gE;_TU8|Ad8BEnmnwD7gK zCqeL`eiEK`rLXh^>L71xJ_=ZV_Gj!G#4I)WWhgaTBieFI%r+ZyeR5W7={0R|x+_uX zRcexZZ*NeLDR(ZsI7DoMXueId5Pid-lo^qiZJyVtb$Zsc@(TQ1dha>>6Gh84q=$X* ze6?>3jvWtNDbmArAEIVTlFj4AW%*jz*oc&)(i;5?UXPs3SDoDtG9~x)xb)zBkU?h`rf1xT4{%I%3^k7kjEtcp=rUPd+h(gk1@4L`Eo zX6EEf1b^KR8!oCn*P`-WiMZXnAmVbbU@ z9A3E-_3>p`toU>L9@WiXCb_BERYPcHHNCA_I$Wv-X+?v!vwhj%(=}4YdVIkk3B84w zNSP-z#GN|J!Qi_^s+Z<`FjEfL59k`y{LmiqbM%roUark{+kUeqj6cz7BM=!ay#EY{ zw_;0Wl5~lX#|+tc@O}9`RPGTdqZmKG)QG-Zl{TMN*oL2!=UwUO>@;0mx4Qyn7)aOK zQMgU>CpR@cmmQ`lS-)Z>=6!V+O^>RKFt><9ZB~~kp0V=)kEz8 z*NEig*~!r^b@h7$1kHhz$y@{`Ue6Cejdw>+k^sK7?c>QpJ0y--+UFT7{ zbGU2hKEVD|O+7NkjfAa$(0s4Yh;z7VL)mmu+zgTU3b;_lvF{EJ4fX7NPsM2TC{;rA zP7CbHPE*83mRM*4LY@>0Yx#3XfZ4&yeMH^~1oUaqJZ?$}?m3T@2w-hTOF5DmSg?eD)qLXyBv0-QN_BwjdB$jBw=h7dMIJjv;a=MO2_!zn2#vCOsu z?m2dlZe$!PQElaJU%S!D2_l)RU|FR$aWePUkqX{774i|bl&>V(MS2Lj?~uB^m5z)6 zW}ms=sWjhd!{)cf2LUJP31U0{n7KsIbSR`E2ow5gtKPwk_pPKq2CGHR-Eu?21_yTL z{P;Aa{vX%KC=_2jb!H6f_%L8$Bl*y|BCg2l)8%0TY#rr3RTDN|V`E;q%nK_H0lFD( zRBV4ace+AIdgpCUW;CA2@fWbmDMi5h`=@_mQ+khWmj8U=Q_W>c1}yUC(e=-*tx0_a z$nEOUIBrusz~wQ$Qr9(MimXV=KV7ERcbVwM<(Zh67)$Z_Y^h8a!D+~Hd0Ny*%!k;0 zQ578Q16eur1m!{hUNd8K;AI#sQ209;C3TDZ!}w3*)w0ZA%vLvK3utu+jZLO znA`B(=GnT`O&Pe#ZV?StzE-@#L{7umYS2YQc(6&KvBXWPhE7eF0~|^zJF%--6%FFG z8N0b|7M#Psj4GZ{^6~QMN9W{nUioF@iq9gDl(KdX2YxmN9MAe>p6#>ElFeDeRM0Bu z?Dh0d-K~1K>$*MN4f4M@c0(3YD6)A)v88zYjft7*HG|K$CG&=rG&RLFlRb_8fN z*7xl$YQCd9=58w&tpW`}vhy-mjbc5k+OV*rJCsLHcX$0D*+oTe1Y_TPDQe=O$wuXj z`3P?N>}gC%(5I1j{_YQBxb0K5(VeprdiUemw!~)+flB~c zPYe#8Ra3lL^Y$+6=wRt!mD;+g;VzVfXSy0<}QD7jGdHWyNVR~_r zpN2T@G){;yE<=WJ_sC7eK2KPK`8mmB0~2O;tIif!81gwUg6)609upUA_jsrmUFqLmEli@L2ZPkqR}}jY+58&Blmm z0xrq0XVFc7p2ea^etO1yivfb+h8FG@{KO~lO* z>T_DlCb;t*)G?#kU{RBk509}|q-8(D-&j65I+m`S$ZzjdMvrZ{Mku#)+HJ{tyNJA* zI(cG=984lsI6RjKJ00~o*uBAGh8s!$gv)v5UggBR)dfcB&5fkfhEOo;YcIY}V>t6@ z8*FWVS-gRcVPN5&-k=NEz8?n#%CHqL?DUBH^a_X-Qb5&!4GUZb@RC>WXjLN7;PDVW zvzHE^x&|6jmwMvffu_^C;?I@p1=Ztn4`ygMVc-RyDW7USr3MG!fBj^sBl)+Zds|@#r(N`$^7BvG1l8T`;9jX#(b*pSP((MkFN{HkG9l4~b~Dd=gtV8y~xW zJDusMoKsK$FJ9~RRcoke-t-Zd@Gk}Ly(m6$snlowaJ*I3-`FkfZ|0aH@L6qtGi5nC z*aRa!q$K2)v@FlAitUUXrg6JrqiaRJrIvhIxnFwoW@vRTdAuwS2(SXMuC8w3NfVIN z2c|UzqL<@{)VIA6GR3793*x@~)he8rcn&^qm{?)YiZ|-l8zu)4MGt|XUsW8(Co>YI zIb)mTg}O%^fa&QvApeTR-d>^X-UBGL=XCX4qtTTI!p~ow?|se+0Zz}HquzOhAp9W_ z(dV3r)@dK?u`&bXW#!bD35R^PaiRR_Y@ZrhV zPIq<5Wj41<=q>OCvzQNs_(nfh>*6-ihb1>zDZ4v`z4(V)GvF^ylDzKujRD91KG)6& zTG`W^9)-9&EEv!0^1$chfKKp~9W;Qc_W-mi`uep#HBHNS<>lk>`?I&n#!~#-B@jtm zX~S0io*s%|7nOC!2cV05l{irl_U`?eXfPp}_@!}}NOOLia!!#QXul8p_N^h9l8*#d zpFBBFp9oLjvmPvWrKqO&0z~6Ghat4Ix6S&J>&x_*T6GV7n%-(igi1BrenPo2w?z7b zgvGyiM0#LTtg~b6)3{N!;rv~8OC?UKpD)OSoIiYBaF`VK5+Do=^w`a4{53RrwJQ0O z)p8h#6{0kLKi$kWoBvZ`Xa>q{{2|y!>-Hc_q&}MLlOJN*Lx(eW;0o9!Gh^ESaO(g{ z3rzcEw{i8%FO9M1?8{x5NNvG2rV{#K$`glOg`Qt!V`KBb zBlis$=MocjdI)f~E|>%IX8hy{8vPLnQ{VlRuq!(oC4^EkNNMq5@%9jMajs2a50=(ElwK&iG@! ztY*)rgx*+NJMqJY99@!!FZ8P7Np(F1KKYzZF70VxGB#L@aGQ+rvR_J~nVAu*{sNXm zZR$rC>}HR>E$Z7o6Jt&lVSmO>&RkAVv=}-J)0@4vY9sY~Xr$7jl3VZau3RRGRu94Q ze0$OP?3R+M>D6y-8w?uG0?^^pu#gZ@Mf?L#Ve4r$(iGX!BFf10bQPt8`k81OR-l%* zHorMt9|M#=r(v@OuW!Z2{&aui((#U-t{YAFh(p!#GD=DUOE}?=&S)eZi;?85$iObj)eGak&3$_)M{T*h39Tq3$$a zGOssFrq`>Xqo9`1ye50?Q=sR4N;9+7SSB@7;$wFD;IW2+KX-%9Y=nD6X$NZkQE>T` zw;UGlK^!xmj&ynO0am_Btrm)l+KBoP?Y{W*Zcep}e-7syPL zp2^wR=!O+3<6}Vt)h~u}mj+OFpdrLRIqbZk=p-7W&Vh6R%_OuQ%^LoIE3PQgV3aM) z!{-rrRP=Uv3Qwk$FC;>|Vz|)ezA$O4d!GJX(@a zSpJn%Ts4hRxl4KfMCAf87~#M?&xoL|Ej+l*>hg#J+B9ro=cZ#g=xM2l-&VZ=`Bv5H zQ27AVNLE+lhK5E2D%;u);VQ~OS(}KCx!F%!%hOm+kdBVA^+`1~J!x}u`qWe^qT91y zqf{N?3V<+f@?ZaP?ZZ{0z;3)TYKd5@A(@>SO}&e9TK;ARLEefg4T zaWN_vnJ-ZUfdN#EA;u8Ui18CR1UQ5k`!&xz(zf}GC$G3}jPHK$*z-sp2Xn6#-X&yyz^8`GsRfX$qu$Z7dKSlAkm zS?Co~Uc3-fQK1zPnLb+^RnSpBEdmCsgeOOAV4whQ(0KD~i@q*DcS7%X8yRx|zJ>M= zD4)R)QbwXe5K>-S*QKXA40OD7UBz^~zW0pbRWLTk^1npN$b8T%uO<8P<+G0;C3*;g zO7$N-Qbh#QpZIdM*JGYGP;cP!P~wz{c{;bf?fv_oUKEU#!`2jG=Y4?M4uQ>BH~L81 z+utkZP()CQnS4i{0-|No>$RSppGyba3#I0A^P%A_Nl5j^{VU*0XQt)^gwsSwf$F8T zHnX7XR`n8)`!Yds>5^nJ24Jg~^^!cQ{004IGFTWq+DUPDNcn&xKwY)ditS)W zrr60Us;a$vi=lXN=>G;8oSnKPlxGgRt@J|y?>8eleux* zI3fB5H>Yd#zAkM&hK*u#s}MW@Mu;mvIn>L=fVKdZz-0wZkLpheEFX^P#NR#I@8jYq zoJJ?c7@7BX1f4lL=KOt=sB~Vc?f74v5v0ADMwM<=u2XO_1ix#WO?R?K;!nT%u`s|g!Dx8RA*FF~nSc{WowhFf>u(28evrsZEh|gx6PXc3gwN-4 zS+zI+gp}Zk3?+G0x#ecJ|BRLRMMY$Zfi^N z;)M?})Rz0gQ@9$g@GOkaZkom0$D1gbJ2+@~cwsTAU1q&4?yMBR(Wj>>njYbCTv-nu zJa~zE*TT)i!zEgSD@M#6s?m=@kg==N5=hCPxxxrN#Qu=9q7afFT=l6FhuC68wJdO;~(zeDPl>V(wuR9YsOdQ1Iw?>CO+=u`!e0 z0OE8w+E~?LU|MR6xe!%$3R-KZjo(i;s$^b>!hSuz(*1sx4uC;`zM;MXU(Va7|6{!k z#BEcXhZiv5oVP!1f-=N6_u#Gm;I1xDFQoQrKIUMNP1-&yj%CHyW1_3U2?Y_ccz2Ku zjDIm2`*od!gru{x6WH{=BzV0Cu-FU{`%2rIv|GInQwTGEhfg*XBMG~B$U=Y{1}zT0 zh8^}LDyny5d@vx0v?;nkFj4(}EzU?;8F}6A67-oTOW2fGNp_K4lUZr?hBDZoL6UH= zO&C<&){`@co^=GP7^uj& zwnaFujx=j>DECk%p_fn0EXz-vivaCW@HtTSI7R6~z-cKoe|fR6oU#Z$AUv)*WfSS9 z=(0-gwUZyZM;0bDo`q9Lv=yi1;-WPq)`YVRv8* z{hK~3E>5GH(Bs9Vt%c##JNx9i+M8l2yr3ffNA?d=j2_km^iyEb@GRPScfd<)Z?&#n zrcF}h`3XErtY$&`4)YoBH`jA>Wg%s;OE{h z>ZxmO5-0?mISUMTlM4+mwWf+`)U8keGFvN5dGGi3ncJmrKg8s3ST+=crBN^BBsMin z%qh*>2VO_BNSkieX)Y`ieraIt_E05%9kqMi4bCj$dk=DWcvz;x^kZt(E5aA;)Y@K``x5%R-nG4Dsb46?CzxPt%L zv(GTtMiv9f$NYNiXAX1tS1vAu4t^Pxb&YP1jX9kTnd^j!ODu^nF)>Levqw{j-QWV& z2Z=C}PJKQ7w%Nq=T&~f<%8*eIERNm1xExI$%)>*dPMcC~k;4cvg&q%ejB4flSE{vo zk!yzMKV&OkDXaYEW@uif78`3|nzQ!P*E(}m+5vxeCs-#T=jKj~b934TUCm8)cf^jI zw%_wZOiVUhY?+yhzJs(6%*(j#Zx1R>)$^gV5Eb{87O|1OE@Mja?gz z|JAI(*7(HuUsxpca+uOxS^fpEy{aWAQINb8HnF=_^3RxUbOI;dFjLO{Vw~Kyzd0vsA;VM5gsLdI;`PUgWND{d?((;QzaH<@sm+1ife(WJ6>? zB;6Nw6ZWF|5@2O^1Q@2XTX3-s8ITt|QG5)zbHL}DSwDV#9yfA8)u*L(jZ^Rtmt*8S ze0?1tnuze~6kxH{tZ}{%gDv}ki}MkvtKIWi9q8{rNiONv|89~i!~ai7?!T6>O6ux; zutxOV4AVu7!9|83D6m{!pH_!g_@?P#sW)1B{Ud@F4&xK+3@Q19Ro5KFBDU z908d3iQmZ#fRAT(EOVQx*0!#Zf4sN#=q(H3eay@5HMejV()@8GdecoRob`{NfA}Q* zk4LSKgwBVyF=+E)dsxG`9Oj*^M_(k?(`kV7k{00^eE elB1Cp3tvzI~twctSs>2 zLsKX-XyZUNfQLa7y!TS$yarkHNOY;s4IV-fL@UNstw8=V4~^<`lKC^1rPDbTaQ~;7 z_e_`a*K19wgU_1RufuhQF^B?mBP2mh7Z)jPOF;2P{7K#w7{yAQc=Ns;~>lwos#UlbbjEQuU|{@7H}!Gh6@77Ul@It*VXImBlNCZ z)SX^(Ns zw|gNHVjwSx<+i+9f9+aMA#d>$8~W8%VuVkDw+oh`AyvNnERTH3Qv+a_$5cx_J#Sb~ zYi}3xf;lw*(h+R_RA8d%99k1Ywd?oGf(Oh3-XCAq?%;RSagpfN0I5!ibEl|82#I`= z-j%Z0UZRk;y4`j`4J~a|^FJjM-}yl}b{y~%w6&Sm?F_sEEJXEiYB*6{rWdVv>Xm9X zsoN{24p$N+NdBg{kytpiKdD2r0Y@0E{%am zqNhoFSz&cIX-rU1WgGl*GwFlX!2|$*K%=}K2N$5sK#zis9`l(Czn2mpxag;9*Z>bd zcsM65MHJkx(b{k9L1E3+G{oq}>Av$^KKbeI)=sX2>HFAm410*-Q#yuQ(Kvjv$jX}y#dlL`W z6bNGP3dRr+N5;S~0kFu&Hbcu_TX7a<$AOJ4FRx>3O7IK7>O>)-S8J==ZfB=_0$HC{ zAtG!7l}+rn;z5DhbV}$p=HcO~CByyS6~Do`!IGCEF6Zg$$Im@UA3Q5RWSw9QQLo3O z_ynbG=&D_7SXHfxY8=FZLlB1`388R!VQifK8Xv1(uP@&1!gU-4yuBch)(FS=uxBBS z^M-T5kXE;C(}}MEva#&ZAt^S72iddWcTfUycZL0>fBLXhPJVt`evbpzWkrTUM(W)D9j!9pzm9d5Bg-Qt-@AZ&s1&w6I|K%!@U-C2T3mN~vr+A{##IQX{tLT;;&9yhDzbeY6py^?>WM*n!tQ+g z7oXp+D>to{XUh>Dpg88@H56hQlf0BF*yuN^J;D!1db^m}0i$>DRN-snoSaQi<=+X7 zt*KC(rpahja%x6l^E`GH2@uE3Z~lA5am=O`U*7)>ajf`n#4)J|o^BFl`KBV*R$E5B?9uh=0=IAblIi^=HnsCDCc2bY z*K4kIzP(Z$Ocwr9)8h&DT{73LZre(bMp4nx#b3Dw#xPhw#tdN<^93is{|3reAN)VE z+dqe$psGXsGbKQL;E#ljRHqc;*=@@9eM_BPi}?wf3+|EHJZ55w=*i#cj&4a5a=9U{ z2xcME($V24%iW=b?Ge|y!BnL{l^%Q+G%Y=ScfiwO%~|B!D82E}FH9Y|G%}+0>Qy&@ zv+RbGL^lq$;h=IZB^wN%FkIa4wFEPEVmo_<0n^r}rf#{3dndJ`;oVeiX`$blyJc`M4`Bkk`}O0jK!oq=(p#6< z{b)Ert`L&RMP|g64)0NQ*+uNrG~{vPoWWao1@dvs`e3Jp7&&``R0Z07yOejunHqr|%wP7MTx|JhBLs{p_8t5(0!9aCssmVS4uuk} zmZ#&id@sJssp_TofBC|W>2)w*fBD1r{8C9pXM(kK&V(YI02k8cDctwu?!J31xN4JR amr!R?{S!EkUls)XkrtPKStx4o{(k`F+lM*; literal 0 HcmV?d00001 From 06f126bc018e7ebe2c80b14bade7bbbaa60be330 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Fri, 28 Aug 2020 04:42:56 -0300 Subject: [PATCH 39/90] config: remove debug Signed-off-by: Niv Sardi --- config.py | 1 - 1 file changed, 1 deletion(-) diff --git a/config.py b/config.py index 7b6d0a2..73348a2 100644 --- a/config.py +++ b/config.py @@ -105,7 +105,6 @@ def try_load_json(j): for p in paths: c = try_load_json(os.path.expanduser(p)) - print((p, c)) if c: return c return [] From 0757c2e160b6dd4508663fb8f0784057b08588ac Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Fri, 28 Aug 2020 04:44:18 -0300 Subject: [PATCH 40/90] i never get markdown Signed-off-by: Niv Sardi --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0bd94ad..06f5d5e 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ then `sudo python get-pip.py` # Configuration there is a nifty tool that will generate a config file in the default location (`~/.config/twitter-tools/config.json`), just run `python3 ./setup.py` and you'll be prompted. -[./config-screenshot.png] +![screenshot](./config-screenshot.png) we'll pick up the first entry, not that we'll look for `./config.json` and From 06f9a6974af141c84298cae9e0f48c1e5f98feb8 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Fri, 28 Aug 2020 05:03:21 -0300 Subject: [PATCH 41/90] fix get_user_ids Signed-off-by: Niv Sardi --- config.py | 6 ++---- get_user_ids.py | 29 ++++++++++++----------------- streaming.py | 3 +++ 3 files changed, 17 insertions(+), 21 deletions(-) diff --git a/config.py b/config.py index 73348a2..c5680d2 100644 --- a/config.py +++ b/config.py @@ -173,10 +173,8 @@ def add_argument(o): add_argument(last) - opts = parser.parse_args() - if opts.db == DBS["default"]: - opts.db = load_db_driver(DBS["default"]) - return opts + return parser.parse_args() + if __name__ == "__main__": parse_args(options) diff --git a/get_user_ids.py b/get_user_ids.py index c8ebf4d..f914878 100644 --- a/get_user_ids.py +++ b/get_user_ids.py @@ -8,37 +8,32 @@ def twitter_login(): opts = c.parse_args([c.CONFIG_FILE, c.CSV_FILE, c.USERS]) - authdata = opts["config"][0] + authdata = opts.config[0] users = None try: - users = opts["users"] + users = opts.users except KeyError: - users = opts["csv"] + users = opts.csv + + sys.stderr.write(f"looking for: {users}") - print(("looking for", users)) auth = tweepy.OAuthHandler(authdata["consumer_key"], authdata["consumer_secret"]) auth.set_access_token(authdata["access_token"], authdata["access_token_secret"]) - return tweepy.API(auth) - + return tweepy.API(auth), users -API = twitter_login() - -def get_user_ids(api=API): +if __name__ == "__main__": + api, users = twitter_login() handles = [] for screen_name in users: try: u = api.get_user(screen_name) - print(screen_name, u._json["id"]) + sys.stderr.write(f"""\n{screen_name} -> {u._json["id"]}""") handles.append(str(u._json["id"])) except Exception as e: - print("ERROR", e, authdata) + sys.stderr.write(f"ERROR: {e}, {authdata}") - sys.stderr.write(" ".join(handles) + "\n") - return handles - - -if __name__ == "__main__": - get_user_ids() + print("\n".join(handles) + "\n") + diff --git a/streaming.py b/streaming.py index 435a2d1..4b80015 100644 --- a/streaming.py +++ b/streaming.py @@ -43,6 +43,9 @@ def run(): """ opts = c.parse_args([c.CONFIG_FILE, c.IDS, c.DBS, c.TERMS]) + if opts.db == c.DBS["default"]: + opts.db = c.load_db_driver(c.DBS["default"]) + database = opts.db config = opts.config[0] From 72bad01aa55b3a51d4f893db1bedf141e9dc710e Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Fri, 28 Aug 2020 05:25:57 -0300 Subject: [PATCH 42/90] make ids nargs too Signed-off-by: Niv Sardi --- config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/config.py b/config.py index c5680d2..316af71 100644 --- a/config.py +++ b/config.py @@ -120,6 +120,7 @@ def try_load_json(j): IDS = { "flags": "-i, --ids", "dest": "ids", + "nargs": "+", "help": "twitter user ids, as a comma-separated list", "action": ParseComasAction, } From 3d57f1df21c4fabbfe6165f0c13049f0a2266784 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Fri, 28 Aug 2020 07:41:18 -0300 Subject: [PATCH 43/90] migrate to logging Signed-off-by: Niv Sardi --- DB/generic.py | 12 +++++++----- DB/mysql.py | 28 +++++++++++++++------------- DB/pynx.py | 9 +++++---- DB/sqlite.py | 33 ++++++++++++++++----------------- config.py | 7 +++++-- get_user_ids.py | 10 +++++----- monitoring.py | 9 +++++---- requirements.txt | 1 + screenshot.py | 6 ++++-- streaming.py | 12 ++++++------ 10 files changed, 69 insertions(+), 58 deletions(-) diff --git a/DB/generic.py b/DB/generic.py index 7070a0c..f9a7d85 100644 --- a/DB/generic.py +++ b/DB/generic.py @@ -1,18 +1,20 @@ +import logging + class DB: def __init__(self): self.name = "Generic DB Driver" def getTweets(self): - print("NOT IMPLEMENTED") + logging.warning("NOT IMPLEMENTED") def writeSuccess(self, path): - print("NOT IMPLEMENTED") + logging.warning("NOT IMPLEMENTED") def markDeleted(self, path): - print("NOT IMPLEMENTED") + logging.warning("NOT IMPLEMENTED") def getLogs(self): - print("NOT IMPLEMENTED") + logging.warning("NOT IMPLEMENTED") def save(self, url, status): - print("NOT IMPLEMENTED") + logging.warning("NOT IMPLEMENTED") diff --git a/DB/mysql.py b/DB/mysql.py index 67e423f..3bad0ff 100644 --- a/DB/mysql.py +++ b/DB/mysql.py @@ -1,4 +1,6 @@ import MySQLdb +import logging + from . import generic @@ -26,15 +28,15 @@ def writeSuccess(path): [path], ) self.db.commit() - print(("Screenshot OK. Tweet id ", path)) + logging.info(f"Screenshot OK. Tweet id {path}") except MySQLdb.Error as e: try: - print(("MySQL Error [%d]: %s" % (e.args[0], e.args[1]))) + logging.error(("MySQL Error [%d]: %s" % (e.args[0], e.args[1]))) except IndexError: - print(("MySQL Error: %s" % str(e))) + logging.error(("MySQL Error: %s" % str(e))) - print(("Error", e.args[0], e.args[1])) - print(("Warning:", path, "not saved to database")) + logging.error(("Error", e.args[0], e.args[1])) + logging.warning(("Warning:", path, "not saved to database")) return True def markDeleted(path): @@ -47,15 +49,15 @@ def markDeleted(path): [path], ) self.db.commit() - print(("Tweet marked as deleted ", path)) + logging.info(("Tweet marked as deleted ", path)) except MySQLdb.Error as e: try: - print(("MySQL Error [%d]: %s" % (e.args[0], e.args[1]))) + logging.error(("MySQL Error [%d]: %s" % (e.args[0], e.args[1]))) except IndexError: - print(("MySQL Error: %s" % str(e))) + logging.error(("MySQL Error: %s" % str(e))) - print(("Error", e.args[0], e.args[1])) - print(("Warning:", path, "not saved to database")) + logging.error(("Error", e.args[0], e.args[1])) + logging.warning(("Warning:", path, "not saved to database")) return True def getLogs(): @@ -85,8 +87,8 @@ def save(url, status): (author, text, url, id_str, 0, 0), ) self.db.commit() - print(("Wrote to database:", author, id_str)) + logging.info(("Wrote to database:", author, id_str)) except MySQLdb.Error as e: - print(("Error", e.args[0], e.args[1])) + logging.error(("Error", e.args[0], e.args[1])) self.db.rollback() - print("ERROR writing database") + logging.error("ERROR writing database") diff --git a/DB/pynx.py b/DB/pynx.py index 682ab66..a7b4700 100644 --- a/DB/pynx.py +++ b/DB/pynx.py @@ -1,5 +1,6 @@ import networkx as nx import unicodedata +import logging import json import re @@ -67,7 +68,7 @@ def __init__(self, filename="graph.gexf"): self.G = self._open_graph(self._user_graph) self.H = self._open_graph(self._hash_graph) - print("graphs opened", self.G.nodes(), self.H.nodes()) + logging.info("graphs opened", self.G.nodes(), self.H.nodes()) def _open_graph(self, filename): try: @@ -82,10 +83,10 @@ def markDeleted(self, id): self.G.nodes[id]["deleted"] = True def writeSuccess(self, path): - print("NOT IMPLEMENTED") + logging.warning("NOT IMPLEMENTED") def getLogs(self): - print("NOT IMPLEMENTED") + logging.warning("NOT IMPLEMENTED") def _write_all(self): self._write(self.H, self._hash_graph) @@ -102,7 +103,7 @@ def save(self, url, status): add_tags(self.H, text) add_users(self.G, text, status) - print("H", self.H.nodes()) + logging.info("H", self.H.nodes()) self._write_all() diff --git a/DB/sqlite.py b/DB/sqlite.py index c90201f..9d2b675 100644 --- a/DB/sqlite.py +++ b/DB/sqlite.py @@ -1,4 +1,6 @@ import sqlite3 +import logging + from . import generic class Driver(generic.DB): @@ -33,31 +35,28 @@ def _commit(self, query): cur.execute(query) self.db.commit() except sqlite3.Error as e: - print(("Error", e)) + logging.error(e) return False return True def writeSuccess(self, path): - if self._commit( - """UPDATE Tweets \ + q = """UPDATE Tweets \ SET Screenshot=1 \ - WHERE Tweet_Id='%s'""" - ): - print(("Screenshot OK. Tweet id ", path)) + WHERE Tweet_Id='?'""" + if self._commit(q, path): + logging.info(f"Screenshot OK. Tweet id {path}") return True - print(("Warning:", path, "not saved to database")) + logging.warning(f"{path} not saved to database") return False def markDeleted(self, path): - if self._commit( - """UPDATE Tweets \ + q = """UPDATE Tweets \ SET Deleted=1 \ - WHERE Tweet_Id='%s'""" - % [path] - ): - print(("Tweet marked as deleted ", path)) + WHERE Tweet_Id='?'""" + if self._commit(q, path): + logging.info(f"Tweet marked as deleted {path}") return True - print(("Warning:", path, "not saved to database")) + logging.warning(f"{path} not saved to database") return False def getLogs(self,): @@ -77,8 +76,8 @@ def save(self, url, status): """ cur.execute(c, (author, text, url, id_str, 0, 0)) self.db.commit() - # print "Wrote to database:", author, id_str + # logging.info("Wrote to database:", author, id_str) except sqlite3.Error as e: - print(("Error", e, c)) + logging.error(e, c) self.db.rollback() - print("ERROR writing database") + logging.error("ERROR writing database") diff --git a/config.py b/config.py index 316af71..3372c2d 100644 --- a/config.py +++ b/config.py @@ -5,6 +5,9 @@ import csv import argparse +import logging + +logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO) def flatten(lists): return [i for l in lists for i in l] @@ -71,7 +74,7 @@ def load_db_driver(arg): filename = filename or "graph.gexf" else: - print(("ERROR could not find db driver for ", db_driver)) + logging.error(f"ERROR could not find db driver for {db_driver}") sys.exit(-2) return Driver(filename) @@ -100,7 +103,7 @@ def try_load_json(j): except FileNotFoundError: return None except Exception as e: - print((e, "is your config file well formated ?")) + logging.error(f"{e} is your config file well formated ?") raise e for p in paths: diff --git a/get_user_ids.py b/get_user_ids.py index f914878..c2764dd 100644 --- a/get_user_ids.py +++ b/get_user_ids.py @@ -2,8 +2,9 @@ import time import csv import sys -import config as c +import logging +import config as c def twitter_login(): opts = c.parse_args([c.CONFIG_FILE, c.CSV_FILE, c.USERS]) @@ -15,8 +16,7 @@ def twitter_login(): except KeyError: users = opts.csv - sys.stderr.write(f"looking for: {users}") - + logging.info(f"looking for: {users}") auth = tweepy.OAuthHandler(authdata["consumer_key"], authdata["consumer_secret"]) auth.set_access_token(authdata["access_token"], authdata["access_token_secret"]) @@ -30,10 +30,10 @@ def twitter_login(): for screen_name in users: try: u = api.get_user(screen_name) - sys.stderr.write(f"""\n{screen_name} -> {u._json["id"]}""") + logging.info(f"""\n{screen_name} -> {u._json["id"]}""") handles.append(str(u._json["id"])) except Exception as e: - sys.stderr.write(f"ERROR: {e}, {authdata}") + logging.error(f"{e}, {authdata}") print("\n".join(handles) + "\n") diff --git a/monitoring.py b/monitoring.py index 369e4a1..077cc5a 100644 --- a/monitoring.py +++ b/monitoring.py @@ -1,4 +1,5 @@ import requests +import logging import config as c opts = c.parse_args([c.DBS]) @@ -12,14 +13,14 @@ def query(url): if r.status_code != 200: return True else: - print("Tweet still exists") + logging.info("Tweet still exists") def read_database(db): cur = db.getTweets() for tweet in cur: list_of_tweets.append(tweet) - print(tweet) + logging.info(tweet) return list_of_tweets @@ -28,8 +29,8 @@ def check_tweet(): if query(tweet[3]) is True: db.markDeleted(tweet[4]) - print(("tweet deleted, id is", tweet[4])) - print(("url is", tweet[3])) + logging.info(f"tweet deleted, id is {tweet[4]}") + logging.info(f"url is {tweet[3]}") if __name__ == "__main__": diff --git a/requirements.txt b/requirements.txt index 7b054b3..388d9f3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ tweepy needle pyinquirer +clint diff --git a/screenshot.py b/screenshot.py index 05db4b5..eaad302 100644 --- a/screenshot.py +++ b/screenshot.py @@ -1,5 +1,7 @@ # Run me with 'nosetests screenshot.py --with-save-baseline --nocapture' +import logging + from needle.cases import NeedleTestCase from needle.driver import NeedlePhantomJS @@ -30,14 +32,14 @@ def list_to_screenshot(self): try: self.driver.get(Url) except: - print(("Url doesnt exist ", Url)) + logging(f"url does not exist: {Url}") logFile.write("Url doesnt exist \n") continue try: self.assertScreenshot(".tweet", Tweet_Id) except: - print(("Tweet deleted ", Url)) + logging(f"tweet deleted: {Url}") self.markDeleted(Tweet_Id) message = "Tweet deleted %s \n" % Url logFile.write(message) diff --git a/streaming.py b/streaming.py index 4b80015..5a9edba 100644 --- a/streaming.py +++ b/streaming.py @@ -2,6 +2,7 @@ stream tweets to database driver and stdout """ +import logging import signal import sys @@ -27,15 +28,14 @@ def on_status(self, status): tweet_url = ( "http://twitter.com/" + status.user.screen_name + "/status/" + status.id_str ) - print(("TWEET", status.text)) - print(("URL", tweet_url)) + logging.info(f"TWEET: {tweet_url}\n{status.text}") self.database.save(tweet_url, status) def on_error(self, status): """ error handler """ - print(("error", status)) + logging.error(status) def run(): """ @@ -65,9 +65,9 @@ def run(): # test authentication try: api.verify_credentials() - print("Authentication OK") + logging.info("authentification OK") except: - print("Error during authentication") + logging.error("Error during authentication") def signal_handler(): database.close() @@ -76,7 +76,7 @@ def signal_handler(): signal.signal(signal.SIGINT, signal_handler) stream = tweepy.Stream(auth = auth, listener = listener) - print(("STREAM", stream_config)) + logging.info(f"STREAM: {stream_config}") while True: try: stream.filter(**stream_config) From 0c52462a84d77ef6957003e236ecf431691f55a2 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Fri, 28 Aug 2020 07:48:52 -0300 Subject: [PATCH 44/90] add tsv driver and make it default Signed-off-by: Niv Sardi --- DB/tsv.py | 25 +++++++++++++++++++++++++ config.py | 7 +++++-- 2 files changed, 30 insertions(+), 2 deletions(-) create mode 100644 DB/tsv.py diff --git a/DB/tsv.py b/DB/tsv.py new file mode 100644 index 0000000..49e6305 --- /dev/null +++ b/DB/tsv.py @@ -0,0 +1,25 @@ +import sys +import logging + +from . import generic + +class Driver(generic.DB): + def __init__(self, filename=sys.stdout): + generic.DB.__init__(self) + + self.name = "Simplest TSV driver" + self.filename = filename + + print("id\tauthor\ttext\turl") + def save(self, url, status): + try: + text = status.extended_tweet.text + except AttributeError: + text = status.text + + print("\t".join(( + status.id_str, + status.user.screen_name, + status.text.replace("\n", "\\n").replace("\t", "\\t"), + url + ))) diff --git a/config.py b/config.py index 3372c2d..2da05a6 100644 --- a/config.py +++ b/config.py @@ -57,7 +57,10 @@ def load_db_driver(arg): db_driver = arg filename = None finally: - if db_driver == "mysql": + if db_driver == "tsv": + from DB.tsv import Driver + + elif db_driver == "mysql": from DB.mysql import Driver filename = filename or "mysql://" @@ -145,7 +148,7 @@ def try_load_json(j): "flags": "-D, --database", "dest": "db", "help": "database system to use (mysql, sqlite, elasticsearch)", - "default": "sqlite", + "default": "tsv", "action": LoadDBDriverAction, } CSV_FILE = { From f1591f7e09ae097fbdf754d1dcd9ccb7c28d7792 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Fri, 28 Aug 2020 16:24:03 -0300 Subject: [PATCH 45/90] allow to pass usernames to commandline Signed-off-by: Niv Sardi --- config.py | 24 +++++++++++++++++++----- get_user_ids.py | 39 +++++++++++++++++++++------------------ streaming.py | 2 +- 3 files changed, 41 insertions(+), 24 deletions(-) diff --git a/config.py b/config.py index 2da05a6..3a10c20 100644 --- a/config.py +++ b/config.py @@ -7,6 +7,8 @@ import argparse import logging +from get_user_ids import fetch + logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO) def flatten(lists): @@ -98,6 +100,17 @@ class ParseComasAction(argparse.Action): def __call__(self, parser, namespace, values, option_string=None): setattr(namespace, self.dest, flatten([v.split(",") for v in values])) +class FetchUsersAction(argparse.Action): + """ + Parse a coma separated usernames into an array of user ids + """ + + def __call__(self, parser, namespace, values, option_string=None): + old_ids = getattr(namespace, self.dest) or () + ids = fetch(namespace.config[0], flatten([v.split(',') for v in values])) + ids.extend(old_ids) + setattr(namespace, self.dest, ids) + def load_config(paths): def try_load_json(j): try: @@ -126,21 +139,22 @@ def try_load_json(j): IDS = { "flags": "-i, --ids", "dest": "ids", - "nargs": "+", + "nargs": "*", "help": "twitter user ids, as a comma-separated list", "action": ParseComasAction, } USERS = { "flags": "-u, --users", - "dest": "users", - "nargs": "+", + "dest": "ids", + "nargs": "*", "help": "twitter usernames, as a comma-separated list", - "action": ParseComasAction, + "action": FetchUsersAction, } TERMS = { "flags": "-t, --track", "dest": "track", - "nargs": "+", + "nargs": "*", + "default": [], "help": "terms to track, as a comma-separated list", "action": ParseComasAction, } diff --git a/get_user_ids.py b/get_user_ids.py index c2764dd..b9dcd3e 100644 --- a/get_user_ids.py +++ b/get_user_ids.py @@ -6,34 +6,37 @@ import config as c -def twitter_login(): - opts = c.parse_args([c.CONFIG_FILE, c.CSV_FILE, c.USERS]) +def twitter_login(config): + auth = tweepy.OAuthHandler(config["consumer_key"], config["consumer_secret"]) + auth.set_access_token(config["access_token"], config["access_token_secret"]) - authdata = opts.config[0] - users = None - try: - users = opts.users - except KeyError: - users = opts.csv + return tweepy.API(auth) +def fetch(config, users): logging.info(f"looking for: {users}") - auth = tweepy.OAuthHandler(authdata["consumer_key"], authdata["consumer_secret"]) - auth.set_access_token(authdata["access_token"], authdata["access_token_secret"]) - - return tweepy.API(auth), users - - -if __name__ == "__main__": - api, users = twitter_login() + api = twitter_login(config) handles = [] for screen_name in users: try: u = api.get_user(screen_name) - logging.info(f"""\n{screen_name} -> {u._json["id"]}""") + logging.info(f"""{screen_name} -> {u._json["id"]}""") handles.append(str(u._json["id"])) except Exception as e: - logging.error(f"{e}, {authdata}") + logging.error(f"{e}, {config}") + return handles + +if __name__ == "__main__": + opts = c.parse_args([c.CONFIG_FILE, c.CSV_FILE, c.USERS]) + + config = opts.config[0] + users = None + try: + users = opts.users + except KeyError: + users = opts.csv + + handles = fetch(config, users) print("\n".join(handles) + "\n") diff --git a/streaming.py b/streaming.py index 5a9edba..0b0fb8c 100644 --- a/streaming.py +++ b/streaming.py @@ -41,7 +41,7 @@ def run(): """ main entry point """ - opts = c.parse_args([c.CONFIG_FILE, c.IDS, c.DBS, c.TERMS]) + opts = c.parse_args([c.CONFIG_FILE, c.IDS, c.USERS, c.DBS, c.TERMS]) if opts.db == c.DBS["default"]: opts.db = c.load_db_driver(c.DBS["default"]) From 56aa5b33e7b0f827437e51bd5bdd6f28fa005022 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Fri, 28 Aug 2020 16:28:29 -0300 Subject: [PATCH 46/90] get_user_ids is now a bit weird, but will leave it like that Signed-off-by: Niv Sardi --- get_user_ids.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/get_user_ids.py b/get_user_ids.py index b9dcd3e..a772a16 100644 --- a/get_user_ids.py +++ b/get_user_ids.py @@ -30,13 +30,11 @@ def fetch(config, users): opts = c.parse_args([c.CONFIG_FILE, c.CSV_FILE, c.USERS]) config = opts.config[0] - users = None + ids = None try: - users = opts.users + ids = opts.ids except KeyError: - users = opts.csv + ids = fetch(config, opts.csv) - handles = fetch(config, users) - - print("\n".join(handles) + "\n") + print("\n".join(ids) + "\n") From c285879fefb38b1e78dbc146fa84b20437ae06d1 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Fri, 28 Aug 2020 16:40:42 -0300 Subject: [PATCH 47/90] support mutiple DB outputs Signed-off-by: Niv Sardi --- DB/multi.py | 22 ++++++++++++++++++++++ config.py | 20 +++++++++++++++----- streaming.py | 5 ----- 3 files changed, 37 insertions(+), 10 deletions(-) create mode 100644 DB/multi.py diff --git a/DB/multi.py b/DB/multi.py new file mode 100644 index 0000000..13df12c --- /dev/null +++ b/DB/multi.py @@ -0,0 +1,22 @@ +from . import generic +import logging + +class MultiDriver(generic.DB): + def __init__(self, databases): + self.name = "Multiple Dispatch DB Driver" + self.dbs = databases + + def getTweets(self): + return self.dbs[0].getTweets() + + def writeSuccess(self, path): + return [d.writeSuccess(path) for d in self.dbs] + + def markDeleted(self, path): + return [d.markDeleted(path) for d in self.dbs] + + def getLogs(self): + return [d.getLogs() for d in self.dbs] + + def save(self, url, status): + return [d.save(url, status) for d in self.dbs] diff --git a/config.py b/config.py index 3a10c20..d0703b2 100644 --- a/config.py +++ b/config.py @@ -9,6 +9,8 @@ from get_user_ids import fetch +from DB.multi import MultiDriver + logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO) def flatten(lists): @@ -53,6 +55,7 @@ def __call__(self, parser, namespace, filename, option_string=None): def load_db_driver(arg): + db_driver, filename = None, None try: db_driver, filename = arg.split(":") except ValueError: @@ -89,8 +92,8 @@ class LoadDBDriverAction(argparse.Action): load a db driver by name """ - def __call__(self, parser, namespace, arg, option_string=None): - setattr(namespace, self.dest, load_db_driver(arg)) + def __call__(self, parser, namespace, values, option_string=None): + setattr(namespace, self.dest, [load_db_driver(v) for v in values]) class ParseComasAction(argparse.Action): """ @@ -110,7 +113,7 @@ def __call__(self, parser, namespace, values, option_string=None): ids = fetch(namespace.config[0], flatten([v.split(',') for v in values])) ids.extend(old_ids) setattr(namespace, self.dest, ids) - + def load_config(paths): def try_load_json(j): try: @@ -162,6 +165,7 @@ def try_load_json(j): "flags": "-D, --database", "dest": "db", "help": "database system to use (mysql, sqlite, elasticsearch)", + "nargs": "*", "default": "tsv", "action": LoadDBDriverAction, } @@ -194,8 +198,14 @@ def add_argument(o): add_argument(last) - return parser.parse_args() - + opts = parser.parse_args() + if DBS in options: + if opts.db == DBS["default"]: + opts.db = MultiDriver([load_db_driver(DBS["default"])]) + else: + opts.db = MultiDriver(opts.db) + + return opts if __name__ == "__main__": parse_args(options) diff --git a/streaming.py b/streaming.py index 0b0fb8c..74970b8 100644 --- a/streaming.py +++ b/streaming.py @@ -43,16 +43,12 @@ def run(): """ opts = c.parse_args([c.CONFIG_FILE, c.IDS, c.USERS, c.DBS, c.TERMS]) - if opts.db == c.DBS["default"]: - opts.db = c.load_db_driver(c.DBS["default"]) - database = opts.db config = opts.config[0] stream_config = { "follow": opts.ids or None, "track": opts.track or None - } listener = StdOutListener(database) @@ -60,7 +56,6 @@ def run(): auth = tweepy.OAuthHandler(config["consumer_key"], config["consumer_secret"]) auth.set_access_token(config["access_token"], config["access_token_secret"]) - api = tweepy.API(auth) # test authentication try: From 3b22fcdcbb3a183dfea522b79d010a03bfcb579d Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Wed, 9 Sep 2020 22:42:40 -0300 Subject: [PATCH 48/90] update pynx DB Signed-off-by: Niv Sardi --- DB/pynx.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/DB/pynx.py b/DB/pynx.py index a7b4700..f2ae7ba 100644 --- a/DB/pynx.py +++ b/DB/pynx.py @@ -61,14 +61,19 @@ def __init__(self, filename="graph.gexf"): self.name = "NetworkX DB Driver" self.filename = filename self.type = filename.split(".")[-1] or "gexf" + self._user_graph = "user-%s" % filename self._hash_graph = "hash-%s" % filename + self._twit_graph = "twit-%s" % filename + self._write = getattr(nx, "write_%s" % self.type) self._read = getattr(nx, "read_%s" % self.type) - self.G = self._open_graph(self._user_graph) + self.U = self._open_graph(self._user_graph) self.H = self._open_graph(self._hash_graph) - logging.info("graphs opened", self.G.nodes(), self.H.nodes()) + self.T = self._open_graph(self._twit_graph) + + logging.info(f"graphs opened {self.U.nodes()} {self.H.nodes()} {self.T.nodes()}") def _open_graph(self, filename): try: @@ -77,10 +82,10 @@ def _open_graph(self, filename): return nx.Graph() def getTweets(self): - return [n for n in self.G.nodes()] + return [n for n in self.U.nodes()] def markDeleted(self, id): - self.G.nodes[id]["deleted"] = True + self.U.nodes[id]["deleted"] = True def writeSuccess(self, path): logging.warning("NOT IMPLEMENTED") @@ -90,7 +95,7 @@ def getLogs(self): def _write_all(self): self._write(self.H, self._hash_graph) - self._write(self.G, self._user_graph) + self._write(self.U, self._user_graph) def close(self): self._write_all() @@ -102,8 +107,8 @@ def save(self, url, status): text = status.text add_tags(self.H, text) - add_users(self.G, text, status) - logging.info("H", self.H.nodes()) + add_users(self.U, text, status) + logging.info(f"H, {self.H.nodes()}") self._write_all() From df570ba9c7f6d8ecd7c3b0cb66a1d0ebd0695ffe Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Thu, 10 Sep 2020 11:14:29 -0300 Subject: [PATCH 49/90] config: much cleaner and generic driver import Signed-off-by: Niv Sardi --- config.py | 29 ++++++++--------------------- 1 file changed, 8 insertions(+), 21 deletions(-) diff --git a/config.py b/config.py index d0703b2..ace653b 100644 --- a/config.py +++ b/config.py @@ -3,6 +3,7 @@ import sys import json import csv +import importlib import argparse import logging @@ -62,30 +63,16 @@ def load_db_driver(arg): db_driver = arg filename = None finally: - if db_driver == "tsv": - from DB.tsv import Driver - - elif db_driver == "mysql": - from DB.mysql import Driver - - filename = filename or "mysql://" - elif db_driver == "sqlite": - from DB.sqlite import Driver - - filename = filename or "twitter.sqlite" - elif db_driver == "elasticsearch": - from DB.elasticsearch import Driver - - filename = filename or "ec://" - elif db_driver == "pynx": - from DB.pynx import Driver - - filename = filename or "graph.gexf" - else: + try: + M = importlib.import_module(f"DB.{db_driver}") + except: logging.error(f"ERROR could not find db driver for {db_driver}") sys.exit(-2) - return Driver(filename) + if filename: + return M.Driver(filename) + + return M.Driver() class LoadDBDriverAction(argparse.Action): """ From 1544c2a9cdfedc1a9b315c2d7ab966bb32c100a3 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Thu, 10 Sep 2020 11:14:47 -0300 Subject: [PATCH 50/90] config: better logging, that actually helps Signed-off-by: Niv Sardi --- config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config.py b/config.py index ace653b..47d08ab 100644 --- a/config.py +++ b/config.py @@ -12,7 +12,7 @@ from DB.multi import MultiDriver -logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO) +logging.basicConfig(format='%(asctime)s - %(pathname)s:%(lineno)s:%(funcName)s() - %(levelname)s - %(message)s', level=logging.INFO) def flatten(lists): return [i for l in lists for i in l] From 9ce600c6703425eeb3f650d469280ba681ac5d1a Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Thu, 10 Sep 2020 11:15:24 -0300 Subject: [PATCH 51/90] DB/generic: cleaner generic method interception Signed-off-by: Niv Sardi --- DB/multi.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/DB/multi.py b/DB/multi.py index 13df12c..74761ac 100644 --- a/DB/multi.py +++ b/DB/multi.py @@ -6,17 +6,20 @@ def __init__(self, databases): self.name = "Multiple Dispatch DB Driver" self.dbs = databases - def getTweets(self): - return self.dbs[0].getTweets() - - def writeSuccess(self, path): - return [d.writeSuccess(path) for d in self.dbs] + def __getattribute__(self, name): + try: + return object.__getattribute__(self, name) + except AttributeError: + pass - def markDeleted(self, path): - return [d.markDeleted(path) for d in self.dbs] + def wrapper(*args, **kwargs): + for d in self.dbs: + try: + getattr(d, name)(*args, **kwargs) + except AttributeError: + logging.warn(f"{d} has no attribute {name}") - def getLogs(self): - return [d.getLogs() for d in self.dbs] + return wrapper - def save(self, url, status): - return [d.save(url, status) for d in self.dbs] + def getTweets(self): + return self.dbs[0].getTweets() From 1c152a63e0fe1a3d37312dfb1307a6aea083c226 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Thu, 10 Sep 2020 11:15:48 -0300 Subject: [PATCH 52/90] DB/generic: now that all goes through multi, really generic is about to disappear Signed-off-by: Niv Sardi --- DB/generic.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/DB/generic.py b/DB/generic.py index f9a7d85..9abd7f5 100644 --- a/DB/generic.py +++ b/DB/generic.py @@ -3,18 +3,3 @@ class DB: def __init__(self): self.name = "Generic DB Driver" - - def getTweets(self): - logging.warning("NOT IMPLEMENTED") - - def writeSuccess(self, path): - logging.warning("NOT IMPLEMENTED") - - def markDeleted(self, path): - logging.warning("NOT IMPLEMENTED") - - def getLogs(self): - logging.warning("NOT IMPLEMENTED") - - def save(self, url, status): - logging.warning("NOT IMPLEMENTED") From 93916d088a4685eabc9956409eac2e380657ffcb Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Thu, 10 Sep 2020 11:18:49 -0300 Subject: [PATCH 53/90] DB: s/save/saveTweet to be more consistent and open the way to save other stuff Signed-off-by: Niv Sardi --- DB/mysql.py | 2 +- DB/pynx.py | 2 +- DB/sqlite.py | 3 +-- DB/tsv.py | 3 ++- streaming.py | 3 ++- 5 files changed, 7 insertions(+), 6 deletions(-) diff --git a/DB/mysql.py b/DB/mysql.py index 3bad0ff..08e5488 100644 --- a/DB/mysql.py +++ b/DB/mysql.py @@ -66,7 +66,7 @@ def getLogs(): "SELECT Url, Tweet_Id FROM Tweets WHERE Screenshot=0 AND Deleted=0 " ) - def save(url, status): + def saveTweet(url, status): (author, text, id_str) = (status.user.screen_name, status.text, status.id_str) cur = db.cursor() diff --git a/DB/pynx.py b/DB/pynx.py index f2ae7ba..90b3252 100644 --- a/DB/pynx.py +++ b/DB/pynx.py @@ -100,7 +100,7 @@ def _write_all(self): def close(self): self._write_all() - def save(self, url, status): + def saveTweet(self, url, status): try: text = status.extended_tweet.text except AttributeError: diff --git a/DB/sqlite.py b/DB/sqlite.py index 9d2b675..ee10327 100644 --- a/DB/sqlite.py +++ b/DB/sqlite.py @@ -65,8 +65,6 @@ def getLogs(self,): "SELECT Url, Tweet_Id FROM Tweets WHERE Screenshot=0 AND Deleted=0 " ) - def save(self, url, status): - (author, text, id_str) = (status.user.screen_name, status.text, status.id_str) cur = self.db.cursor() try: @@ -81,3 +79,4 @@ def save(self, url, status): logging.error(e, c) self.db.rollback() logging.error("ERROR writing database") + def saveAuthor(self, status): diff --git a/DB/tsv.py b/DB/tsv.py index 49e6305..08ea7d7 100644 --- a/DB/tsv.py +++ b/DB/tsv.py @@ -11,7 +11,8 @@ def __init__(self, filename=sys.stdout): self.filename = filename print("id\tauthor\ttext\turl") - def save(self, url, status): + + def saveTweet(self, url, status): try: text = status.extended_tweet.text except AttributeError: diff --git a/streaming.py b/streaming.py index 74970b8..2f321b4 100644 --- a/streaming.py +++ b/streaming.py @@ -29,7 +29,8 @@ def on_status(self, status): "http://twitter.com/" + status.user.screen_name + "/status/" + status.id_str ) logging.info(f"TWEET: {tweet_url}\n{status.text}") - self.database.save(tweet_url, status) + + self.database.saveTweet(tweet_url, status) def on_error(self, status): """ From 7167e1f18c3d4255cd05354b0d634552914dbb15 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Thu, 10 Sep 2020 11:20:18 -0300 Subject: [PATCH 54/90] DB/sqlite: refactor and introduce saveAuthor Signed-off-by: Niv Sardi --- DB/sqlite.py | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/DB/sqlite.py b/DB/sqlite.py index ee10327..7e6752e 100644 --- a/DB/sqlite.py +++ b/DB/sqlite.py @@ -20,6 +20,11 @@ def __init__(self, filename="twitter.db"): Deleted INTEGER)" ) + cur.execute( + "CREATE TABLE IF NOT EXISTS Authors (Author VARCHAR(255) PRIMARY KEY, \ + Id INTEGER)" + ) + def getTweets(self): cur = self.db.cursor() @@ -65,18 +70,27 @@ def getLogs(self,): "SELECT Url, Tweet_Id FROM Tweets WHERE Screenshot=0 AND Deleted=0 " ) + def execute(self, q, args): cur = self.db.cursor() - try: - c = """ - INSERT INTO Tweets(Author, Text, Url, Tweet_Id, Screenshot, Deleted) - VALUES (?, ?, ?, ?, ?, ?) - """ - cur.execute(c, (author, text, url, id_str, 0, 0)) + cur.execute(q, args) self.db.commit() - # logging.info("Wrote to database:", author, id_str) except sqlite3.Error as e: logging.error(e, c) self.db.rollback() logging.error("ERROR writing database") + def saveAuthor(self, status): + (author, id) = (status.user.screen_name, status.author_id) + self.execute(""" + UPSERT INTO Authors (Author, Id) + VALUES (?, ?) + """, (author, id)) + + def saveTweet(self, url, status): + (author, text, id_str) = (status.user.screen_name, status.text, status.id_str) + self.execute(""" +INSERT INTO Tweets(Author, Text, Url, Tweet_Id, Screenshot, Deleted) \ + VALUES (?, ?, ?, ?, ?, ?) + """, (author, text, url, id_str, 0, 0)) + From dbcf310c4df5ca2ae163788dd844e0675cfc827d Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Thu, 10 Sep 2020 11:20:48 -0300 Subject: [PATCH 55/90] DB/tsv: actually use filename to write Signed-off-by: Niv Sardi --- DB/tsv.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DB/tsv.py b/DB/tsv.py index 08ea7d7..08c5a54 100644 --- a/DB/tsv.py +++ b/DB/tsv.py @@ -10,7 +10,7 @@ def __init__(self, filename=sys.stdout): self.name = "Simplest TSV driver" self.filename = filename - print("id\tauthor\ttext\turl") + self.filename.write("id\tauthor\ttext\turl") def saveTweet(self, url, status): try: @@ -18,7 +18,7 @@ def saveTweet(self, url, status): except AttributeError: text = status.text - print("\t".join(( + self.filename.write("\t".join(( status.id_str, status.user.screen_name, status.text.replace("\n", "\\n").replace("\t", "\\t"), From 9221b8081d115ae5a6695f1a6fe79f5c30898576 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Thu, 10 Sep 2020 12:02:15 -0300 Subject: [PATCH 56/90] DB/tsv: actual fix Signed-off-by: Niv Sardi --- DB/tsv.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/DB/tsv.py b/DB/tsv.py index 08c5a54..2b21772 100644 --- a/DB/tsv.py +++ b/DB/tsv.py @@ -1,3 +1,4 @@ +import os import sys import logging @@ -8,9 +9,16 @@ def __init__(self, filename=sys.stdout): generic.DB.__init__(self) self.name = "Simplest TSV driver" - self.filename = filename - self.filename.write("id\tauthor\ttext\turl") + if type(filename) is str: + exists = os.path.exists(filename) + self.file = open(filename, 'a') + if not exists: + self.file.write("id\tauthor\ttext\turl") + else: + self.file = filename + self.file.write("id\tauthor\ttext\turl") + def saveTweet(self, url, status): try: @@ -18,7 +26,7 @@ def saveTweet(self, url, status): except AttributeError: text = status.text - self.filename.write("\t".join(( + self.file.write("\t".join(( status.id_str, status.user.screen_name, status.text.replace("\n", "\\n").replace("\t", "\\t"), From 52ed1605050e2c344c467eeb7eaaff61a8a66597 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Thu, 10 Sep 2020 12:03:15 -0300 Subject: [PATCH 57/90] config: dbs actually needed more love to make multi work Signed-off-by: Niv Sardi --- config.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/config.py b/config.py index 47d08ab..692f471 100644 --- a/config.py +++ b/config.py @@ -80,7 +80,14 @@ class LoadDBDriverAction(argparse.Action): """ def __call__(self, parser, namespace, values, option_string=None): - setattr(namespace, self.dest, [load_db_driver(v) for v in values]) + old_dbs = getattr(namespace, self.dest) + if type(old_dbs) is str: + old_dbs = () + + dbs = [load_db_driver(v) for v in values] + + dbs.extend(old_dbs) + setattr(namespace, self.dest, dbs) class ParseComasAction(argparse.Action): """ From 3338f3b4ebef0dbde01f3f832fb27a4899d4ea46 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Thu, 10 Sep 2020 12:03:51 -0300 Subject: [PATCH 58/90] streaming: fix signal_handler Signed-off-by: Niv Sardi --- streaming.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/streaming.py b/streaming.py index 2f321b4..891f83c 100644 --- a/streaming.py +++ b/streaming.py @@ -65,7 +65,7 @@ def run(): except: logging.error("Error during authentication") - def signal_handler(): + def signal_handler(*argv, **argh): database.close() sys.exit(0) From 011c68ef829f8fae6b65ae7dfac27776600369e8 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Thu, 10 Sep 2020 12:25:27 -0300 Subject: [PATCH 59/90] use coloredlogs for more readability Signed-off-by: Niv Sardi --- config.py | 6 ++++-- requirements.txt | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/config.py b/config.py index 692f471..a3352fa 100644 --- a/config.py +++ b/config.py @@ -6,13 +6,15 @@ import importlib import argparse -import logging +import coloredlogs, logging from get_user_ids import fetch from DB.multi import MultiDriver -logging.basicConfig(format='%(asctime)s - %(pathname)s:%(lineno)s:%(funcName)s() - %(levelname)s - %(message)s', level=logging.INFO) +LOGGING_FORMAT = '%(asctime)s - %(pathname)s:%(lineno)s:%(funcName)s() - %(levelname)s - %(message)s' +coloredlogs.install() +logging.basicConfig(format=LOGGING_FORMAT, level=logging.INFO) def flatten(lists): return [i for l in lists for i in l] diff --git a/requirements.txt b/requirements.txt index 388d9f3..d59fb15 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,3 +2,4 @@ tweepy needle pyinquirer clint +coloredlogs From ba7a9e73a7d310b2d52beae07a67fc67f9ffce49 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Thu, 10 Sep 2020 12:26:16 -0300 Subject: [PATCH 60/90] DB/multi: separate getattr and fn call so that we properly catch errors Signed-off-by: Niv Sardi --- DB/multi.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/DB/multi.py b/DB/multi.py index 74761ac..3a42958 100644 --- a/DB/multi.py +++ b/DB/multi.py @@ -14,11 +14,15 @@ def __getattribute__(self, name): def wrapper(*args, **kwargs): for d in self.dbs: + logging.debug(f"{d} -> {name}({args})") + fn = None try: - getattr(d, name)(*args, **kwargs) + fn = getattr(d, name) except AttributeError: logging.warn(f"{d} has no attribute {name}") + if fn: fn(*args, **kwargs) + return wrapper def getTweets(self): From 506cbb957ba55c8d0bccd83d0c99d8e227692bf3 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Thu, 10 Sep 2020 12:26:46 -0300 Subject: [PATCH 61/90] DB/sqlite: fix error reporting in execute Signed-off-by: Niv Sardi --- DB/sqlite.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DB/sqlite.py b/DB/sqlite.py index 7e6752e..19be907 100644 --- a/DB/sqlite.py +++ b/DB/sqlite.py @@ -76,7 +76,7 @@ def execute(self, q, args): cur.execute(q, args) self.db.commit() except sqlite3.Error as e: - logging.error(e, c) + logging.error(e, q, args) self.db.rollback() logging.error("ERROR writing database") From 211ce2856ed28aaf1e8fb0e84232186c08a357ac Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Thu, 10 Sep 2020 12:27:04 -0300 Subject: [PATCH 62/90] DB/sqlite: fix saveAuthor upsert syntax Signed-off-by: Niv Sardi --- DB/sqlite.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/DB/sqlite.py b/DB/sqlite.py index 19be907..a8e66cc 100644 --- a/DB/sqlite.py +++ b/DB/sqlite.py @@ -80,17 +80,17 @@ def execute(self, q, args): self.db.rollback() logging.error("ERROR writing database") - def saveAuthor(self, status): - (author, id) = (status.user.screen_name, status.author_id) - self.execute(""" - UPSERT INTO Authors (Author, Id) - VALUES (?, ?) - """, (author, id)) def saveTweet(self, url, status): (author, text, id_str) = (status.user.screen_name, status.text, status.id_str) self.execute(""" INSERT INTO Tweets(Author, Text, Url, Tweet_Id, Screenshot, Deleted) \ VALUES (?, ?, ?, ?, ?, ?) - """, (author, text, url, id_str, 0, 0)) + """, (author, text, url, id_str, 0, 0)) + def saveAuthor(self, status): + (author, aid) = (status.user.screen_name, status.user.id) + self.execute(""" + INSERT INTO Authors (Author, Id) + VALUES (?, ?) ON CONFLICT(Author) DO NOTHING + """, (author, aid)) From 8f82dcc187d0329c5ad55cfa00a3fbd1771d5af9 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Thu, 10 Sep 2020 12:27:31 -0300 Subject: [PATCH 63/90] streaming: saveAuthor Signed-off-by: Niv Sardi --- streaming.py | 1 + 1 file changed, 1 insertion(+) diff --git a/streaming.py b/streaming.py index 891f83c..4bc2931 100644 --- a/streaming.py +++ b/streaming.py @@ -31,6 +31,7 @@ def on_status(self, status): logging.info(f"TWEET: {tweet_url}\n{status.text}") self.database.saveTweet(tweet_url, status) + self.database.saveAuthor(status) def on_error(self, status): """ From db1339348298b5abef7faa99026cf55437a28f70 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Thu, 10 Sep 2020 15:53:59 -0300 Subject: [PATCH 64/90] config: properly implement debug and coloredlogs Signed-off-by: Niv Sardi --- config.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/config.py b/config.py index a3352fa..d83a7cc 100644 --- a/config.py +++ b/config.py @@ -12,9 +12,8 @@ from DB.multi import MultiDriver -LOGGING_FORMAT = '%(asctime)s - %(pathname)s:%(lineno)s:%(funcName)s() - %(levelname)s - %(message)s' -coloredlogs.install() -logging.basicConfig(format=LOGGING_FORMAT, level=logging.INFO) +LOGGING_FORMAT = '%(asctime)s - %(pathname)s:%(lineno)s:%(funcName)s()\n - %(levelname)s - %(message)s' +coloredlogs.install(fmt=LOGGING_FORMAT) def flatten(lists): return [i for l in lists for i in l] @@ -109,6 +108,14 @@ def __call__(self, parser, namespace, values, option_string=None): ids = fetch(namespace.config[0], flatten([v.split(',') for v in values])) ids.extend(old_ids) setattr(namespace, self.dest, ids) + +class IncreaseVerbosityAction(argparse.Action): + """ + up debug level + """ + + def __call__(self, parser, namespace, values, option_string=None): + coloredlogs.increase_verbosity() def load_config(paths): def try_load_json(j): @@ -171,6 +178,12 @@ def try_load_json(j): "help": "load data from a csv file", "action": LoadRowFileAction, } +DEBUG = { + "flags": "-v", + "help": "increase verbosity", + "action": IncreaseVerbosityAction, + "default": 0 +} options = [CONFIG_FILE, IDS, USERS, TERMS, DBS] @@ -179,7 +192,8 @@ def try_load_json(j): def parse_args(options): parser = argparse.ArgumentParser( - description="Twitter Tools: query twitter from the commandline" + description="Twitter Tools: query twitter from the commandline", + allow_abbrev=False ) def add_argument(o): From 19ca43868f37b2a6c14610786246795c6b5c4780 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Thu, 10 Sep 2020 16:19:57 -0300 Subject: [PATCH 65/90] DB: generic getAuthor should raise an exception Signed-off-by: Niv Sardi --- DB/generic.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/DB/generic.py b/DB/generic.py index 9abd7f5..ea6ae99 100644 --- a/DB/generic.py +++ b/DB/generic.py @@ -3,3 +3,6 @@ class DB: def __init__(self): self.name = "Generic DB Driver" + + def getAuthor(self, status): + raise KeyError(f"{status} not found") From f9b7d2b2e43dfb945249b4450e23683bcd0037d9 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Thu, 10 Sep 2020 16:21:01 -0300 Subject: [PATCH 66/90] DB: pynx, split out full text extractor, should probably go to utils Signed-off-by: Niv Sardi --- DB/pynx.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/DB/pynx.py b/DB/pynx.py index 90b3252..5003463 100644 --- a/DB/pynx.py +++ b/DB/pynx.py @@ -100,11 +100,13 @@ def _write_all(self): def close(self): self._write_all() - def saveTweet(self, url, status): + def extract_text(self, status): try: - text = status.extended_tweet.text + return status.extended_tweet.text except AttributeError: - text = status.text + return status.text + def saveTweet(self, url, status): + text = self.extract_text(status) add_tags(self.H, text) add_users(self.U, text, status) From b0f37392f70c68d746411580a099c18e31c327c7 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Thu, 10 Sep 2020 16:21:24 -0300 Subject: [PATCH 67/90] DB: pynx, trivial saveAuthor Signed-off-by: Niv Sardi --- DB/pynx.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/DB/pynx.py b/DB/pynx.py index 5003463..fcbbc90 100644 --- a/DB/pynx.py +++ b/DB/pynx.py @@ -109,10 +109,13 @@ def saveTweet(self, url, status): text = self.extract_text(status) add_tags(self.H, text) - add_users(self.U, text, status) logging.info(f"H, {self.H.nodes()}") self._write_all() + def saveAuthor(self, status): + text = self.extract_text(status) + add_users(self.U, text, status) + self._write_all() if __name__ == "__main__": G = nx.Graph() From ecf95ecc548579a244b96b278045615b3bb4d77e Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Thu, 10 Sep 2020 16:23:15 -0300 Subject: [PATCH 68/90] DB: sqlite, implement getAuthor and simplify saveAuthor Signed-off-by: Niv Sardi --- DB/sqlite.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/DB/sqlite.py b/DB/sqlite.py index a8e66cc..26f7f12 100644 --- a/DB/sqlite.py +++ b/DB/sqlite.py @@ -22,7 +22,7 @@ def __init__(self, filename="twitter.db"): cur.execute( "CREATE TABLE IF NOT EXISTS Authors (Author VARCHAR(255) PRIMARY KEY, \ - Id INTEGER)" + Id INTEGER NOT NULL UNIQUE)" ) @@ -34,6 +34,12 @@ def getTweets(self): WHERE Deleted=0""" ) + def getAuthor(self, author): + cur = self.db.cursor() + return cur.execute( + """SELECT Id FROM Authors WHERE Author=?""", (author,) + ).fetchone()[0] + def _commit(self, query): cur = self.db.cursor() try: @@ -89,8 +95,8 @@ def saveTweet(self, url, status): """, (author, text, url, id_str, 0, 0)) def saveAuthor(self, status): - (author, aid) = (status.user.screen_name, status.user.id) + args = (status.user.screen_name, status.user.id) self.execute(""" - INSERT INTO Authors (Author, Id) - VALUES (?, ?) ON CONFLICT(Author) DO NOTHING - """, (author, aid)) + INSERT INTO Authors(Author, Id) + VALUES(?, ?) ON CONFLICT(Author) DO NOTHING + """, args) From 9ac3a00689e592a171fea821c67959fece5352cb Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Thu, 10 Sep 2020 16:23:36 -0300 Subject: [PATCH 69/90] DB: pynx, make add_tags return tags Signed-off-by: Niv Sardi --- DB/pynx.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/DB/pynx.py b/DB/pynx.py index fcbbc90..cbac7e9 100644 --- a/DB/pynx.py +++ b/DB/pynx.py @@ -30,15 +30,14 @@ def add_edge(G, n, p): def add_tags(G, text): tags = hashre.findall(text) - while len(tags) > 1: - t = normalize(tags.pop()) - add_node(G, t) - for u in tags: + for i, t in enumerate(tags): + n = normalize(t) + add_node(G, n) + for u in tags[i:]: u = normalize(u) add_node(G, u) add_edge(G, t, u) - return G - + return tags def add_users(G, text, status): users = set(userre.findall(text)) From 8e2e0bf9a8d88426695b57f430ecf0f312f55cdc Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Thu, 10 Sep 2020 16:24:43 -0300 Subject: [PATCH 70/90] DB/config: allow for passing instanciated DBs to config if that happens, don't send in a multi Signed-off-by: Niv Sardi --- DB/multi.py | 8 +++++++- config.py | 9 +++++---- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/DB/multi.py b/DB/multi.py index 3a42958..ad65cf3 100644 --- a/DB/multi.py +++ b/DB/multi.py @@ -4,7 +4,13 @@ class MultiDriver(generic.DB): def __init__(self, databases): self.name = "Multiple Dispatch DB Driver" - self.dbs = databases + + if type(databases) == list: + self.dbs = databases + else: + self.dbs = [databases] + + logging.debug(self.dbs) def __getattribute__(self, name): try: diff --git a/config.py b/config.py index d83a7cc..033a94d 100644 --- a/config.py +++ b/config.py @@ -60,10 +60,11 @@ def load_db_driver(arg): db_driver, filename = None, None try: db_driver, filename = arg.split(":") - except ValueError: + except: db_driver = arg filename = None finally: + print(db_driver, filename) try: M = importlib.import_module(f"DB.{db_driver}") except: @@ -209,12 +210,12 @@ def add_argument(o): add_argument(last) opts = parser.parse_args() - if DBS in options: + if DBS in options and type(DBS["default"]) == str: if opts.db == DBS["default"]: - opts.db = MultiDriver([load_db_driver(DBS["default"])]) + opts.db = MultiDriver(load_db_driver(DBS["default"])) else: opts.db = MultiDriver(opts.db) - + return opts if __name__ == "__main__": From 1342574b8b58ba49f9da0d5555a43eeae634819b Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Thu, 10 Sep 2020 16:25:05 -0300 Subject: [PATCH 71/90] streaming: add debug option Signed-off-by: Niv Sardi --- streaming.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/streaming.py b/streaming.py index 4bc2931..e721852 100644 --- a/streaming.py +++ b/streaming.py @@ -43,7 +43,7 @@ def run(): """ main entry point """ - opts = c.parse_args([c.CONFIG_FILE, c.IDS, c.USERS, c.DBS, c.TERMS]) + opts = c.parse_args([c.CONFIG_FILE, c.DEBUG, c.IDS, c.USERS, c.DBS, c.TERMS]) database = opts.db config = opts.config[0] From 74a718b9bcd01cfdab7d9cd9b74485f370c559b8 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Thu, 10 Sep 2020 16:25:41 -0300 Subject: [PATCH 72/90] get_user_ids: cache ids in SQLITE by default Signed-off-by: Niv Sardi --- config.py | 2 +- get_user_ids.py | 40 ++++++++++++++++++++++++++++++---------- 2 files changed, 31 insertions(+), 11 deletions(-) diff --git a/config.py b/config.py index 033a94d..65a0256 100644 --- a/config.py +++ b/config.py @@ -106,7 +106,7 @@ class FetchUsersAction(argparse.Action): def __call__(self, parser, namespace, values, option_string=None): old_ids = getattr(namespace, self.dest) or () - ids = fetch(namespace.config[0], flatten([v.split(',') for v in values])) + ids = fetch(namespace.config[0], flatten([v.split(',') for v in values]), namespace.db) ids.extend(old_ids) setattr(namespace, self.dest, ids) diff --git a/get_user_ids.py b/get_user_ids.py index a772a16..446a6aa 100644 --- a/get_user_ids.py +++ b/get_user_ids.py @@ -12,29 +12,49 @@ def twitter_login(config): return tweepy.API(auth) -def fetch(config, users): +class MiniUser(): + def __init__(self, author, id): + self.screen_name = author + self.id = id + +class MiniStatus(): + def __init__(self, author, id): + self.user = MiniUser(author, id) + +def fetch(config, users, db): logging.info(f"looking for: {users}") - api = twitter_login(config) + api = None handles = [] for screen_name in users: try: - u = api.get_user(screen_name) - logging.info(f"""{screen_name} -> {u._json["id"]}""") - handles.append(str(u._json["id"])) - except Exception as e: - logging.error(f"{e}, {config}") + u = db.getAuthor(screen_name) + + except KeyError: + logging.warn(f"{screen_name} not found in DB {db}") + try: + if not api: api = twitter_login(config) + u = api.get_user(screen_name)._json['id'] + db.saveAuthor(MiniStatus(screen_name, u)) + except Exception as e: + logging.error(f"{e}, {config}") + break + + handles.append(str(u)) + logging.info(f"{screen_name} -> {u}") return handles if __name__ == "__main__": - opts = c.parse_args([c.CONFIG_FILE, c.CSV_FILE, c.USERS]) - + DB_CONFIG = c.DBS + DB_CONFIG["default"] = c.load_db_driver("sqlite") + + opts = c.parse_args([DB_CONFIG, c.DEBUG, c.CONFIG_FILE, c.CSV_FILE, c.USERS, ]) config = opts.config[0] ids = None try: ids = opts.ids except KeyError: - ids = fetch(config, opts.csv) + ids = fetch(config, opts.csv, opts.db) print("\n".join(ids) + "\n") From 9d9c017727cfbdd3860cb828687f5a16e38e6bd4 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Thu, 10 Sep 2020 16:42:17 -0300 Subject: [PATCH 73/90] config: remove debug code Signed-off-by: Niv Sardi --- config.py | 1 - 1 file changed, 1 deletion(-) diff --git a/config.py b/config.py index 65a0256..e6f4d66 100644 --- a/config.py +++ b/config.py @@ -64,7 +64,6 @@ def load_db_driver(arg): db_driver = arg filename = None finally: - print(db_driver, filename) try: M = importlib.import_module(f"DB.{db_driver}") except: From 0288e3136ec8b1b38891e807c7ab42f778bf7d9e Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Thu, 10 Sep 2020 16:42:52 -0300 Subject: [PATCH 74/90] get_user_ids: remplace class nonsense with SimpleNamespace Signed-off-by: Niv Sardi --- get_user_ids.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/get_user_ids.py b/get_user_ids.py index 446a6aa..f06f9e8 100644 --- a/get_user_ids.py +++ b/get_user_ids.py @@ -12,14 +12,8 @@ def twitter_login(config): return tweepy.API(auth) -class MiniUser(): - def __init__(self, author, id): - self.screen_name = author - self.id = id - -class MiniStatus(): - def __init__(self, author, id): - self.user = MiniUser(author, id) +def make_status(name, id): + return SimpleNamespace(user=SimpleNamespace(screen_name = name, id = id)) def fetch(config, users, db): logging.info(f"looking for: {users}") @@ -35,7 +29,7 @@ def fetch(config, users, db): try: if not api: api = twitter_login(config) u = api.get_user(screen_name)._json['id'] - db.saveAuthor(MiniStatus(screen_name, u)) + db.saveAuthor(make_status(screen_name, u)) except Exception as e: logging.error(f"{e}, {config}") break From 6f67a71b2d21d42dc9275db40d286203a623c5aa Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Thu, 10 Sep 2020 17:12:47 -0300 Subject: [PATCH 75/90] DB/generic: back to nothing Signed-off-by: Niv Sardi --- DB/generic.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/DB/generic.py b/DB/generic.py index ea6ae99..9abd7f5 100644 --- a/DB/generic.py +++ b/DB/generic.py @@ -3,6 +3,3 @@ class DB: def __init__(self): self.name = "Generic DB Driver" - - def getAuthor(self, status): - raise KeyError(f"{status} not found") From 66602a073ebc02704c2d2275629f1461ddecadbf Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Thu, 10 Sep 2020 17:13:15 -0300 Subject: [PATCH 76/90] DB/sqlite: properly handle no results in getAuthor Signed-off-by: Niv Sardi --- DB/sqlite.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/DB/sqlite.py b/DB/sqlite.py index 26f7f12..7579524 100644 --- a/DB/sqlite.py +++ b/DB/sqlite.py @@ -36,9 +36,12 @@ def getTweets(self): def getAuthor(self, author): cur = self.db.cursor() - return cur.execute( + r = cur.execute( """SELECT Id FROM Authors WHERE Author=?""", (author,) - ).fetchone()[0] + ).fetchone() + + if not r: raise KeyError(f"{author} not found") + return r[0] def _commit(self, query): cur = self.db.cursor() From ffb8d0d31bd431e1cd2f64ba7b0a0b073644de7d Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Thu, 10 Sep 2020 17:13:45 -0300 Subject: [PATCH 77/90] config: cleaner db loading Signed-off-by: Niv Sardi --- config.py | 12 ++++++------ get_user_ids.py | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/config.py b/config.py index e6f4d66..65c14f0 100644 --- a/config.py +++ b/config.py @@ -82,7 +82,7 @@ class LoadDBDriverAction(argparse.Action): def __call__(self, parser, namespace, values, option_string=None): old_dbs = getattr(namespace, self.dest) - if type(old_dbs) is str: + if not isinstance(old_dbs, list): old_dbs = () dbs = [load_db_driver(v) for v in values] @@ -196,6 +196,9 @@ def parse_args(options): allow_abbrev=False ) + if DBS in options: + DBS["default"] = load_db_driver(DBS["default"]) + def add_argument(o): flags = o.pop("flags") parser.add_argument(flags, **o) @@ -209,11 +212,8 @@ def add_argument(o): add_argument(last) opts = parser.parse_args() - if DBS in options and type(DBS["default"]) == str: - if opts.db == DBS["default"]: - opts.db = MultiDriver(load_db_driver(DBS["default"])) - else: - opts.db = MultiDriver(opts.db) + if DBS in options: + opts.db = MultiDriver(opts.db) return opts diff --git a/get_user_ids.py b/get_user_ids.py index f06f9e8..3d37522 100644 --- a/get_user_ids.py +++ b/get_user_ids.py @@ -40,7 +40,7 @@ def fetch(config, users, db): if __name__ == "__main__": DB_CONFIG = c.DBS - DB_CONFIG["default"] = c.load_db_driver("sqlite") + DB_CONFIG["default"] = "sqlite" opts = c.parse_args([DB_CONFIG, c.DEBUG, c.CONFIG_FILE, c.CSV_FILE, c.USERS, ]) config = opts.config[0] From baa5380df36e9c7cab576ffd76bd0de6f111d4e3 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Thu, 10 Sep 2020 17:15:05 -0300 Subject: [PATCH 78/90] get_user_ids: bugfix, SimpleNamespace actually needs an import Signed-off-by: Niv Sardi --- get_user_ids.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/get_user_ids.py b/get_user_ids.py index 3d37522..d5b83db 100644 --- a/get_user_ids.py +++ b/get_user_ids.py @@ -4,6 +4,8 @@ import sys import logging +from types import SimpleNamespace + import config as c def twitter_login(config): From e950c64ffadc81556011df60971e4b6710545158 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Thu, 10 Sep 2020 17:15:35 -0300 Subject: [PATCH 79/90] get_user_ids: force sqlite db if the provided one doesn't support author operations Signed-off-by: Niv Sardi --- get_user_ids.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/get_user_ids.py b/get_user_ids.py index d5b83db..96bf5d0 100644 --- a/get_user_ids.py +++ b/get_user_ids.py @@ -18,7 +18,10 @@ def make_status(name, id): return SimpleNamespace(user=SimpleNamespace(screen_name = name, id = id)) def fetch(config, users, db): - logging.info(f"looking for: {users}") + if not (hasattr(db, 'getAuthor') and hasattr(db, 'saveAuthor')): + db = c.load_db_driver('sqlite') + + logging.info(f"looking for: {users} in {db}") api = None handles = [] @@ -26,8 +29,8 @@ def fetch(config, users, db): try: u = db.getAuthor(screen_name) - except KeyError: - logging.warn(f"{screen_name} not found in DB {db}") + except (KeyError, AttributeError) as e: + logging.warn(f"{screen_name} not found in DB {db} ({e})") try: if not api: api = twitter_login(config) u = api.get_user(screen_name)._json['id'] From bb1ee66bd1bde86db766f7d2fe30952c7ef264bd Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Sat, 12 Sep 2020 05:30:41 -0300 Subject: [PATCH 80/90] rewrite: proper caching, and bulk fetch Signed-off-by: Niv Sardi --- get_user_ids.py | 44 +++++++++++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/get_user_ids.py b/get_user_ids.py index 96bf5d0..ca62e8d 100644 --- a/get_user_ids.py +++ b/get_user_ids.py @@ -17,6 +17,8 @@ def twitter_login(config): def make_status(name, id): return SimpleNamespace(user=SimpleNamespace(screen_name = name, id = id)) +TWITTER_BATCH_LIMIT = 100 + def fetch(config, users, db): if not (hasattr(db, 'getAuthor') and hasattr(db, 'saveAuthor')): db = c.load_db_driver('sqlite') @@ -25,22 +27,39 @@ def fetch(config, users, db): api = None handles = [] + need_fetch = [] + + def add_sn(screen_name, i): + if i: handles.append((screen_name, i)) + db.saveAuthor(make_status(screen_name, i)) + for screen_name in users: try: - u = db.getAuthor(screen_name) - + i = db.getAuthor(screen_name) + if i: add_sn(screen_name, i) except (KeyError, AttributeError) as e: logging.warn(f"{screen_name} not found in DB {db} ({e})") - try: - if not api: api = twitter_login(config) - u = api.get_user(screen_name)._json['id'] - db.saveAuthor(make_status(screen_name, u)) - except Exception as e: - logging.error(f"{e}, {config}") - break + need_fetch.append(screen_name) + + while len(need_fetch): + if not api: api = twitter_login(config) + + batch = need_fetch[:TWITTER_BATCH_LIMIT] + need_fetch = need_fetch[TWITTER_BATCH_LIMIT:] + + try: + lu = api.lookup_users(user_ids = None, screen_names = batch, include_entities = False) + except Exception as e: + lu = [] - handles.append(str(u)) - logging.info(f"{screen_name} -> {u}") + for u in lu: + add_sn(u._json['screen_name'], u._json['id']) + batch.remove(u._json['screen_name']) + + for sn in batch: + add_sn(sn, None) + + logging.info(handles) return handles if __name__ == "__main__": @@ -55,5 +74,4 @@ def fetch(config, users, db): except KeyError: ids = fetch(config, opts.csv, opts.db) - print("\n".join(ids) + "\n") - + print(ids) From 75954c7b59a67703a3f3a9537d3098c0a61f8719 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Sat, 12 Sep 2020 05:37:12 -0300 Subject: [PATCH 81/90] DB/sqlite: don't reuquire authors to be NOT NULL so we can store non-existing users Signed-off-by: Niv Sardi --- DB/sqlite.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DB/sqlite.py b/DB/sqlite.py index 7579524..76d3c0e 100644 --- a/DB/sqlite.py +++ b/DB/sqlite.py @@ -22,7 +22,7 @@ def __init__(self, filename="twitter.db"): cur.execute( "CREATE TABLE IF NOT EXISTS Authors (Author VARCHAR(255) PRIMARY KEY, \ - Id INTEGER NOT NULL UNIQUE)" + Id INTEGER UNIQUE)" ) From 318cb95ee1705888c65bf6ce247384c2aa397188 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Sat, 12 Sep 2020 05:37:34 -0300 Subject: [PATCH 82/90] config: row and csv, better logic and logging Signed-off-by: Niv Sardi --- config.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/config.py b/config.py index 65c14f0..faf8cb7 100644 --- a/config.py +++ b/config.py @@ -35,9 +35,11 @@ class LoadRowFileAction(argparse.Action): def __call__(self, parser, namespace, filename, option_string=None): ret = [] + logging.debug(f"opening {filename} as CSV") with open(filename) as f: for row in f: - ret.append(row) + s = row.rstrip() + if len(s): ret.append(s) setattr(namespace, self.dest, ret) @@ -48,6 +50,7 @@ class LoadCSVAction(argparse.Action): def __call__(self, parser, namespace, filename, option_string=None): ret = [] + logging.debug(f"opening {filename} as CSV") with open(filename, "rb") as csvfile: reader = csv.reader(csvfile, delimiter=",", quotechar="|") for row in reader: From 0e42e27e8bdccfb2b5bfc6578dd9b1ff8e9e3866 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Sat, 12 Sep 2020 05:37:51 -0300 Subject: [PATCH 83/90] get_user_ids: actually process csv argument Signed-off-by: Niv Sardi --- get_user_ids.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/get_user_ids.py b/get_user_ids.py index ca62e8d..1655bb8 100644 --- a/get_user_ids.py +++ b/get_user_ids.py @@ -72,6 +72,9 @@ def add_sn(screen_name, i): try: ids = opts.ids except KeyError: - ids = fetch(config, opts.csv, opts.db) + ids = [] + + if opts.csv: + ids.extend(fetch(config, opts.csv, opts.db)) print(ids) From 07fedf1150393dda16cb54498115bafe7571a06c Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Sat, 12 Sep 2020 06:00:39 -0300 Subject: [PATCH 84/90] get_user_ids: all lower and tsv output Signed-off-by: Niv Sardi --- get_user_ids.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/get_user_ids.py b/get_user_ids.py index 1655bb8..9d2ccfe 100644 --- a/get_user_ids.py +++ b/get_user_ids.py @@ -34,12 +34,13 @@ def add_sn(screen_name, i): db.saveAuthor(make_status(screen_name, i)) for screen_name in users: + sn = screen_name.lower() try: - i = db.getAuthor(screen_name) - if i: add_sn(screen_name, i) + i = db.getAuthor(sn) + if i: add_sn(sn, i) except (KeyError, AttributeError) as e: - logging.warn(f"{screen_name} not found in DB {db} ({e})") - need_fetch.append(screen_name) + logging.warn(f"{sn} not found in DB {db} ({e})") + need_fetch.append(sn) while len(need_fetch): if not api: api = twitter_login(config) @@ -47,14 +48,17 @@ def add_sn(screen_name, i): batch = need_fetch[:TWITTER_BATCH_LIMIT] need_fetch = need_fetch[TWITTER_BATCH_LIMIT:] + logging.debug(f"this batch is {len(batch)}, still need to fetch {len(need_fetch)}") + try: lu = api.lookup_users(user_ids = None, screen_names = batch, include_entities = False) except Exception as e: lu = [] for u in lu: - add_sn(u._json['screen_name'], u._json['id']) - batch.remove(u._json['screen_name']) + sn = u._json['screen_name'].lower() + add_sn(sn, u._json['id']) + batch.remove(sn) for sn in batch: add_sn(sn, None) @@ -77,4 +81,6 @@ def add_sn(screen_name, i): if opts.csv: ids.extend(fetch(config, opts.csv, opts.db)) - print(ids) + print("screen_name\tid") + for u, i in ids: + print(f"{u}\t{i}") From fa7ba78531f15c16fbbe85d810defd816592803d Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Sat, 12 Sep 2020 06:01:02 -0300 Subject: [PATCH 85/90] add pydictor/get_user_ids example Signed-off-by: Niv Sardi --- README.md | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/README.md b/README.md index 06f5d5e..dbc47ce 100644 --- a/README.md +++ b/README.md @@ -80,6 +80,31 @@ You might want to consider running all these with `cron` on a server. Just sayin Then uncomment line 2 and 34-40 in `save_to_db.py` +## finding similar users +when looking for bots you may want to look for a LOT of similar usernames, we got you covered ! + +first you'll need to generate a list of usernames, you can do so with any password dict tool, +we recomend you use https://github.com/LandGrey/pydictor + +and then pass it to `./get_user_ids.py -f` that will spit out a TSV of valid usernames and id +pairs, it's all cached so you can run it multiple times. + +example: + +``` shell +$ python3 pydictor.py --head alejandro --len 4 4 -base d -o usernames.csv + _ _ _ + _ __ _ _ __| (_) ___| |_ ___ _ __ + | '_ \| | | |/ _` | |/ __| __/ _ \| '__| + | |_) | |_| | (_| | | (__| || (_) | | + | .__/ \__, |\__,_|_|\___|\__\___/|_| + |_| |___/ 2.1.4.1#dev + +[+] A total of :10000 lines +[+] Store in :./results/blah.txt +[+] Cost :0.0529 seconds +$ python3 get_user_ids.py -f results/usernames.csv > valid_usernames.tsv +``` ## License [PDD/ Unlicense](http://choosealicense.com/licenses/unlicense/) From 47ec95fd716ff378636f1312c0206d7e3fb109df Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Sat, 12 Sep 2020 14:52:44 -0300 Subject: [PATCH 86/90] config: FetchUsersAction: support for not having a db in config object Signed-off-by: Niv Sardi --- config.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/config.py b/config.py index faf8cb7..47774a7 100644 --- a/config.py +++ b/config.py @@ -107,8 +107,13 @@ class FetchUsersAction(argparse.Action): """ def __call__(self, parser, namespace, values, option_string=None): + try: + db = namespace.db + except AttributeError: + db = None + old_ids = getattr(namespace, self.dest) or () - ids = fetch(namespace.config[0], flatten([v.split(',') for v in values]), namespace.db) + ids = fetch(namespace.config[0], flatten([v.split(',') for v in values]), db) ids.extend(old_ids) setattr(namespace, self.dest, ids) From 388ebe0191949ee7eb37f193c2d7b9fe2243ba2c Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Sat, 12 Sep 2020 14:54:38 -0300 Subject: [PATCH 87/90] config: make USERS add elements to 'ids' and add USERS_NOFETCH to bypass queries Signed-off-by: Niv Sardi --- config.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/config.py b/config.py index 47774a7..8e65aef 100644 --- a/config.py +++ b/config.py @@ -115,7 +115,7 @@ def __call__(self, parser, namespace, values, option_string=None): old_ids = getattr(namespace, self.dest) or () ids = fetch(namespace.config[0], flatten([v.split(',') for v in values]), db) ids.extend(old_ids) - setattr(namespace, self.dest, ids) + setattr(namespace, 'ids', ids) class IncreaseVerbosityAction(argparse.Action): """ @@ -159,11 +159,18 @@ def try_load_json(j): } USERS = { "flags": "-u, --users", - "dest": "ids", + "dest": "users", "nargs": "*", "help": "twitter usernames, as a comma-separated list", "action": FetchUsersAction, } +USERS_NOFETCH= { + "flags": "-u, --users", + "dest": "ids", + "nargs": "*", + "help": "twitter usernames, as a comma-separated list", + "action": ParseComasAction, +} TERMS = { "flags": "-t, --track", "dest": "track", From 8a6c7f5348b3f7350a7f45ac71b348b35673ad99 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Sat, 12 Sep 2020 14:55:11 -0300 Subject: [PATCH 88/90] introduce utils.twitter_login and use it in streaming and get_user_ids Signed-off-by: Niv Sardi --- get_user_ids.py | 10 ++-------- streaming.py | 13 ++----------- utils.py | 18 ++++++++++++++++++ 3 files changed, 22 insertions(+), 19 deletions(-) create mode 100644 utils.py diff --git a/get_user_ids.py b/get_user_ids.py index 9d2ccfe..ee83498 100644 --- a/get_user_ids.py +++ b/get_user_ids.py @@ -1,4 +1,3 @@ -import tweepy import time import csv import sys @@ -7,12 +6,7 @@ from types import SimpleNamespace import config as c - -def twitter_login(config): - auth = tweepy.OAuthHandler(config["consumer_key"], config["consumer_secret"]) - auth.set_access_token(config["access_token"], config["access_token_secret"]) - - return tweepy.API(auth) +import utils def make_status(name, id): return SimpleNamespace(user=SimpleNamespace(screen_name = name, id = id)) @@ -43,7 +37,7 @@ def add_sn(screen_name, i): need_fetch.append(sn) while len(need_fetch): - if not api: api = twitter_login(config) + if not api: api = utils.twitter_login(config) batch = need_fetch[:TWITTER_BATCH_LIMIT] need_fetch = need_fetch[TWITTER_BATCH_LIMIT:] diff --git a/streaming.py b/streaming.py index e721852..d900aae 100644 --- a/streaming.py +++ b/streaming.py @@ -11,6 +11,7 @@ from tweepy.streaming import StreamListener import config as c +import utils class StdOutListener(StreamListener): """ @@ -54,17 +55,7 @@ def run(): } listener = StdOutListener(database) - - auth = tweepy.OAuthHandler(config["consumer_key"], config["consumer_secret"]) - auth.set_access_token(config["access_token"], config["access_token_secret"]) - - api = tweepy.API(auth) - # test authentication - try: - api.verify_credentials() - logging.info("authentification OK") - except: - logging.error("Error during authentication") + api = utils.twitter_login(conf) def signal_handler(*argv, **argh): database.close() diff --git a/utils.py b/utils.py new file mode 100644 index 0000000..b9e0b37 --- /dev/null +++ b/utils.py @@ -0,0 +1,18 @@ +#!/usr/bin/env python3 +import tweepy +import logging + +def twitter_login(config): + auth = tweepy.OAuthHandler(config["consumer_key"], config["consumer_secret"]) + auth.set_access_token(config["access_token"], config["access_token_secret"]) + + api = tweepy.API(auth) + + # test authentication + try: + api.verify_credentials() + logging.info("authentification OK") + return api + except: + logging.error("Error during authentication") + return None From 869e1be33cacf6272ec4782f184579510ededdf1 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Sat, 12 Sep 2020 14:56:32 -0300 Subject: [PATCH 89/90] implement blocking, supports CSV files Signed-off-by: Niv Sardi --- README.md | 4 ++++ block.py | 45 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) create mode 100644 block.py diff --git a/README.md b/README.md index dbc47ce..dba567a 100644 --- a/README.md +++ b/README.md @@ -80,6 +80,10 @@ You might want to consider running all these with `cron` on a server. Just sayin Then uncomment line 2 and 34-40 in `save_to_db.py` +## blocking massive amount of users +You can use the `block.py` tool to block users massively. +the `-f` flag allows to pass a CSV file + ## finding similar users when looking for bots you may want to look for a LOT of similar usernames, we got you covered ! diff --git a/block.py b/block.py new file mode 100644 index 0000000..25db4a2 --- /dev/null +++ b/block.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python3 + +import logging +import config as c +import utils + +def run(): + """ + main entry point + """ + + UNBLOCK = { + "flags": "-U, --unblock", + "dest": "unblock", + "help": "unblock operation", + "action": "count", + "default": 0 + } + + opts = c.parse_args([c.CONFIG_FILE, c.DEBUG, UNBLOCK, c.CSV_FILE, c.IDS, c.USERS]) + config = opts.config[0] + + if not len(opts.ids): + return logging.error("need to provide at least one id") + + api = utils.twitter_login(config) + + print(opts.unblock) + if opts.unblock: + op = api.destroy_block + action = "unblocked" + else: + op = api.create_block + action = "blocked" + + for u, i in opts.ids: + try: + op(user_id=i) + except Exception as e: + logging.error(f"{op}({i}) -> [{u}] failed with error {e}") + + logging.info(f"all done, {action} {len(opts.ids)}") + +if __name__ == "__main__": + run() From 514a8ff92a34b8fe29d6bc7b0f086a5799d99c16 Mon Sep 17 00:00:00 2001 From: Niv Sardi Date: Mon, 14 Sep 2020 05:42:30 -0300 Subject: [PATCH 90/90] Rewrite for testing we introduce tests, and change database format to acomodate it this is a huge update but it breaks down somehow like this: - introduce tests for db utils and drivers, use pytest, iterate automatically - pynx: properly set node attributes and introduce tweets graph (still empty) - sqlite: rewrite, more columns, more data, migrations - tsv: reorganize - DBs: introduce open, close, and _WIPE methods to all (minus mysql) Signed-off-by: Niv Sardi --- DB/generic.py | 6 ++ DB/multi.py | 7 +- DB/pynx.py | 59 +++++++++++------ DB/sqlite.py | 149 +++++++++++++++++++++++++++---------------- DB/tsv.py | 29 +++++---- DB/utils.py | 28 ++++++++ config.py | 4 +- get_user_ids.py | 14 ++-- got.py | 47 ++++++++++++++ requirements.txt | 2 + streaming.py | 12 +++- tests/conftest.py | 12 ++++ tests/db.py | 15 +++++ tests/test_db.py | 158 ++++++++++++++++++++++++++++++++++++++++++++++ utils.py | 2 + 15 files changed, 441 insertions(+), 103 deletions(-) create mode 100644 DB/utils.py create mode 100644 got.py create mode 100644 tests/conftest.py create mode 100644 tests/db.py create mode 100644 tests/test_db.py diff --git a/DB/generic.py b/DB/generic.py index 9abd7f5..5074451 100644 --- a/DB/generic.py +++ b/DB/generic.py @@ -1,5 +1,11 @@ import logging +import os class DB: def __init__(self): self.name = "Generic DB Driver" + + def _WIPE(self): + self.close() + os.remove(self.filename) + self.open() diff --git a/DB/multi.py b/DB/multi.py index ad65cf3..c6b6afd 100644 --- a/DB/multi.py +++ b/DB/multi.py @@ -1,8 +1,8 @@ from . import generic import logging -class MultiDriver(generic.DB): - def __init__(self, databases): +class Driver(generic.DB): + def __init__(self, databases = []): self.name = "Multiple Dispatch DB Driver" if type(databases) == list: @@ -33,3 +33,6 @@ def wrapper(*args, **kwargs): def getTweets(self): return self.dbs[0].getTweets() + + def _WIPE(self): + return [d._WIPE() for d in self.dbs] diff --git a/DB/pynx.py b/DB/pynx.py index cbac7e9..83853bb 100644 --- a/DB/pynx.py +++ b/DB/pynx.py @@ -3,8 +3,10 @@ import logging import json import re +import os from . import generic +from . import utils hashre = re.compile(r"(#\w+)") userre = re.compile(r"(@\w+)") @@ -58,12 +60,20 @@ def __init__(self, filename="graph.gexf"): generic.DB.__init__(self) self.name = "NetworkX DB Driver" - self.filename = filename + self.type = filename.split(".")[-1] or "gexf" + if self.type == 'pynx': # this is for test handeling + self.type = "gexf" + filename.replace('pynx', 'gexf') + + self.filename = filename - self._user_graph = "user-%s" % filename - self._hash_graph = "hash-%s" % filename - self._twit_graph = "twit-%s" % filename + self.open() + + def open(self): + self._user_graph = "user-%s" % self.filename + self._hash_graph = "hash-%s" % self.filename + self._twit_graph = "twit-%s" % self.filename self._write = getattr(nx, "write_%s" % self.type) self._read = getattr(nx, "read_%s" % self.type) @@ -74,6 +84,15 @@ def __init__(self, filename="graph.gexf"): logging.info(f"graphs opened {self.U.nodes()} {self.H.nodes()} {self.T.nodes()}") + def _WIPE(self): + self.close() + + os.remove(self._user_graph) + os.remove(self._hash_graph) + os.remove(self._twit_graph) + + self.open() + def _open_graph(self, filename): try: return self._read(filename) @@ -83,37 +102,35 @@ def _open_graph(self, filename): def getTweets(self): return [n for n in self.U.nodes()] +# def getAuthor(self, screen_name): +# u = normalize("@%s" % screen_name) +# return self.U.neighbors(u) + def markDeleted(self, id): - self.U.nodes[id]["deleted"] = True - - def writeSuccess(self, path): - logging.warning("NOT IMPLEMENTED") - - def getLogs(self): - logging.warning("NOT IMPLEMENTED") + nx.set_node_attributes(self.U, {id: {"deleted": True}}) def _write_all(self): self._write(self.H, self._hash_graph) self._write(self.U, self._user_graph) + self._write(self.T, self._twit_graph) def close(self): self._write_all() - def extract_text(self, status): - try: - return status.extended_tweet.text - except AttributeError: - return status.text - def saveTweet(self, url, status): - text = self.extract_text(status) + def saveTweet(self, status): + text = utils.extract_text(status) add_tags(self.H, text) + add_users(self.U, text, status) + logging.info(f"H, {self.H.nodes()}") self._write_all() - def saveAuthor(self, status): - text = self.extract_text(status) - add_users(self.U, text, status) + def saveAuthor(self, user): + u = normalize("@%s" % user.screen_name) + add_node(self.U, u) + nx.set_node_attributes(self.U, {u: {'id': user.id, 'created_at': user.created_at.isoformat()}}) + self._write_all() if __name__ == "__main__": diff --git a/DB/sqlite.py b/DB/sqlite.py index 76d3c0e..cda94be 100644 --- a/DB/sqlite.py +++ b/DB/sqlite.py @@ -1,85 +1,118 @@ import sqlite3 +import json import logging +import sys +from . import utils from . import generic +VERSION = 1 + +def migrate_0_1(db): + db.execute( + """CREATE TABLE IF NOT EXISTS tweets (id INTEGER PRIMARY KEY, \ + screen_name VARCHAR(255), \ + text VARCHAR(1024), \ + date DATE, \ + link VARCHAR(255), \ + directed_to VARCHAR(255), \ + replies INTEGER, \ + retweets INTEGER, \ + favorites INTEGER, \ + geo VARCHAR(255), \ + mentions VARCHAR(1024), \ + hashtags VARCHAR(1024), \ + Screenshot BOOLEAN, \ + Deleted BOOLEAN)""") + + db.execute( + """CREATE TABLE IF NOT EXISTS authors (screen_name VARCHAR(255) PRIMARY KEY, \ + id INTEGER UNIQUE, \ + date DATE) + """) + + db.execute("PRAGMA user_version = 1") + +MIGRATIONS = [ + migrate_0_1 +] + class Driver(generic.DB): def __init__(self, filename="twitter.db"): generic.DB.__init__(self) - self.db = sqlite3.connect(filename) - - cur = self.db.cursor() + self.filename = filename + self.open() + + def open(self): + self.db = sqlite3.connect(self.filename) - cur.execute( - "CREATE TABLE IF NOT EXISTS Tweets (Id INTEGER PRIMARY KEY, \ - Author VARCHAR(255), \ - Text VARCHAR(255), \ - Url VARCHAR(255), \ - Tweet_Id VARCHAR(255), \ - Screenshot INTEGER, \ - Deleted INTEGER)" - ) + self._migrate() - cur.execute( - "CREATE TABLE IF NOT EXISTS Authors (Author VARCHAR(255) PRIMARY KEY, \ - Id INTEGER UNIQUE)" - ) + def close(self): + self.db.close() + + def _migrate(self): + cur = self.db.cursor() + user_version = cur.execute( + """ + PRAGMA user_version + """ + ).fetchone() + version = user_version[0] if user_version else 0 + if version != VERSION: + for i, m in enumerate(MIGRATIONS[version:VERSION]): + logging.info(f"running migration {i} -> {i+1} ({m})") + try: + m(self) + except Exception as e: + logging.critical(f"error {e} in migration {m}") + sys.exit(-2) def getTweets(self): cur = self.db.cursor() return cur.execute( """SELECT * \ - FROM Tweets \ + FROM tweets \ WHERE Deleted=0""" ) - def getAuthor(self, author): + def getAuthor(self, screen_name): cur = self.db.cursor() r = cur.execute( - """SELECT Id FROM Authors WHERE Author=?""", (author,) + """SELECT * FROM authors WHERE screen_name=?""", (screen_name,) ).fetchone() - if not r: raise KeyError(f"{author} not found") + if not r: raise KeyError(f"{screen_name} not found") return r[0] - def _commit(self, query): - cur = self.db.cursor() - try: - cur.execute(query) - self.db.commit() - except sqlite3.Error as e: - logging.error(e) - return False - return True - - def writeSuccess(self, path): - q = """UPDATE Tweets \ + def writeSuccess(self, id): + q = """UPDATE tweets \ SET Screenshot=1 \ - WHERE Tweet_Id='?'""" - if self._commit(q, path): - logging.info(f"Screenshot OK. Tweet id {path}") + WHERE id=?""" + if self.execute(q, (id,)): + logging.info(f"Screenshot OK. Tweet id {id}") return True - logging.warning(f"{path} not saved to database") + logging.warning(f"{id} not marked as success") return False - def markDeleted(self, path): - q = """UPDATE Tweets \ + def markDeleted(self, id): + q = """UPDATE tweets \ SET Deleted=1 \ - WHERE Tweet_Id='?'""" - if self._commit(q, path): - logging.info(f"Tweet marked as deleted {path}") + WHERE id=?""" + if self.execute(q, (id,)): + logging.info(f"Tweet marked as deleted {id}") return True - logging.warning(f"{path} not saved to database") + logging.warning(f"{id} not marked as deleted") return False def getLogs(self,): cur = self.db.cursor() return cur.execute( - "SELECT Url, Tweet_Id FROM Tweets WHERE Screenshot=0 AND Deleted=0 " + "SELECT link, id FROM tweets WHERE Screenshot=0 AND Deleted=0 " ) - def execute(self, q, args): + def execute(self, q, args = []): cur = self.db.cursor() try: cur.execute(q, args) @@ -88,18 +121,26 @@ def execute(self, q, args): logging.error(e, q, args) self.db.rollback() logging.error("ERROR writing database") + return False + return True - def saveTweet(self, url, status): - (author, text, id_str) = (status.user.screen_name, status.text, status.id_str) + def saveTweet(self, status): + text = utils.extract_text(status) + date = status.created_at + if type(date) == str: + date = utils.make_date(status.created_at) + self.execute(""" -INSERT INTO Tweets(Author, Text, Url, Tweet_Id, Screenshot, Deleted) \ - VALUES (?, ?, ?, ?, ?, ?) - """, (author, text, url, id_str, 0, 0)) + INSERT INTO tweets(id, screen_name, text, date, link, directed_to, replies, geo, mentions, hashtags, Screenshot, Deleted) \ + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 0, 0) + """, (status.id, status.user.screen_name, text, date, status.link, + status.in_reply_to_screen_name, status.replies_count, status.geo, + json.dumps(status.entities.user_mentions), json.dumps(status.entities.hashtags))) - def saveAuthor(self, status): - args = (status.user.screen_name, status.user.id) + def saveAuthor(self, user): + args = (user.screen_name, user.id, user.created_at) self.execute(""" - INSERT INTO Authors(Author, Id) - VALUES(?, ?) ON CONFLICT(Author) DO NOTHING - """, args) + INSERT INTO authors(screen_name, id, date) + VALUES(?, ?, ?) ON CONFLICT(screen_name) DO NOTHING + """, args) diff --git a/DB/tsv.py b/DB/tsv.py index 2b21772..5c5c04f 100644 --- a/DB/tsv.py +++ b/DB/tsv.py @@ -3,6 +3,7 @@ import logging from . import generic +from . import utils class Driver(generic.DB): def __init__(self, filename=sys.stdout): @@ -10,25 +11,29 @@ def __init__(self, filename=sys.stdout): self.name = "Simplest TSV driver" - if type(filename) is str: - exists = os.path.exists(filename) - self.file = open(filename, 'a') + self.filename = filename + self.open() + + def close(self): + self.file.close() + + def open(self): + if type(self.filename) is str: + exists = os.path.exists(self.filename) + self.file = open(self.filename, 'a') if not exists: self.file.write("id\tauthor\ttext\turl") else: - self.file = filename + self.file = self.filename self.file.write("id\tauthor\ttext\turl") - def saveTweet(self, url, status): - try: - text = status.extended_tweet.text - except AttributeError: - text = status.text + def saveTweet(self, status): + text = utils.extract_text(status) self.file.write("\t".join(( - status.id_str, + str(status.id), status.user.screen_name, - status.text.replace("\n", "\\n").replace("\t", "\\t"), - url + text.replace("\n", "\\n").replace("\t", "\\t"), + status.link ))) diff --git a/DB/utils.py b/DB/utils.py new file mode 100644 index 0000000..d16525f --- /dev/null +++ b/DB/utils.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python3 +from types import SimpleNamespace +from datetime import datetime + +def make_date(date): + #"Sun Sep 13 14:21:23 +0000 2020" + if date: return datetime.strptime(date, '%a %b %d %H:%M:%S +0000 %Y') + return date + +def make_user(username, id, date): + return SimpleNamespace(screen_name = username, id = id, created_at = make_date(date)) + +def make_status(username, id, user_id, user_date=None, + text=None, date=None, link=None, to=None, + replies=0, retweets=0, favorites=0, + geo=None, mentions=[], hashtags=[]): + user = make_user(username, user_id, user_date) + entities = SimpleNamespace(hashtags=hashtags, user_mentions=mentions) + return SimpleNamespace(id=id, user=user, entities=entities, + geo=geo, text=text, created_at=make_date(date), + in_reply_to_screen_name=to, link=link, + replies_count=replies, favorite_count=favorites, retweet_count=retweets) + +def extract_text(status): + try: + return status.extended_tweet.text + except AttributeError: + return status.text diff --git a/config.py b/config.py index 8e65aef..0df0817 100644 --- a/config.py +++ b/config.py @@ -10,7 +10,7 @@ from get_user_ids import fetch -from DB.multi import MultiDriver +from DB.multi import Driver as MultiDriver LOGGING_FORMAT = '%(asctime)s - %(pathname)s:%(lineno)s:%(funcName)s()\n - %(levelname)s - %(message)s' coloredlogs.install(fmt=LOGGING_FORMAT) @@ -111,7 +111,7 @@ def __call__(self, parser, namespace, values, option_string=None): db = namespace.db except AttributeError: db = None - + old_ids = getattr(namespace, self.dest) or () ids = fetch(namespace.config[0], flatten([v.split(',') for v in values]), db) ids.extend(old_ids) diff --git a/get_user_ids.py b/get_user_ids.py index ee83498..ba1cc37 100644 --- a/get_user_ids.py +++ b/get_user_ids.py @@ -3,13 +3,9 @@ import sys import logging -from types import SimpleNamespace - import config as c import utils - -def make_status(name, id): - return SimpleNamespace(user=SimpleNamespace(screen_name = name, id = id)) +from DB import utils as db_utils TWITTER_BATCH_LIMIT = 100 @@ -23,15 +19,15 @@ def fetch(config, users, db): handles = [] need_fetch = [] - def add_sn(screen_name, i): + def add_sn(screen_name, i, date): if i: handles.append((screen_name, i)) - db.saveAuthor(make_status(screen_name, i)) + db.saveAuthor(db_utils.make_user(screen_name, i, date)) for screen_name in users: sn = screen_name.lower() try: i = db.getAuthor(sn) - if i: add_sn(sn, i) + if i: handles.append(i) except (KeyError, AttributeError) as e: logging.warn(f"{sn} not found in DB {db} ({e})") need_fetch.append(sn) @@ -51,7 +47,7 @@ def add_sn(screen_name, i): for u in lu: sn = u._json['screen_name'].lower() - add_sn(sn, u._json['id']) + add_sn(sn, u._json['id'], u._json['created_at']) batch.remove(sn) for sn in batch: diff --git a/got.py b/got.py new file mode 100644 index 0000000..6700ea3 --- /dev/null +++ b/got.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python3 + +import logging +import GetOldTweets3 as got +import config as c + +def run(): + """ + main entry point + """ + + GEO = { + "flags": "-g, --geo", + "dest": "geo", + "nargs": "*", + "help": "lookup for tweets near term", + } + + WITHIN = { + "flags": "-w, --within", + "dest": "within", + "nargs": "*", + "help": "radius of the geo query", + } + + opts = c.parse_args([c.CONFIG_FILE, c.DEBUG, GEO, WITHIN, c.USERS_NOFETCH, c.DBS, c.TERMS]) + + database = opts.db + config = opts.config[0] + + criteria = got.manager.TweetCriteria() + + if opts.ids and len(opts.ids): criteria.setUsername(opts.ids) + if opts.track and len(opts.track): criteria.setQuerySearch(" ".join(opts.track)) + if opts.geo: criteria.setNear(opts.geo) + if opts.within: criteria.setNear(opts.within) + + logging.info(criteria) + def handler(tweets): + for t in tweets: + print(t) + + got.manager.TweetManager.getTweets(criteria, handler) + + +if __name__ == "__main__": + run() diff --git a/requirements.txt b/requirements.txt index d59fb15..fc70930 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,5 @@ needle pyinquirer clint coloredlogs +GetOldTweets3 +pytest diff --git a/streaming.py b/streaming.py index d900aae..54d0b53 100644 --- a/streaming.py +++ b/streaming.py @@ -49,13 +49,19 @@ def run(): database = opts.db config = opts.config[0] + print (opts.ids) + if opts.ids: + ids = [str(i[1]) for i in opts.ids] + else: + ids = None + stream_config = { - "follow": opts.ids or None, + "follow": ids, "track": opts.track or None } listener = StdOutListener(database) - api = utils.twitter_login(conf) + api = utils.twitter_login(config) def signal_handler(*argv, **argh): database.close() @@ -63,7 +69,7 @@ def signal_handler(*argv, **argh): signal.signal(signal.SIGINT, signal_handler) - stream = tweepy.Stream(auth = auth, listener = listener) + stream = tweepy.Stream(auth = api.auth, listener = listener) logging.info(f"STREAM: {stream_config}") while True: try: diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..5aceb3b --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,12 @@ +#!/usr/bin/env python3 + +import pytest + +def pytest_addoption(parser): + parser.addoption( + "--dbblacklist", nargs="+", default=[], help="modules to ignore" + ) + +@pytest.fixture +def db_blacklist(request): + return request.config.getoption("--dbblacklist") diff --git a/tests/db.py b/tests/db.py new file mode 100644 index 0000000..98ad929 --- /dev/null +++ b/tests/db.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python3 + +import pytest +import os + +drivers = [d for d in os.listdir('/../DB') if not d.match('(__init__|pycache|utils)')] + + +class TestDrivers(): + @pytest.mark.parametrize( + 'driver', drivers + ) + + def print_driver(self, driver): + print(driver) diff --git a/tests/test_db.py b/tests/test_db.py new file mode 100644 index 0000000..94e7cc3 --- /dev/null +++ b/tests/test_db.py @@ -0,0 +1,158 @@ +#!/usr/bin/env python3 + +import pytest +import importlib +import sys +import os +import re + +if sys.version_info[0] < 3: + raise Exception("Python 2.x is not supported. Please upgrade to 3.x") + +sys.path.append(os.path.join(os.path.dirname(__file__), "..")) + +BLACKLIST = ['__init__.py', '__pycache__', 'utils.py', 'generic.py', 'multi.py'] +drivers = [d.replace('.py', '') for d in os.listdir('DB') if not d in BLACKLIST] + +from DB import utils + +TEST_DATE = "Sun Sep 13 14:21:23 +0000 2020" +TEST_USER_DESCRIPTOR = { + 'username': 'test', + 'id': 1, + 'date': TEST_DATE, +} +TEST_STATUS_DESCRIPTOR = { + 'id': 0, + 'username': 'test', + 'user_id': 42, + 'user_date': TEST_DATE, + 'text': 'test tweet', + 'date': TEST_DATE, + 'link': 'http://twitter.com/test/id', + 'to': 'me', + 'replies': 42, + 'retweets': 0, + 'favorites': 23, + 'geo': 'Buenos Aires, Argentina', + 'mentions': ['me', 'anarchy', 'freedom'], + 'hashtags': ['#fun', '#test', '#twitter-tools'] +} +TEST_USER = utils.make_user(**TEST_USER_DESCRIPTOR) +TEST_STATUS = utils.make_status(**TEST_STATUS_DESCRIPTOR) + +class TestUtils(): + def test_make_date(self): + d = utils.make_date(TEST_DATE) + + print(d, dir(d)) + assert(d.year == 2020) + + def test_make_user(self): + u = utils.make_user('test', 0, TEST_DATE) + + print(u, dir(u)) + assert(hasattr(u, 'screen_name')) + assert(hasattr(u, 'id')) + assert(hasattr(u, 'created_at')) + + assert(u.created_at.year == 2020) + + def test_make_status(self): + s = utils.make_status(**TEST_STATUS_DESCRIPTOR) + + print(s, dir(s)) + assert(hasattr(s, 'user')) + assert(hasattr(s, 'entities')) + assert(hasattr(s, 'geo')) + assert(hasattr(s, 'text')) + assert(hasattr(s, 'created_at')) + assert(hasattr(s, 'in_reply_to_screen_name')) + assert(hasattr(s, 'link')) + assert(hasattr(s, 'replies_count')) + assert(hasattr(s, 'favorite_count')) + assert(hasattr(s, 'retweet_count')) + + assert(s.created_at.year == 2020) + + assert(hasattr(s.user, 'screen_name')) + assert(hasattr(s.user, 'id')) + assert(hasattr(s.user, 'created_at')) + + assert(s.user.created_at.year == 2020) + + assert(hasattr(s.entities, 'hashtags')) + assert('#fun' in s.entities.hashtags ) + + assert(hasattr(s.entities, 'user_mentions')) + assert('anarchy' in s.entities.user_mentions ) + +@pytest.mark.parametrize( + 'driver', drivers +) +class TestDrivers(): + def test_import_driver(self, driver, db_blacklist): + if driver in db_blacklist: pytest.skip("blacklisted") + return importlib.import_module(f"DB.{driver}") + + def test_load_driver(self, driver, db_blacklist): + M = self.test_import_driver(driver, db_blacklist) + return M.Driver(f"test_run.{driver}") + + def test__WIPE(self, driver, db_blacklist): + D = self.test_load_driver(driver, db_blacklist) + + D._WIPE() + + def test_saveTweet(self, driver, db_blacklist): + D = self.test_load_driver(driver, db_blacklist) + + D.saveTweet(TEST_STATUS) + + def test_saveAuthor(self, driver, db_blacklist): + D = self.test_load_driver(driver, db_blacklist) + if not hasattr(D, 'saveAuthor'): + pytest.skip("Driver does not implement optional feature: saveAuthor") + + D.saveAuthor(TEST_USER) + + def test_getAuthor(self, driver, db_blacklist): + D = self.test_load_driver(driver, db_blacklist) + if not hasattr(D, 'getAuthor'): + pytest.skip("Driver does not implement optional feature: getAuthor") + + D.getAuthor(TEST_USER.screen_name) + + def test_getTweets(self, driver, db_blacklist): + D = self.test_load_driver(driver, db_blacklist) + if not hasattr(D, 'getTweets'): + pytest.skip("Driver does not implement optional feature: getTweets") + + D.getTweets() + + def test_writeSuccess(self, driver, db_blacklist): + D = self.test_load_driver(driver, db_blacklist) + if not hasattr(D, 'writeSuccess'): + pytest.skip("Driver does not implement optional feature: writeSuccess") + + D.writeSuccess(0) + + def test_markDeleted(self, driver, db_blacklist): + D = self.test_load_driver(driver, db_blacklist) + if not hasattr(D, 'markDeleted'): + pytest.skip("Driver does not implement optional feature: markDeleted") + + D.markDeleted(0) + + def test_getLogs(self, driver, db_blacklist): + D = self.test_load_driver(driver, db_blacklist) + if not hasattr(D, 'getLogs'): + pytest.skip("Driver does not implement optional feature: getLogs") + + D.getLogs() + + def test__WIPE_again(self, driver, db_blacklist): + D = self.test_load_driver(driver, db_blacklist) + + D._WIPE() + diff --git a/utils.py b/utils.py index b9e0b37..a4a98c4 100644 --- a/utils.py +++ b/utils.py @@ -2,6 +2,8 @@ import tweepy import logging +from types import SimpleNamespace + def twitter_login(config): auth = tweepy.OAuthHandler(config["consumer_key"], config["consumer_secret"]) auth.set_access_token(config["access_token"], config["access_token_secret"])