-
Notifications
You must be signed in to change notification settings - Fork 0
Feature/parsing #49
base: master
Are you sure you want to change the base?
Feature/parsing #49
Changes from all commits
fdd2287
ccf2e86
c23b4b2
b0866e7
8ef6557
94533bb
a654885
a2a62bb
24a9ae5
90a736f
2344a6c
e49068e
40a8f70
6e3d1be
65f85be
45468ba
b86798d
a1022e5
6c6e001
f72afa6
af3783a
cc5ba48
94c12f2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -105,3 +105,6 @@ venv.bak/ | |
|
|
||
| # direnv | ||
| .envrc | ||
|
|
||
| # IDEs | ||
| .vscode | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,11 @@ | ||
| # HOW TO RUN FLASK APP | ||
| ```sh | ||
| export FLASK_APP=eriwan_podcast.py | ||
| export FLASK_DEBUG=1 | ||
|
|
||
| flask db init | ||
| flask db migrate | ||
| flask db upgrade | ||
|
|
||
| flask run | ||
| ``` |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,43 @@ | ||
| # Creates the application object as an instance of class Flask | ||
|
|
||
|
|
||
| from flask import Flask | ||
| from flask_migrate import Migrate | ||
| from flask_sqlalchemy import SQLAlchemy | ||
|
|
||
| from config import Config | ||
|
|
||
|
|
||
| app = Flask(__name__) | ||
|
|
||
| # using Config class from ./config.py | ||
| app.config.from_object(Config) | ||
|
|
||
| # database | ||
| db = SQLAlchemy(app) | ||
| migrate = Migrate(app, db) | ||
|
|
||
| # The app modules below are imported at the bottom of the script rather than | ||
| # at the top, as is usually done. The bottom import is a workaround for | ||
| # circular imports, a common problem with Flask applications. | ||
| from app import models | ||
| import atexit | ||
| from app.parser import parse_anekdot | ||
| from apscheduler.schedulers.background import BackgroundScheduler | ||
|
|
||
| def scheduler_parser(): | ||
| ''' | ||
| Starts the parser. | ||
| ''' | ||
| parse_anekdot() | ||
|
|
||
| # Scheduler settings and start. | ||
| # The variables are defined in config.Config | ||
|
|
||
| scheduler = BackgroundScheduler() | ||
| scheduler.add_job(func=scheduler_parser, trigger="interval", hours=Config.PARSE_TIME_HOURS) | ||
| scheduler.start() | ||
|
|
||
| # Shut down the scheduler when exiting the app | ||
| atexit.register(lambda: scheduler.shutdown()) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| # Here will be Flask Web Forms | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Useless comment |
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,77 @@ | ||
| import os | ||
| from werkzeug.security import generate_password_hash, check_password_hash | ||
|
|
||
| from app import app, db | ||
|
|
||
|
|
||
| class User(db.Model): | ||
| id = db.Column(db.Integer, primary_key=True) | ||
| username = db.Column(db.String(64), index=True, unique=True, nullable=False) | ||
| email = db.Column(db.String(120), index=True, unique=True, nullable=False) | ||
| password_hash = db.Column(db.String(128), nullable=False) | ||
| is_admin = db.Column(db.Boolean, default=False, nullable=False) | ||
|
|
||
| def __repr__(self): | ||
| return f'<User {self.username}>' | ||
|
|
||
| def set_password(self, password): | ||
| self.password_hash = generate_password_hash(password) | ||
|
|
||
| def check_password(self, password): | ||
| return check_password_hash(self.password_hash, password) | ||
|
|
||
|
|
||
| class Episode(db.Model): | ||
| id = db.Column(db.Integer, primary_key=True) | ||
| name = db.Column(db.String(255), nullable=False) | ||
| user_id = db.Column(db.Integer, db.ForeignKey('user.id')) | ||
|
|
||
| def __repr__(self): | ||
| return f'<Episode id: {self.id}>, name: {self.name}' | ||
|
|
||
| def get_file_path(self): | ||
| ''' | ||
| Return the path to the file wrapped in jingles, if that file exists | ||
| ''' | ||
| static_path = os.path.join(app.config.get('STATIC_ROOT'), 'episodes') | ||
| file_path = f'{static_path}/{self.id}.mp3' | ||
| if os.path.exists(file_path): | ||
| return file_path | ||
|
|
||
| # todo: add to celery task | ||
| def generate_wrapped_file(self, upload_file): | ||
| ''' | ||
| Return the file generated from upload_file, with the episode name as a prefix | ||
| ''' | ||
| pass | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. :( |
||
|
|
||
|
|
||
| class Joke(db.Model): | ||
| id = db.Column(db.Integer, primary_key=True) | ||
| joke_text = db.Column(db.Text, nullable=False) | ||
| user_id = db.Column(db.Integer, db.ForeignKey('user.id')) | ||
|
|
||
| def __repr__(self): | ||
| return f'<Joke id: {self.id}>, joke_text: {self.joke_text}' | ||
|
|
||
| def get_file_path(self): | ||
| ''' | ||
| Return the path to the file wrapped in jingles, if that file exists | ||
| ''' | ||
| static_path = os.path.join(app.config.get('STATIC_ROOT'), 'jokes') | ||
| file_path = f'{static_path}/{self.id}.mp3' | ||
| if os.path.exists(file_path): | ||
| return file_path | ||
|
|
||
| def generate_base_file(self): | ||
| ''' | ||
| Return the base audio file generated from joke_text | ||
| ''' | ||
| pass | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. :( |
||
|
|
||
| # todo: add to celery task | ||
| def generate_wrapped_file(self, upload_file): | ||
| ''' | ||
| Return the file wrapped in jingles, generated from upload_file | ||
| ''' | ||
| pass | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. :( |
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,29 @@ | ||
| # Parses jokes from anekdotitut.ru and adds | ||
| # them to the database. | ||
| import urllib.request | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Check the import order |
||
| from urllib.parse import quote | ||
| from urllib.parse import unquote | ||
| from bs4 import BeautifulSoup | ||
| import re | ||
| from app.models import Joke | ||
| from app import db | ||
|
|
||
|
|
||
| def parse_anekdot(): | ||
| ''' | ||
| Simple function for collecting jokes from anekdotitut.ru | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Don't abbreviate or shorten words. Spelling is important. |
||
| and adding them to the database. | ||
| ''' | ||
| jokes_out = [] | ||
| for i in range(1, 10): | ||
| url = url = 'https://anekdotitut.ru/pro_armyanskoe_radio' + str( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
| i) + '.php' | ||
| html_doc = urllib.request.urlopen(url) | ||
| soup_doc = BeautifulSoup(html_doc, 'html.parser') | ||
| jokes = soup_doc.body(class_='noselect', id=re.compile('anekdot\d+')) | ||
| for joke in jokes: | ||
| # Check for entry in DB. | ||
| if not bool(Joke.query.filter_by(joke_text = joke.text).first()): | ||
| j = Joke(joke_text = joke.text, user_id = 999) | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Magic number 999 |
||
| db.session.add(j) | ||
| db.session.commit() | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No newline at end of file |
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| {{ feed_blank }} | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Don't forget the newline at the end of the file |
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| # Here will be tests |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,18 @@ | ||
| # Config Classes | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Useless comments |
||
|
|
||
| import os | ||
| basedir = os.path.abspath(os.path.dirname(__file__)) | ||
|
|
||
|
|
||
| class Config(object): | ||
| SECRET_KEY = os.environ.get('SECRET_KEY') or 'Wo7GhuD2OWIv' | ||
| SQLALCHEMY_DATABASE_URI = os.environ.get('DATABASE_URL') or \ | ||
| 'sqlite:///' + os.path.join(basedir, 'app.db') | ||
| SQLALCHEMY_TRACK_MODIFICATIONS = False | ||
|
|
||
| ADMINS = ['your-email@example.com'] | ||
|
|
||
| STATIC_ROOT = '/static/' | ||
|
|
||
| # Interval between parser runs, in hours | ||
| PARSE_TIME_HOURS = 40 | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,2 @@ | ||
| # The top-level script that defines the Flask application instance | ||
| from app import app |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| Generic single-database configuration. | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You forgot the newline at the end of the file |
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,45 @@ | ||
| # A generic, single database configuration. | ||
|
|
||
| [alembic] | ||
| # template used to generate migration files | ||
| # file_template = %%(rev)s_%%(slug)s | ||
|
|
||
| # set to 'true' to run the environment during | ||
| # the 'revision' command, regardless of autogenerate | ||
| # revision_environment = false | ||
|
|
||
|
|
||
| # Logging configuration | ||
| [loggers] | ||
| keys = root,sqlalchemy,alembic | ||
|
|
||
| [handlers] | ||
| keys = console | ||
|
|
||
| [formatters] | ||
| keys = generic | ||
|
|
||
| [logger_root] | ||
| level = WARN | ||
| handlers = console | ||
| qualname = | ||
|
|
||
| [logger_sqlalchemy] | ||
| level = WARN | ||
| handlers = | ||
| qualname = sqlalchemy.engine | ||
|
|
||
| [logger_alembic] | ||
| level = INFO | ||
| handlers = | ||
| qualname = alembic | ||
|
|
||
| [handler_console] | ||
| class = StreamHandler | ||
| args = (sys.stderr,) | ||
| level = NOTSET | ||
| formatter = generic | ||
|
|
||
| [formatter_generic] | ||
| format = %(levelname)-5.5s [%(name)s] %(message)s | ||
| datefmt = %H:%M:%S |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,96 @@ | ||
| from __future__ import with_statement | ||
|
|
||
| import logging | ||
| from logging.config import fileConfig | ||
|
|
||
| from sqlalchemy import engine_from_config | ||
| from sqlalchemy import pool | ||
|
|
||
| from alembic import context | ||
|
|
||
| # this is the Alembic Config object, which provides | ||
| # access to the values within the .ini file in use. | ||
| config = context.config | ||
|
|
||
| # Interpret the config file for Python logging. | ||
| # This line sets up loggers basically. | ||
| fileConfig(config.config_file_name) | ||
| logger = logging.getLogger('alembic.env') | ||
|
|
||
| # add your model's MetaData object here | ||
| # for 'autogenerate' support | ||
| # from myapp import mymodel | ||
| # target_metadata = mymodel.Base.metadata | ||
| from flask import current_app | ||
| config.set_main_option( | ||
| 'sqlalchemy.url', current_app.config.get( | ||
| 'SQLALCHEMY_DATABASE_URI').replace('%', '%%')) | ||
| target_metadata = current_app.extensions['migrate'].db.metadata | ||
|
|
||
| # other values from the config, defined by the needs of env.py, | ||
| # can be acquired: | ||
| # my_important_option = config.get_main_option("my_important_option") | ||
| # ... etc. | ||
|
|
||
|
|
||
| def run_migrations_offline(): | ||
| """Run migrations in 'offline' mode. | ||
|
|
||
| This configures the context with just a URL | ||
| and not an Engine, though an Engine is acceptable | ||
| here as well. By skipping the Engine creation | ||
| we don't even need a DBAPI to be available. | ||
|
|
||
| Calls to context.execute() here emit the given string to the | ||
| script output. | ||
|
|
||
| """ | ||
| url = config.get_main_option("sqlalchemy.url") | ||
| context.configure( | ||
| url=url, target_metadata=target_metadata, literal_binds=True | ||
| ) | ||
|
|
||
| with context.begin_transaction(): | ||
| context.run_migrations() | ||
|
|
||
|
|
||
| def run_migrations_online(): | ||
| """Run migrations in 'online' mode. | ||
|
|
||
| In this scenario we need to create an Engine | ||
| and associate a connection with the context. | ||
|
|
||
| """ | ||
|
|
||
| # this callback is used to prevent an auto-migration from being generated | ||
| # when there are no changes to the schema | ||
| # reference: http://alembic.zzzcomputing.com/en/latest/cookbook.html | ||
| def process_revision_directives(context, revision, directives): | ||
| if getattr(config.cmd_opts, 'autogenerate', False): | ||
| script = directives[0] | ||
| if script.upgrade_ops.is_empty(): | ||
| directives[:] = [] | ||
| logger.info('No changes in schema detected.') | ||
|
|
||
| connectable = engine_from_config( | ||
| config.get_section(config.config_ini_section), | ||
| prefix='sqlalchemy.', | ||
| poolclass=pool.NullPool, | ||
| ) | ||
|
|
||
| with connectable.connect() as connection: | ||
| context.configure( | ||
| connection=connection, | ||
| target_metadata=target_metadata, | ||
| process_revision_directives=process_revision_directives, | ||
| **current_app.extensions['migrate'].configure_args | ||
| ) | ||
|
|
||
| with context.begin_transaction(): | ||
| context.run_migrations() | ||
|
|
||
|
|
||
| if context.is_offline_mode(): | ||
| run_migrations_offline() | ||
| else: | ||
| run_migrations_online() |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,24 @@ | ||
| """${message} | ||
|
|
||
| Revision ID: ${up_revision} | ||
| Revises: ${down_revision | comma,n} | ||
| Create Date: ${create_date} | ||
|
|
||
| """ | ||
| from alembic import op | ||
| import sqlalchemy as sa | ||
| ${imports if imports else ""} | ||
|
|
||
| # revision identifiers, used by Alembic. | ||
| revision = ${repr(up_revision)} | ||
| down_revision = ${repr(down_revision)} | ||
| branch_labels = ${repr(branch_labels)} | ||
| depends_on = ${repr(depends_on)} | ||
|
|
||
|
|
||
| def upgrade(): | ||
| ${upgrades if upgrades else "pass"} | ||
|
|
||
|
|
||
| def downgrade(): | ||
| ${downgrades if downgrades else "pass"} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we really need lambda here?