Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 0 additions & 47 deletions compose.yml

This file was deleted.

Binary file added python/DB_INFO.pkl
Binary file not shown.
Binary file modified python/__pycache__/app.cpython-38.pyc
Binary file not shown.
Binary file added python/__pycache__/dapp.cpython-38.pyc
Binary file not shown.
Binary file added python/__pycache__/dijkstra.cpython-38.pyc
Binary file not shown.
Binary file added python/__pycache__/tags.cpython-38.pyc
Binary file not shown.
Binary file added python/airflow/DB_INFO.pkl
Binary file not shown.
42 changes: 42 additions & 0 deletions python/airflow/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Base image for the Airflow + konlpy pipeline.
# NOTE(review): python:3.7-slim-buster is EOL; consider a newer pinned base.
FROM python:3.7-slim-buster

ARG AIRFLOW_USER_HOME=/usr/local/airflow
ENV AIRFLOW_HOME=${AIRFLOW_USER_HOME}
# key=value form (space-separated ENV is a deprecated legacy format).
ENV JAVA_HOME=/usr/lib/jvm/default-java
ENV PATH=${PATH}:${JAVA_HOME}/bin

# Working directory (created automatically if missing).
WORKDIR ${AIRFLOW_USER_HOME}

# Java is required at runtime — presumably for konlpy's JVM bridge used by
# the DAG code (TODO confirm). Combine update+install in one layer, skip
# recommended packages, and purge the apt lists so they never persist.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
       default-jdk \
       default-jre \
    && rm -rf /var/lib/apt/lists/*

# Install Python deps before copying source so this layer stays cached
# until requirements.txt itself changes.
COPY requirements.txt ${AIRFLOW_USER_HOME}/dags/requirements.txt
RUN pip install --no-cache-dir -r ${AIRFLOW_USER_HOME}/dags/requirements.txt

# Dedicated non-root user; give it ownership of its home instead of a
# blanket world-writable chmod 777.
RUN useradd -ms /bin/bash -d ${AIRFLOW_USER_HOME} airflow \
    && chown -R airflow: ${AIRFLOW_USER_HOME}

USER airflow
# Initialize the metadata DB and create the admin account.
# WARNING(review): these credentials are baked into an image layer and
# visible in `docker history`; supply them at deploy time instead.
RUN airflow db init && \
    airflow users create \
    --username admin \
    --firstname dp \
    --lastname airflow \
    --role Admin \
    --email admin@example.com \
    --password admin

# Drop the requirements-only dags content, then copy the real project,
# owned by the runtime user (avoids a post-copy chmod 777 as root).
RUN rm -rf ${AIRFLOW_USER_HOME}/dags/*
COPY --chown=airflow:airflow . ${AIRFLOW_USER_HOME}/dags

# data/ must stay writable by the airflow user (DAGs write models/CSVs there).
RUN chmod -R u+rwX ${AIRFLOW_USER_HOME}/dags/data

# Webserver port (documentation only; publish with -p at run time).
EXPOSE 5000

# Webserver daemonized in the background; `exec` promotes the scheduler to
# the shell's process so docker stop's SIGTERM reaches it. Container now
# runs as the airflow user, not root.
CMD ["bash", "-c", "airflow webserver --port 5000 -D & exec airflow scheduler"]
68 changes: 68 additions & 0 deletions python/airflow/air_dag.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
from airflow import DAG
from airflow.operators.python_operator import PythonOperator
from datetime import datetime, timedelta
import pandas as pd
import numpy as np
import pickle
import os
import crawl
import modeling
from konlpy.tag import Okt



# Date suffix (e.g. "7_15") used to version today's model/data artifacts.
# Note: month/day are not zero-padded, matching the artifact naming scheme.
now = datetime.now()
month, day = str(now.month), str(now.day)
date = '_'.join((month, day))


# Resolve artifact paths relative to this file's own directory so the DAG
# behaves the same regardless of the scheduler's working directory.
current_dir = os.path.dirname(os.path.abspath(__file__))
info_path = os.path.join(current_dir, 'DB_INFO.pkl')
model_path = os.path.join(current_dir, 'data/model_{}.pkl'.format(date))
data_path = os.path.join(current_dir, 'data/data_{}.csv'.format(date))

# Load DB connection settings. The pickle file stores five values written
# sequentially, in this exact order: user, password, host, port, db name.
with open(info_path, "rb") as file:
    USER, PASSWD, HOST, PORT, NAME = (pickle.load(file) for _ in range(5))


# Default task arguments shared by all tasks in the DAG.
default_args = {
    'owner': 'dp_service',
    'depends_on_past': False,
    # Fixed start_date: the original used datetime.now(), which Airflow
    # explicitly warns against — a dynamic start_date shifts on every parse,
    # so the scheduler can never pin down the first schedule interval.
    'start_date': datetime(2023, 1, 1),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,                           # retry each failed task once
    'retry_delay': timedelta(minutes=5),    # wait 5 minutes between retries
}

# Daily pipeline definition: crawl travel spots, then rebuild the model.
dag = DAG(
    dag_id='dp_airflow',
    default_args=default_args,
    description='A simple DAG to crawl travel spots and insert into the database',
    schedule_interval='@daily',  # run once per day
)

def _make_task(task_id, fn):
    # Helper: wrap a plain callable in a PythonOperator bound to this DAG.
    return PythonOperator(task_id=task_id, python_callable=fn, dag=dag)


# Task 1: crawl spot data and insert it into the database.
crawl_task = _make_task('crawl_and_insert_to_db', crawl.crawl_and_insert_to_db)

# Task 2: run the modeling step.
modeling_task = _make_task('modeling', modeling.modeling)

# Ordering: crawling must finish before modeling starts (same as >>).
crawl_task.set_downstream(modeling_task)

# Allow running this file directly to invoke Airflow's DAG-level CLI
# (useful for local debugging of the DAG definition).
if __name__ == "__main__":
    dag.cli()
Loading