Skip to content

Commit

Permalink
Merge pull request #1 from KakaoTech-14/saemi
Browse files Browse the repository at this point in the history
make ai db
  • Loading branch information
sminy85 authored Sep 10, 2024
2 parents 1c31bce + 2cac902 commit 4544b26
Show file tree
Hide file tree
Showing 2 changed files with 104 additions and 25 deletions.
113 changes: 88 additions & 25 deletions aiServer/stock/ai_model.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from typing import List, Dict, Any
import datetime
from datetime import datetime
import FinanceDataReader as fdr
import joblib
import numpy as np
Expand All @@ -8,7 +8,9 @@
from bs4 import BeautifulSoup
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

from .models import NewsData
import pickle
from .models import StockModelInfo

class Output:
def __init__(self, product_number, name, quantity):
Expand All @@ -35,9 +37,15 @@ def get_stock_order_ratio(amount: int, stocks: List[Dict[str, Any]]) -> List[Dic
stock_code = stock['productNumber']
today_price = get_today_open_price(stock_code)
stock['today_price'] = today_price


open_dif_data_list = prepare_stock_data(stocks)
print("open_dif_data_list" + str(open_dif_data_list))

# 모델 만드는 코드
for i in range(len(stocks)):
make_model(next((item[1] for item in open_dif_data_list if item[0] == stocks[i]['name']), None), stocks[i]['name'])

newslabel_match_openchange = predict_add_news_label(open_dif_data_list)
print("newslabel_match_openchange=" + str(newslabel_match_openchange))

Expand Down Expand Up @@ -84,34 +92,47 @@ def calculate_open_diff(df: pd.DataFrame) -> pd.DataFrame:
return data


def scrape_news_titles_and_dates(url_base: str, page_count: int):
def scrape_news_titles_and_dates():
title_list = []
date_list = []
url_base = 'https://www.sedaily.com/NewsList/GD05'

for i in range(1, page_count + 1):
# Get existing data from the database
existing_news = NewsData.objects.all()
existing_titles_dates = set((news.title, news.date) for news in existing_news)

for i in range(1, 5):
url = url_base if i == 1 else f'{url_base}/New/{i}'
response = requests.get(url)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')

titles = soup.select('.article_tit')
dates = soup.select('.date')
rel_times = soup.select('.rel_time')

for title in titles:
title_list.append(title.get_text())
for rel_time in rel_times:
date_list.append(rel_time.get_text())
if title not in existing_titles_dates:
title_list.append(title.get_text())
for date in dates:
date_list.append(date.get_text())
if date not in existing_titles_dates:
date = datetime.strptime(date.get_text(), '%Y.%m.%d').date()
date_list.append(date)

for i in range(min(len(title_list), len(date_list))):
# 기존 데이터 확인
if not NewsData.objects.filter(title=title_list[i], date=date_list[i]).exists():
# 데이터가 없을 때만 저장
news_data = NewsData(title=title_list[i], date=date_list[i])
news_data.save()
else:
print(f"Duplicate entry found for title: {title_list[i]}, date: {date_list[i]}. Skipping insertion.")

return title_list, date_list

# news_url_base = 'https://www.sedaily.com/NewsList/GD05'

news_url_base = 'https://www.sedaily.com/NewsList/GD05'

title_list, date_list = scrape_news_titles_and_dates(news_url_base, 100)
makemodel_title_list, makemodel_date_list = scrape_news_titles_and_dates(news_url_base, 200)
# title_list, date_list = scrape_news_titles_and_dates(news_url_base, 100)
# makemodel_title_list, makemodel_date_list = scrape_news_titles_and_dates(news_url_base, 200)


def predict_add_news_label(open_dif_data_list: List[tuple]) -> Dict[str, pd.DataFrame]:
Expand All @@ -127,10 +148,17 @@ def add_news_label(data: pd.DataFrame, name: str) -> pd.DataFrame:
today_news_title = []
today_date_list = []

for i in range(len(title_list)):
if name in title_list[i]:
today_news_title.append(title_list[i])
today_date_list.append(date_list[i])
scrape_news_titles_and_dates()

# 데이터베이스의 데이터 중 절반만 쿼리
half_data = NewsData.objects.all()[:NewsData.objects.count() // 2]

# 절반 데이터를 출력하거나 처리
for data in half_data:
# print(data.title, data.date)
if name in data.title:
today_news_title.append(data.title)
today_date_list.append(data.date)

today_news_title_date = pd.DataFrame({'title': today_news_title, 'Date': today_date_list})
today_news_title_date['Date'] = pd.to_datetime(today_news_title_date['Date'])
Expand All @@ -147,16 +175,20 @@ def add_news_label(data: pd.DataFrame, name: str) -> pd.DataFrame:
today_news_title_date['title_label'] = today_data_title_predict

newslabel_match_openchange = pd.merge(today_news_title_date, data, on='Date')
print(newslabel_match_openchange)
return newslabel_match_openchange


def make_model(data: pd.DataFrame, name: str) -> LinearRegression:
today_news_title = []
today_date_list = []
for i in range(len(makemodel_title_list)):
if name in makemodel_title_list[i]:
today_news_title.append(makemodel_title_list[i])
today_date_list.append(makemodel_date_list[i])

all_news_data = NewsData.objects.all()

for data in all_news_data:
if name in data.title:
today_news_title.append(data.title)
today_date_list.append(data.date)

today_news_title_date = pd.DataFrame({'title': today_news_title, 'Date': today_date_list})
today_news_title_date['Date'] = pd.to_datetime(today_news_title_date['Date'])
Expand Down Expand Up @@ -201,7 +233,38 @@ def make_model(data: pd.DataFrame, name: str) -> LinearRegression:
model = LinearRegression()
model.fit(X_train, y_train)

return model
if StockModelInfo.objects.filter(stock_name=name).exists():
print(f"Model for stock {name} already exists. Skipping save.")
return

# 데이터베이스에 모델 저장
model_file_path = f'models/{name}_model.pkl'

# Save the model to a .pkl file
with open(model_file_path, 'wb') as f:
pickle.dump(model, f)

# Save the model information to the database
StockModelInfo.objects.update_or_create(
stock_name=name,
defaults={'model_file_path': model_file_path}
)


def load_stock_model(name: str):
try:
# Retrieve the model information from the database
stock_model_info = StockModelInfo.objects.get(stock_name=name)
model_file_path = stock_model_info.model_file_path

# Load the model from the file
with open(model_file_path, 'rb') as f:
model = pickle.load(f)

return model
except StockModelInfo.DoesNotExist:
print(f"No model found for stock: {name}")
return None


def predict_result(newslabel_match_openchange: Dict[str, pd.DataFrame], open_dif_data_list: List[tuple], stocks) -> Dict[str, Dict[str, float]]:
Expand All @@ -221,7 +284,7 @@ def predict_result(newslabel_match_openchange: Dict[str, pd.DataFrame], open_dif
continue

try:
model = make_model(raw_data, company_name)
model = load_stock_model(company_name)
predicted_price = predicted_tomorrow_openprice(data, company_name, model)
today_price = get_today_open_price(stock_code) # 오늘의 시가 가져오기 추가함
if predicted_price != 0:
Expand Down Expand Up @@ -319,4 +382,4 @@ def get_stocklist_top50(predicted_results: Dict[str, Dict[str, float]], stocks:
sorted_stock_orders = calculate_stock_amounts(predicted_results, 0, stocks)
top_50_stocks = sorted_stock_orders[:50]

return top_50_stocks
return top_50_stocks
16 changes: 16 additions & 0 deletions aiServer/stock/models.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,19 @@
from django.db import models

# Create your models here.

class NewsData(models.Model):
title = models.CharField(max_length=255)
date = models.DateField()

class Meta:
unique_together = ('title', 'date')


class StockModelInfo(models.Model):
stock_name = models.CharField(max_length=100, unique=True)
model_file_path = models.CharField(max_length=255)
created_at = models.DateTimeField(auto_now_add=True)

def __str__(self):
return self.stock_name

0 comments on commit 4544b26

Please sign in to comment.