Update app.py
app.py CHANGED
@@ -5,13 +5,7 @@ import pandas as pd
 from transformers import *
 from tqdm import tqdm
 from tensorflow.python.client import device_lib
-
-from selenium import webdriver
-from selenium.webdriver.chrome.service import Service
-from selenium.webdriver.chrome.options import Options
-from selenium.webdriver.common.by import By
-from selenium.webdriver.support.ui import WebDriverWait
-from selenium.webdriver.support import expected_conditions as EC
+import requests
 from bs4 import BeautifulSoup
 import time
 
@@ -68,55 +62,41 @@ def movie_evaluation_predict(sentence):
     elif predict_answer == 1:
         st.write("(긍정 확률 : %.2f) 긍정적인 영화 평가입니다." % predict_value)
 
-def setup_driver():
-    chrome_options = Options()
-    chrome_options.add_argument("--headless")  # 백그라운드 실행
-    chrome_options.add_argument("--no-sandbox")
-
-    driver = webdriver.Chrome(options=chrome_options)
-    return driver
-
 def scrape_content(url):
-    ... (deleted lines 80-92 not shown)
-        return {
-            'content': content.text if content else "본문을 찾을 수 없습니다.",
-            'comments': [comment.text for comment in comments]
-        }
-    finally:
-        driver.quit()
+    # 웹 페이지 요청
+    response = requests.get(url)
+    soup = BeautifulSoup(response.content, 'html.parser')
+
+    # 본문 추출
+    article_body = soup.find_all('div', class_='newsct_article')
+    content = ' '.join([p.get_text() for p in article_body])
+
+    # 댓글 추출 (예시)
+    comments = soup.find_all('span', class_='u_cbox_contents')
+    comment_list = [comment.get_text() for comment in comments]
+
+    return content, comment_list
 
 
 def main():
     sentiment_model = create_sentiment_bert()
 
+    st.title("웹 컨텐츠 스크래퍼")
+
+    # URL 입력 받기
     url = st.text_input("URL을 입력하세요")
 
-    if st.button("
+    if st.button("스크랩 시작"):
         if url:
-            ... (deleted lines 108-116 not shown)
-        else:
-            st.error("URL을 입력해주세요")
-
+            content, comments = scrape_content(url)
+
+            # 결과 표시
+            st.subheader("본문 내용")
+            st.write(content)
+
+            st.subheader("댓글")
+            for comment in comments:
+                st.write(comment)
 
 '''
 test = st.form('test')
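Below is a minimal, self-contained sketch of the requests + BeautifulSoup scraping path this commit introduces, runnable outside the Streamlit app for quick testing. The CSS classes (newsct_article, u_cbox_contents) come from the diff; the timeout, raise_for_status() check, placeholder URL, and __main__ guard are illustrative assumptions, not part of the committed code.

# Minimal sketch of the requests + BeautifulSoup path from this commit,
# runnable outside Streamlit. The selectors come from the diff; the
# timeout, raise_for_status() check, and example URL are assumptions.
import requests
from bs4 import BeautifulSoup


def scrape_content(url):
    # Fetch the page with a plain HTTP request (no headless browser)
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    # Article body: join the text of every matching container
    article_body = soup.find_all('div', class_='newsct_article')
    content = ' '.join(p.get_text() for p in article_body)

    # Comments: may come back empty if the comment box is rendered
    # client-side with JavaScript, which requests cannot execute
    comments = soup.find_all('span', class_='u_cbox_contents')
    comment_list = [comment.get_text() for comment in comments]

    return content, comment_list


if __name__ == "__main__":
    # Placeholder URL for illustration; replace with a real article page
    content, comments = scrape_content("https://example.com/news/article")
    print(content[:200])
    print(f"{len(comments)} comments scraped")

One practical note on the design change: dropping Selenium removes the Chrome/chromedriver dependency, but comment sections injected by JavaScript (as the u_cbox widget typically is) will likely not be visible to a plain requests fetch, so the comment list may often be empty.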