duwing committed on
Commit
fec291e
·
verified ·
1 Parent(s): 44f78be

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -47
app.py CHANGED
@@ -5,13 +5,7 @@ import pandas as pd
5
  from transformers import *
6
  from tqdm import tqdm
7
  from tensorflow.python.client import device_lib
8
-
9
- from selenium import webdriver
10
- from selenium.webdriver.chrome.service import Service
11
- from selenium.webdriver.chrome.options import Options
12
- from selenium.webdriver.common.by import By
13
- from selenium.webdriver.support.ui import WebDriverWait
14
- from selenium.webdriver.support import expected_conditions as EC
15
  from bs4 import BeautifulSoup
16
  import time
17
 
@@ -68,55 +62,41 @@ def movie_evaluation_predict(sentence):
68
  elif predict_answer == 1:
69
  st.write("(긍정 확률 : %.2f) 긍정적인 영화 평가입니다." % predict_value)
70
 
71
- def setup_driver():
72
- chrome_options = Options()
73
- chrome_options.add_argument("--headless") # 백그라운드 실행
74
- chrome_options.add_argument("--no-sandbox")
75
-
76
- driver = webdriver.Chrome(options=chrome_options)
77
- return driver
78
-
79
  def scrape_content(url):
80
- driver = setup_driver()
81
- try:
82
- driver.get(url)
83
- # 페이지 로딩 대기
84
- time.sleep(3)
85
-
86
- # 본문 추출
87
- soup = BeautifulSoup(driver.page_source, 'html.parser')
88
- content = soup.find('article') # 본문 태그에 맞게 수정
89
-
90
- # 댓글 추출
91
- comments = soup.find_all('span', class_='u_cbox_contents') # 댓글 태그에 맞게 수정
92
-
93
- return {
94
- 'content': content.text if content else "본문을 찾을 수 없습니다.",
95
- 'comments': [comment.text for comment in comments]
96
- }
97
- finally:
98
- driver.quit()
99
 
100
 
101
  def main():
102
  sentiment_model = create_sentiment_bert()
103
 
 
 
 
104
  url = st.text_input("URL을 입력하세요")
105
 
106
- if st.button("크롤링 시작"):
107
  if url:
108
- with st.spinner("크롤링 중..."):
109
- result = scrape_content(url)
110
-
111
- st.subheader("본문")
112
- st.write(result['content'])
113
-
114
- st.subheader("댓글")
115
- for idx, comment in enumerate(result['comments'], 1):
116
- st.write(f"{idx}. {comment}")
117
- else:
118
- st.error("URL을 입력해주세요")
119
-
120
 
121
  '''
122
  test = st.form('test')
 
5
  from transformers import *
6
  from tqdm import tqdm
7
  from tensorflow.python.client import device_lib
8
+ import requests
 
 
 
 
 
 
9
  from bs4 import BeautifulSoup
10
  import time
11
 
 
62
  elif predict_answer == 1:
63
  st.write("(긍정 확률 : %.2f) 긍정적인 영화 평가입니다." % predict_value)
64
 
 
 
 
 
 
 
 
 
65
  def scrape_content(url):
66
+ # 페이지 요청
67
+ response = requests.get(url)
68
+ soup = BeautifulSoup(response.content, 'html.parser')
69
+
70
+ # 본문 추출
71
+ article_body = soup.find_all('div', class_='newsct_article')
72
+ content = ' '.join([p.get_text() for p in article_body])
73
+
74
+ # 댓글 추출 (예시)
75
+ comments = soup.find_all('span', class_='u_cbox_contents')
76
+ comment_list = [comment.get_text() for comment in comments]
77
+
78
+ return content, comment_list
 
 
 
 
 
 
79
 
80
 
81
  def main():
82
  sentiment_model = create_sentiment_bert()
83
 
84
+ st.title("웹 컨텐츠 스크래퍼")
85
+
86
+ # URL 입력 받기
87
  url = st.text_input("URL을 입력하세요")
88
 
89
+ if st.button("스크랩 시작"):
90
  if url:
91
+ content, comments = scrape_content(url)
92
+
93
+ # 결과 표시
94
+ st.subheader("본문 내용")
95
+ st.write(content)
96
+
97
+ st.subheader("댓글")
98
+ for comment in comments:
99
+ st.write(comment)
 
 
 
100
 
101
  '''
102
  test = st.form('test')