Update app.py
app.py
CHANGED
@@ -9,6 +9,8 @@ from tensorflow.python.client import device_lib
 import requests
 from bs4 import BeautifulSoup
 import time
+import instaloader
+from instaloader import Post
 
 PATH = './checkpoint-18750/'
 SEQ_LEN = 128

@@ -58,53 +60,62 @@ def evaluation_predict(sentence):
     return predict_answer
 
 def get_comments(news_url):
-    # extract oid and aid
-
-    list = news_url.split("/")
-    oid = list[-2]
-    aid = list[-1]
-    if len(aid) > 10:
-        aid = aid[:10]
-
-    # build the API URL
-    api_url = "https://apis.naver.com/commentBox/cbox/web_naver_list_jsonp.json"
-    params = {
-        "ticket": "news",
-        "templateId": "default_society",
-        "pool": "cbox5",
-        "lang": "ko",
-        "country": "KR",
-        "objectId": f"news{oid},{aid}",
-        "pageSize": 100,
-        "indexSize": 10,
-        "page": 1,
-        "sort": "FAVORITE"  # 'NEW' (newest first), 'FAVORITE' (most liked first)
-    }
-
-    headers = {
-        "User-Agent": "Mozilla/5.0",
-        "Referer": news_url
-    }
-
-    # call the API and parse the response
-    response = requests.get(api_url, params=params, headers=headers)
-    content = response.text.replace("_callback(", "").replace(");", "")
-    json_data = json.loads(content)
-
-    response = requests.get(news_url)
-    article_soup = BeautifulSoup(response.text, "html.parser")
-
-    # extract the title
-    title = article_soup.select_one("#ct > div.media_end_head.go_trans > div.media_end_head_title > h2")
-    if title is None:
-        title = article_soup.select_one("#content > div.end_ct > div > h2")
-
-    # extract the article body
-    article = article_soup.select_one("#dic_area")
-    if article is None:
-        article = article_soup.select_one("#articeBody")
-
-    return title.text.strip(), article.text.strip(), processing_data(json_data['result']['commentList'])
+    if ('naver' in news_url):
+        # extract oid and aid
+
+        list = news_url.split("/")
+        oid = list[-2]
+        aid = list[-1]
+        if len(aid) > 10:
+            aid = aid[:10]
+
+        # build the API URL
+        api_url = "https://apis.naver.com/commentBox/cbox/web_naver_list_jsonp.json"
+        params = {
+            "ticket": "news",
+            "templateId": "default_society",
+            "pool": "cbox5",
+            "lang": "ko",
+            "country": "KR",
+            "objectId": f"news{oid},{aid}",
+            "pageSize": 100,
+            "indexSize": 10,
+            "page": 1,
+            "sort": "FAVORITE"  # 'NEW' (newest first), 'FAVORITE' (most liked first)
+        }
+
+        headers = {
+            "User-Agent": "Mozilla/5.0",
+            "Referer": news_url
+        }
+
+        # call the API and parse the JSONP response
+        response = requests.get(api_url, params=params, headers=headers)
+        content = response.text.replace("_callback(", "").replace(");", "")
+        json_data = json.loads(content)
+
+        response = requests.get(news_url)
+        article_soup = BeautifulSoup(response.text, "html.parser")
+
+        # extract the title
+        title = article_soup.select_one("#ct > div.media_end_head.go_trans > div.media_end_head_title > h2")
+        if title is None:
+            title = article_soup.select_one("#content > div.end_ct > div > h2")
+
+        # extract the article body
+        article = article_soup.select_one("#dic_area")
+        if article is None:
+            article = article_soup.select_one("#articeBody")
+
+        return title.text.strip(), article.text.strip(), processing_data(json_data['result']['commentList'])
+    elif ('insta' in news_url):
+        list = news_url.split('/')
+        pid = list[-2] + list[-1]
+        L = instaloader.Instaloader()
+        post = Post.from_shortcode(L.context, pid)
+
+        return '', post.caption, post.comments
 
 def processing_data(comments):
     comment_list = []

@@ -134,7 +145,7 @@ def main():
     title, content, comments = get_comments(url)
 
     # display the results
-    st.subheader("
+    st.subheader("제목")
     st.write(title)
 
     st.subheader("본문 내용")

@@ -147,7 +158,7 @@ def main():
 
 
     # display the results
-    st.subheader("
+    st.subheader("제목")
     st.write(title)
 
     st.subheader("본문 내용")

@@ -165,7 +176,7 @@ def main():
     title, content, comments = get_comments(url)
 
     # display the results
-    st.subheader("
+    st.subheader("제목")
     st.write(title)
 
     st.subheader("본문 내용")

@@ -183,7 +194,7 @@ def main():
     title, content, comments = get_comments(url)
 
     # display the results
-    st.subheader("
+    st.subheader("제목")
     st.write(title)
 
     st.subheader("본문 내용")
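A note on the Naver branch: the comment endpoint replies with JSONP, i.e. JSON wrapped in a _callback(...) call, which is why get_comments strips the wrapper before json.loads. A minimal standalone sketch of that request-and-unwrap step, reusing the endpoint and parameters from the commit (fetch_naver_comment_list is a hypothetical helper, and the referer is passed in explicitly, where the commit uses the article URL itself):

    import json
    import requests

    def fetch_naver_comment_list(oid, aid, referer):
        # Endpoint and query parameters as in get_comments() above.
        api_url = "https://apis.naver.com/commentBox/cbox/web_naver_list_jsonp.json"
        params = {
            "ticket": "news",
            "templateId": "default_society",
            "pool": "cbox5",
            "lang": "ko",
            "country": "KR",
            "objectId": f"news{oid},{aid}",
            "pageSize": 100,
            "indexSize": 10,
            "page": 1,
            "sort": "FAVORITE",  # or "NEW" for newest first
        }
        # Browser-like User-Agent and Referer headers, as in the commit.
        headers = {"User-Agent": "Mozilla/5.0", "Referer": referer}
        response = requests.get(api_url, params=params, headers=headers)
        # The body is JSONP: _callback({...}); peel the wrapper to get plain JSON.
        content = response.text.replace("_callback(", "").replace(");", "")
        return json.loads(content)["result"]["commentList"]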
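A note on the Instagram branch: in instaloader, Post.comments is the comment count (an integer), while the comment objects themselves come from Post.get_comments(). If the caller expects comment texts, as the Naver branch provides via processing_data, a sketch along these lines would be needed (get_insta_comments is a hypothetical helper; fetching comments on many posts requires a logged-in Instaloader session):

    import instaloader
    from instaloader import Post

    def get_insta_comments(post_url):
        # e.g. https://www.instagram.com/p/SHORTCODE/ -> SHORTCODE
        shortcode = post_url.rstrip('/').split('/')[-1]
        L = instaloader.Instaloader()
        post = Post.from_shortcode(L.context, shortcode)
        # Post.get_comments() yields comment objects whose .text is the body;
        # post.comments, as returned in the commit, is only the total count.
        texts = [comment.text for comment in post.get_comments()]
        return '', post.caption, texts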
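For context, main() consumes the (title, content, comments) tuple from get_comments and renders it with Streamlit, which is what the repeated subheader hunks adjust. A minimal usage sketch under the same assumptions (the text_input label is hypothetical):

    import streamlit as st

    url = st.text_input("News article or Instagram URL")  # hypothetical label
    if url:
        title, content, comments = get_comments(url)
        # display the results, mirroring the hunks above
        st.subheader("제목")
        st.write(title)
        st.subheader("본문 내용")
        st.write(content)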