duwing committed on
Commit
6bc398e
·
verified ·
1 Parent(s): abb19c8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -47
app.py CHANGED
@@ -9,6 +9,8 @@ from tensorflow.python.client import device_lib
9
  import requests
10
  from bs4 import BeautifulSoup
11
  import time
 
 
12
 
13
  PATH = './checkpoint-18750/'
14
  SEQ_LEN = 128
@@ -58,53 +60,62 @@ def evaluation_predict(sentence):
58
  return predict_answer
59
 
60
  def get_comments(news_url):
61
- # oid, aid 추출
62
-
63
- list = news_url.split("/")
64
- oid = list[-2]
65
- aid = list[-1]
66
- if len(aid) > 10:
67
- aid = aid[:10]
68
 
69
- # API URL 구성
70
- api_url = "https://apis.naver.com/commentBox/cbox/web_naver_list_jsonp.json"
71
- params = {
72
- "ticket": "news",
73
- "templateId": "default_society",
74
- "pool": "cbox5",
75
- "lang": "ko",
76
- "country": "KR",
77
- "objectId": f"news{oid},{aid}",
78
- "pageSize": 100,
79
- "indexSize": 10,
80
- "page": 1,
81
- "sort": "FAVORITE" # 'NEW'(최신순), 'FAVORITE'(순공감순)
82
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
- headers = {
85
- "User-Agent": "Mozilla/5.0",
86
- "Referer": news_url
87
- }
88
 
89
- # API 호출 및 데이터 처리
90
- response = requests.get(api_url, params=params, headers=headers)
91
- content = response.text.replace("_callback(", "").replace(");", "")
92
- json_data = json.loads(content)
93
-
94
- response = requests.get(news_url)
95
- article_soup = BeautifulSoup(response.text, "html.parser")
96
-
97
- # 제목 추출
98
- title = article_soup.select_one("#ct > div.media_end_head.go_trans > div.media_end_head_title > h2")
99
- if title is None:
100
- title = article_soup.select_one("#content > div.end_ct > div > h2")
101
-
102
- # 본문 추출
103
- article = article_soup.select_one("#dic_area")
104
- if article is None:
105
- article = article_soup.select_one("#articeBody")
106
 
107
- return title.text.strip(), article.text.strip(), processing_data(json_data['result']['commentList'])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
 
109
  def processing_data(comments):
110
  comment_list = []
@@ -134,7 +145,7 @@ def main():
134
  title, content, comments = get_comments(url)
135
 
136
  # 결과 표시
137
- st.subheader("기사 제목")
138
  st.write(title)
139
 
140
  st.subheader("본문 내용")
@@ -147,7 +158,7 @@ def main():
147
 
148
 
149
  # 결과 표시
150
- st.subheader("기사 제목")
151
  st.write(title)
152
 
153
  st.subheader("본문 내용")
@@ -165,7 +176,7 @@ def main():
165
  title, content, comments = get_comments(url)
166
 
167
  # 결과 표시
168
- st.subheader("기사 제목")
169
  st.write(title)
170
 
171
  st.subheader("본문 내용")
@@ -183,7 +194,7 @@ def main():
183
  title, content, comments = get_comments(url)
184
 
185
  # 결과 표시
186
- st.subheader("기사 제목")
187
  st.write(title)
188
 
189
  st.subheader("본문 내용")
 
9
  import requests
10
  from bs4 import BeautifulSoup
11
  import time
12
+ import instaloader
13
+ from instaloader import Post
14
 
15
  PATH = './checkpoint-18750/'
16
  SEQ_LEN = 128
 
60
  return predict_answer
61
 
62
def get_comments(news_url):
    """Fetch the title, body text, and comments for a supported URL.

    Supports two sources, detected from the URL string:
      * Naver news articles ('naver' in the URL): scrapes title/body via
        BeautifulSoup and fetches comments from the cbox JSONP API.
      * Instagram posts ('insta' in the URL): loads the post via instaloader.

    Args:
        news_url: Article or post URL.

    Returns:
        A 3-tuple ``(title, content, comments)``. For Instagram the title is
        an empty string and ``comments`` is whatever ``post.comments`` yields
        (NOTE(review): in instaloader this attribute is the comment *count*,
        not a list — confirm against `processing_data`'s expectations).

    Raises:
        AttributeError: if the Naver page selectors match nothing
            (``title``/``article`` stay ``None``).
    """
    if 'naver' in news_url:
        # Extract oid / aid from the URL path segments.
        # (renamed from `list` — don't shadow the builtin)
        parts = news_url.split("/")
        oid = parts[-2]
        aid = parts[-1]
        # aid may carry a query string / extra chars; the article id is 10 digits.
        if len(aid) > 10:
            aid = aid[:10]

        # Build the comment-API request.
        api_url = "https://apis.naver.com/commentBox/cbox/web_naver_list_jsonp.json"
        params = {
            "ticket": "news",
            "templateId": "default_society",
            "pool": "cbox5",
            "lang": "ko",
            "country": "KR",
            "objectId": f"news{oid},{aid}",
            "pageSize": 100,
            "indexSize": 10,
            "page": 1,
            "sort": "FAVORITE"  # 'NEW' (newest first), 'FAVORITE' (most upvoted)
        }

        headers = {
            "User-Agent": "Mozilla/5.0",
            "Referer": news_url
        }

        # Call the API; strip the JSONP wrapper ("_callback(...);") before parsing.
        response = requests.get(api_url, params=params, headers=headers)
        content = response.text.replace("_callback(", "").replace(");", "")
        json_data = json.loads(content)

        # Fetch the article page itself for title/body scraping.
        response = requests.get(news_url)
        article_soup = BeautifulSoup(response.text, "html.parser")

        # Title: try the current layout first, then the legacy layout.
        title = article_soup.select_one("#ct > div.media_end_head.go_trans > div.media_end_head_title > h2")
        if title is None:
            title = article_soup.select_one("#content > div.end_ct > div > h2")

        # Body: current layout, then legacy layout.
        article = article_soup.select_one("#dic_area")
        if article is None:
            article = article_soup.select_one("#articeBody")

        return title.text.strip(), article.text.strip(), processing_data(json_data['result']['commentList'])
    elif 'insta' in news_url:  # was `else if` — a SyntaxError in Python
        parts = news_url.split('/')
        # Shortcode is rebuilt from the last two path segments.
        pid = parts[-2] + parts[-1]
        loader = instaloader.Instaloader()
        post = Post.from_shortcode(loader.context, pid)

        # No separate title for Instagram posts; caption serves as content.
        return '', post.caption, post.comments
119
 
120
  def processing_data(comments):
121
  comment_list = []
 
145
  title, content, comments = get_comments(url)
146
 
147
  # 결과 표시
148
+ st.subheader("제목")
149
  st.write(title)
150
 
151
  st.subheader("본문 내용")
 
158
 
159
 
160
  # 결과 표시
161
+ st.subheader("제목")
162
  st.write(title)
163
 
164
  st.subheader("본문 내용")
 
176
  title, content, comments = get_comments(url)
177
 
178
  # 결과 표시
179
+ st.subheader("제목")
180
  st.write(title)
181
 
182
  st.subheader("본문 내용")
 
194
  title, content, comments = get_comments(url)
195
 
196
  # 결과 표시
197
+ st.subheader("제목")
198
  st.write(title)
199
 
200
  st.subheader("본문 내용")