Update app.py
app.py
CHANGED
@@ -9,6 +9,8 @@ from tensorflow.python.client import device_lib
 import requests
 from bs4 import BeautifulSoup
 import time
+import instaloader
+from instaloader import Post
 
 PATH = './checkpoint-18750/'
 SEQ_LEN = 128

@@ -58,53 +60,62 @@ def evaluation_predict(sentence):
     return predict_answer
 
 def get_comments(news_url):
-    # extract oid and aid
-
-    list = news_url.split("/")
-    oid = list[-2]
-    aid = list[-1]
-    if len(aid) > 10:
-        aid = aid[:10]
-
-    # build the API URL
-    api_url = "https://apis.naver.com/commentBox/cbox/web_naver_list_jsonp.json"
-    params = {
-        "ticket": "news",
-        "templateId": "default_society",
-        "pool": "cbox5",
-        "lang": "ko",
-        "country": "KR",
-        "objectId": f"news{oid},{aid}",
-        "pageSize": 100,
-        "indexSize": 10,
-        "page": 1,
-        "sort": "FAVORITE"  # 'NEW' (newest first), 'FAVORITE' (most liked first)
-    }
-
-    headers = {
-        "User-Agent": "Mozilla/5.0",
-        "Referer": news_url
-    }
-
-    # call the API and parse the response
-    response = requests.get(api_url, params=params, headers=headers)
-    content = response.text.replace("_callback(", "").replace(");", "")
-    json_data = json.loads(content)
-
-    response = requests.get(news_url)
-    article_soup = BeautifulSoup(response.text, "html.parser")
-
-    # extract the title
-    title = article_soup.select_one("#ct > div.media_end_head.go_trans > div.media_end_head_title > h2")
-    if title is None:
-        title = article_soup.select_one("#content > div.end_ct > div > h2")
-
-    # extract the article body
-    article = article_soup.select_one("#dic_area")
-    if article is None:
-        article = article_soup.select_one("#articeBody")
-
-    return title.text.strip(), article.text.strip(), processing_data(json_data['result']['commentList'])
+    if ('naver' in news_url):
+        # extract oid and aid
+
+        list = news_url.split("/")
+        oid = list[-2]
+        aid = list[-1]
+        if len(aid) > 10:
+            aid = aid[:10]
+
+        # build the API URL
+        api_url = "https://apis.naver.com/commentBox/cbox/web_naver_list_jsonp.json"
+        params = {
+            "ticket": "news",
+            "templateId": "default_society",
+            "pool": "cbox5",
+            "lang": "ko",
+            "country": "KR",
+            "objectId": f"news{oid},{aid}",
+            "pageSize": 100,
+            "indexSize": 10,
+            "page": 1,
+            "sort": "FAVORITE"  # 'NEW' (newest first), 'FAVORITE' (most liked first)
+        }
+
+        headers = {
+            "User-Agent": "Mozilla/5.0",
+            "Referer": news_url
+        }
+
+        # call the API and parse the JSONP response
+        response = requests.get(api_url, params=params, headers=headers)
+        content = response.text.replace("_callback(", "").replace(");", "")
+        json_data = json.loads(content)
+
+        response = requests.get(news_url)
+        article_soup = BeautifulSoup(response.text, "html.parser")
+
+        # extract the title
+        title = article_soup.select_one("#ct > div.media_end_head.go_trans > div.media_end_head_title > h2")
+        if title is None:
+            title = article_soup.select_one("#content > div.end_ct > div > h2")
+
+        # extract the article body
+        article = article_soup.select_one("#dic_area")
+        if article is None:
+            article = article_soup.select_one("#articeBody")
+
+        return title.text.strip(), article.text.strip(), processing_data(json_data['result']['commentList'])
+    elif ('insta' in news_url):
+        list = news_url.split('/')
+        pid = list[-2] + list[-1]
+        L = instaloader.Instaloader()
+        post = Post.from_shortcode(L.context, pid)
+
+        return '', post.caption, post.comments
 
 def processing_data(comments):
     comment_list = []

@@ -134,7 +145,7 @@ def main():
     title, content, comments = get_comments(url)
 
     # display the results
-    st.subheader("
+    st.subheader("제목")
     st.write(title)
 
     st.subheader("본문 내용")

@@ -147,7 +158,7 @@ def main():
 
 
     # display the results
-    st.subheader("
+    st.subheader("제목")
     st.write(title)
 
     st.subheader("본문 내용")

@@ -165,7 +176,7 @@ def main():
     title, content, comments = get_comments(url)
 
     # display the results
-    st.subheader("
+    st.subheader("제목")
     st.write(title)
 
     st.subheader("본문 내용")

@@ -183,7 +194,7 @@ def main():
     title, content, comments = get_comments(url)
 
     # display the results
-    st.subheader("
+    st.subheader("제목")
     st.write(title)
 
     st.subheader("본문 내용")
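A note on the Naver branch: the comment endpoint replies with JSONP, i.e. JSON wrapped in a _callback(...) call, which is why get_comments strips the wrapper before json.loads. A minimal standalone sketch of that request-and-unwrap step, reusing the endpoint and parameters from the commit (fetch_naver_comment_list is a hypothetical helper, and the referer is passed in explicitly, where the commit uses the article URL itself):

    import json
    import requests

    def fetch_naver_comment_list(oid, aid, referer):
        # Endpoint and query parameters as in get_comments() above.
        api_url = "https://apis.naver.com/commentBox/cbox/web_naver_list_jsonp.json"
        params = {
            "ticket": "news",
            "templateId": "default_society",
            "pool": "cbox5",
            "lang": "ko",
            "country": "KR",
            "objectId": f"news{oid},{aid}",
            "pageSize": 100,
            "indexSize": 10,
            "page": 1,
            "sort": "FAVORITE",  # or "NEW" for newest first
        }
        # Browser-like User-Agent and Referer headers, as in the commit.
        headers = {"User-Agent": "Mozilla/5.0", "Referer": referer}
        response = requests.get(api_url, params=params, headers=headers)
        # The body is JSONP: _callback({...}); peel the wrapper to get plain JSON.
        content = response.text.replace("_callback(", "").replace(");", "")
        return json.loads(content)["result"]["commentList"]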
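A note on the Instagram branch: in instaloader, Post.comments is the comment count (an integer), while the comment objects themselves come from Post.get_comments(). If the caller expects comment texts, as the Naver branch provides via processing_data, a sketch along these lines would be needed (get_insta_comments is a hypothetical helper; fetching comments on many posts requires a logged-in Instaloader session):

    import instaloader
    from instaloader import Post

    def get_insta_comments(post_url):
        # e.g. https://www.instagram.com/p/SHORTCODE/ -> SHORTCODE
        shortcode = post_url.rstrip('/').split('/')[-1]
        L = instaloader.Instaloader()
        post = Post.from_shortcode(L.context, shortcode)
        # Post.get_comments() yields comment objects whose .text is the body;
        # post.comments, as returned in the commit, is only the total count.
        texts = [comment.text for comment in post.get_comments()]
        return '', post.caption, texts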
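For context, main() consumes the (title, content, comments) tuple from get_comments and renders it with Streamlit, which is what the repeated subheader hunks adjust. A minimal usage sketch under the same assumptions (the text_input label is hypothetical):

    import streamlit as st

    url = st.text_input("News article or Instagram URL")  # hypothetical label
    if url:
        title, content, comments = get_comments(url)
        # display the results, mirroring the hunks above
        st.subheader("제목")
        st.write(title)
        st.subheader("본문 내용")
        st.write(content)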