"""Streamlit UI for the Web RAG assistant."""

import streamlit as st
from rag_app import WebRAG
import time
import os

# Page Configuration
st.set_page_config(
    page_title="Web RAG Assistant",
    page_icon="🌐",
    layout="wide"
)

# NOTE(review): this raw-HTML markdown call carries only whitespace in the
# visible source; presumably it once held custom styling -- confirm.
st.markdown(
    """ """,
    unsafe_allow_html=True
)

# Initialize session state.
# The WebRAG instance is created only when missing so that it is not rebuilt
# on every script run.
if 'rag' not in st.session_state:
    st.session_state.rag = WebRAG()
if 'chat_history' not in st.session_state:
    st.session_state.chat_history = []
if 'url_processed' not in st.session_state:
    st.session_state.url_processed = False
if 'current_url' not in st.session_state:
    st.session_state.current_url = ""


# Function to reset chat history
def reset_chat_history():
    """Clear the stored chat history and forget the current URL."""
    st.session_state.chat_history = []
    st.session_state.current_url = ""


# Sidebar on the extreme left
with st.sidebar:
    st.header("⚙️ Settings")

    # Strip surrounding whitespace so a blank-looking input is treated as
    # empty and stray spaces do not make an unchanged URL look like a new one.
    url = st.text_input("Enter webpage URL:").strip()

    scraping_method = st.selectbox(
        "Select Scraping Method",
        ["beautifulsoup", "scrapegraph", "crawl4ai"],
        help="""
        - BeautifulSoup: Basic HTML parsing, faster but less sophisticated
        - ScrapeGraph: AI-powered scraping, better at understanding content but slower
        - Crawl4ai: Advanced async crawler with good JavaScript support
        """
    )

    if st.button("🚀 Process URL"):
        if not url:
            st.warning("⚠️ Please enter a URL")
        else:
            # A different URL starts a fresh conversation.
            if url != st.session_state.current_url:
                reset_chat_history()
                st.session_state.current_url = url

            with st.spinner("Processing URL... Please wait."):
                try:
                    st.session_state.rag.crawl_and_process(url, scraping_method)
                except Exception as e:
                    # Do not leave a stale "processed" flag from an earlier
                    # URL in place when this run failed.
                    st.session_state.url_processed = False
                    st.error(f"❌ Error processing URL: {e}")
                else:
                    st.session_state.url_processed = True
                    st.success("✅ URL processed successfully!")
                    # NOTE(review): the rerun follows the success message
                    # immediately, so the message is likely not visible for
                    # long -- confirm this is intended.
                    st.rerun()

    st.divider()
    st.markdown("### 📌 How to use")
    st.markdown(
        """
        1. Enter a webpage URL
        2. Click 'Process URL' to analyze the content
        3. Ask questions about the webpage
        4. Receive AI-powered answers
        """
    )

# Main Content
# NOTE(review): the source visible for this edit ends here, part-way through
# an `st.markdown('` call; that statement is not reproduced because its
# argument cannot be seen.