"""Streamlit page: upload a PDF, index its chunks, then run a search query.

Flow, as driven by the widgets below:

1. The user uploads a PDF.
2. Pressing the index button writes the upload to a temporary file, extracts
   chunks from it, and hands them to the backend indexer.
3. Once ``st.session_state.indexed`` is set, a query box appears and the three
   result lists returned by ``search_and_rerank`` are displayed.
"""

import os
import tempfile

import streamlit as st

from backend.pdf_utils import extract_chunks_with_langchain
from backend.indexer import setup_collections, index_documents
from backend.search import search_and_rerank

# Number of leading characters of each chunk shown in a result line.
PREVIEW_CHARS = 200


def _render_results(heading, hits):
    """Render one result list under *heading*.

    Each item of *hits* is unpacked as a ``(chunk, score)`` pair; the score is
    shown with three decimals, followed by the first ``PREVIEW_CHARS``
    characters of the chunk.
    """
    st.subheader(heading)
    for chunk, score in hits:
        st.markdown(f"**{score:.3f}** - {chunk[:PREVIEW_CHARS]}...")


st.title("Qdrant PDF Search")

# Streamlit re-executes this script on every interaction; the flag lives in
# session state so that it survives those reruns.
if "indexed" not in st.session_state:
    st.session_state.indexed = False

uploaded = st.file_uploader("Upload a PDF", type=["pdf"])

if uploaded:
    st.success("PDF uploaded!")

    if st.button("Index PDF in Qdrant Cloud"):
        with st.spinner("Indexing..."):
            # Extraction takes a filesystem path, so the upload is spilled to
            # disk. This happens only on the button press, so unrelated reruns
            # (e.g. typing a query) do not re-parse the PDF.
            # delete=False lets the file be reopened by path after the handle
            # is closed; the ".pdf" suffix is added in case the loader
            # inspects the extension.
            with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
                # getvalue() returns the whole buffer regardless of the
                # current stream position.
                tmp.write(uploaded.getvalue())
                pdf_path = tmp.name
            try:
                chunks = extract_chunks_with_langchain(pdf_path)
                setup_collections()
                index_documents(chunks)
            finally:
                # Remove the temp file only after indexing, in case the chunks
                # are produced lazily from the file.
                os.unlink(pdf_path)
            st.session_state.indexed = True  # Mark as indexed
        st.success("Indexed successfully!")

# Only show query input *after* indexing is done
if st.session_state.indexed:
    query = st.text_input("Enter your search query:")
    if query:
        results = search_and_rerank(query)
        _render_results("Raw Dense Results", results["raw"])
        _render_results("Cross-Encoder Reranked", results["cross"])
        _render_results("ColBERT Reranked", results["colbert"])
else:
    st.info("Please upload and index a PDF before searching.")