Spaces:

Vallabhpatil777
/

PDF_Search_Qdrant

Paused

File size: 1,578 Bytes

833b888

import os
import tempfile

import streamlit as st

from backend.indexer import setup_collections, index_documents
from backend.pdf_utils import extract_chunks_with_langchain
from backend.search import search_and_rerank

# Page heading rendered at the top of the app.
st.title("Qdrant PDF Search")

# The "indexed" flag is kept in session state; seed it with False only when
# the session does not have it yet, so an existing value is never overwritten.
if "indexed" not in st.session_state:
    st.session_state["indexed"] = False

# File picker limited to the "pdf" extension; the result is tested for
# truthiness further down before any upload handling happens.
uploaded = st.file_uploader("Upload a PDF", type=["pdf"])

# Handle an uploaded PDF: confirm the upload and, on request, extract its
# chunks and index them.
#
# Streamlit re-executes this script on every widget interaction, so anything
# at this level runs again for each button click or query. The temp file and
# the chunk extraction are therefore done only inside the button branch;
# doing them unconditionally would leave a new undeleted temp file behind and
# re-parse the PDF on every rerun.
if uploaded:
    st.success("PDF uploaded!")

    if st.button("Index PDF in Qdrant Cloud"):
        with st.spinner("Indexing..."):
            # TemporaryDirectory removes the directory and the PDF inside it
            # on exit, including when extraction or indexing raises.
            with tempfile.TemporaryDirectory() as tmp_dir:
                pdf_path = os.path.join(tmp_dir, "upload.pdf")
                with open(pdf_path, "wb") as pdf_file:
                    # getvalue() returns the full contents regardless of the
                    # buffer's current read position, unlike read().
                    pdf_file.write(uploaded.getvalue())

                chunks = extract_chunks_with_langchain(pdf_path)
                setup_collections()
                # Indexing stays inside the temp-dir scope in case the
                # extracted chunks still reference the file lazily.
                index_documents(chunks)

            st.session_state.indexed = True  # Unlocks the search section.
        st.success("Indexed successfully!")

# The search UI is gated on the session's "indexed" flag: until it is set,
# only a hint is shown; afterwards the query box and result lists appear.
if not st.session_state.indexed:
    st.info("Please upload and index a PDF before searching.")
else:
    query = st.text_input("Enter your search query:")

    if query:
        results = search_and_rerank(query)

        # (heading, key into `results`) for each ranking, in display order.
        sections = (
            ("Raw Dense Results", "raw"),
            ("Cross-Encoder Reranked", "cross"),
            ("ColBERT Reranked", "colbert"),
        )
        for heading, key in sections:
            st.subheader(heading)
            for chunk, score in results[key]:
                # Show the score in bold followed by the first 200
                # characters of the chunk.
                preview = chunk[:200]
                st.markdown(f"**{score:.3f}** - {preview}...")