File size: 1,578 Bytes
833b888 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
import streamlit as st
import tempfile
from backend.pdf_utils import extract_chunks_with_langchain
from backend.indexer import setup_collections, index_documents
from backend.search import search_and_rerank
# Streamlit front end: upload a PDF, index its chunks, then run a search and
# show the three result lists returned by the backend side by side.
st.title("Qdrant PDF Search")

# Streamlit re-executes this script from the top on every widget interaction,
# so the "has anything been indexed yet" flag is kept in session state.
if "indexed" not in st.session_state:
    st.session_state.indexed = False

uploaded = st.file_uploader("Upload a PDF", type=["pdf"])

if uploaded:
    st.success("PDF uploaded!")

    if st.button("Index PDF in Qdrant Cloud"):
        with st.spinner("Indexing..."):
            # The extractor takes a filesystem path, so the upload is spilled
            # to disk. Doing this only on click (rather than on every rerun)
            # avoids re-parsing the PDF for each search keystroke, and the
            # enclosing TemporaryDirectory removes the file afterwards instead
            # of leaking one temp file per rerun.
            with tempfile.TemporaryDirectory() as tmp_dir:
                with tempfile.NamedTemporaryFile(
                    dir=tmp_dir, suffix=".pdf", delete=False
                ) as tmp:
                    # getvalue() returns the whole buffer regardless of the
                    # current stream position.
                    tmp.write(uploaded.getvalue())
                # The file is closed here (so it can be reopened by path on
                # any platform) but still exists until tmp_dir is cleaned up.
                # NOTE(review): assumes the extractor returns fully
                # materialised chunks and does not read the file lazily
                # after returning - confirm in backend.pdf_utils.
                chunks = extract_chunks_with_langchain(tmp.name)

            setup_collections()
            index_documents(chunks)

        st.session_state.indexed = True  # Mark as indexed
        st.success("Indexed successfully!")

# Only show query input *after* indexing is done
if st.session_state.indexed:
    query = st.text_input("Enter your search query:")
    if query:
        results = search_and_rerank(query)

        # (section heading, key into `results`); each entry under a key is
        # unpacked as a (chunk, score) pair and rendered the same way.
        sections = (
            ("Raw Dense Results", "raw"),
            ("Cross-Encoder Reranked", "cross"),
            ("ColBERT Reranked", "colbert"),
        )
        for heading, key in sections:
            st.subheader(heading)
            for chunk, score in results[key]:
                # Show the score and only the first 200 characters of a chunk.
                st.markdown(f"**{score:.3f}** - {chunk[:200]}...")
else:
    st.info("Please upload and index a PDF before searching.")
|