import os import pandas as pd import google.generativeai as genai ## Configure Genai Key from dotenv import load_dotenv ## load all the environemnt variables load_dotenv() import matplotlib.pyplot as plt import streamlit as st from pytrie import SortedStringTrie import sys from io import StringIO output_stream = StringIO() sys.stdout = output_stream ## Configure Genai Key genai.configure(api_key=os.getenv("GOOGLE_API_KEY")) # build auto complete model class TextSuggestionTool: def __init__(self): self.trie = SortedStringTrie() def add_word(self, word): if word not in self.trie: self.trie[word] = True def build_from_corpus(self, corpus): for word in corpus: sub_word = "" for letter in word: sub_word += letter self.add_word(sub_word) def auto_words(self, prefix): suggestions = [] for key, value in self.trie.items(prefix=prefix): suggestions.append(key) return suggestions ## gernerate response of the user query def get_gemini_response(question,prompt): model=genai.GenerativeModel('gemini-pro') response=model.generate_content([prompt,question]) return response.text ## Streamlit App df= pd.DataFrame() suggestion_tool = TextSuggestionTool() string_of_csv = [] auto_complete_corpus = [] st.set_page_config(page_title="I can Retrieve Any SQL query") st.header("Data analysis") # upload data file uploaded_file = st.file_uploader("Upload data file", type=["csv"], key="uploaded_file") if uploaded_file is not None: df = pd.read_csv(uploaded_file) df = df.dropna(axis=1, how="all") df = df.dropna(axis=0, how="all") df = df.fillna('nan') df = df.map(lambda x: x.lower() if isinstance(x, str) else x) object_columns = df.select_dtypes(include='object') lists_of_columns = [df[column].tolist() for column in object_columns.columns] string_of_csv = [element for sublist in lists_of_columns for element in sublist] for element in string_of_csv: auto_complete_corpus.append(str(element)) df.columns = df.columns.str.lower() df.columns = df.columns.str.strip() # all columns append in list column_names = df.columns.tolist() column = ', '.join(column_names) columns_and_types = dict(zip(df.columns, df.dtypes)) columns_and_types = ', '.join(columns_and_types) # buid auto complete data structure suggestion_tool.build_from_corpus(auto_complete_corpus) # show the dataframe columns st.text(column) #dashboard split into two part col1, col2 = st.columns([1,1]) with col1: st.subheader("User Input for data extraction") question1=st.text_input("Input: ",key="input_extraction") # generated suggest word autocomplete1 = suggestion_tool.auto_words(question1.lower()) autocomplete1.reverse() # show suggested word st.subheader("Suggessions") if autocomplete1: for word in autocomplete1[:5]: st.write(word) else: st.write("No suggestions found.") # submit button 1 submit1=st.button("Data Extract") with col2: st.subheader("User Input for data visualization") question2=st.text_input("Input: ",key="input_visualization") # generated suggest word autocomplete2 = suggestion_tool.auto_words(question2.lower()) # show suggested word st.subheader("Suggessions") autocomplete2.reverse() if autocomplete2: for word in autocomplete2[:5]: st.write(word) else: st.write("No suggestions found.") #submit button 2 submit2=st.button("Data Data Visualization") if submit1: prompt1 = f""" You are an expert in generating pandas code from a pandas dataframe to user question. now your task is to generate pandas code from given dataframe based on english questions without any variable name. The given pandas DataFrame name is df and the columns are {column}, now generate pandas code from the df dataframe based on englsih question please add print all the time For example : For example 1 : english query : how many rows ? python code : print(len(df.index)) For example 2 : english query : how many columns ? python code : print(len(df.columns)) """ # get response from model response=get_gemini_response(question1,prompt1) response = response.replace('`','') response = response.replace('python','') st.subheader("Statiscal analysis of data") # st.text(response) st.set_option('deprecation.showPyplotGlobalUse', False) # show the plot on the user side try: exec(response, globals(), locals()) result = output_stream.getvalue() st.code(result) except Exception as e: st.write("Error:", e) finally: # Reset sys.stdout to its original value sys.stdout = sys.__stdout__ if submit2: prompt2 = f""" You are an expert in generating python code for statistical chart like bar, pie, histogram, scatter from a pandas dataframe. now your task is to generate python code for statistical chart from given dataframe based on english questions. The given pandas DataFrame name is df and the columns are {column} and columns data types are {columns_and_types} now generate Python code from the df dataframe based on englsih question For example : For example 1 : english query : generate code for pie chart of gender distribution python code : gender_counts = df['gender'].value_counts() # Plotting a Pie Chart plt.figure(figsize=(8, 8)) plt.pie(gender_counts, labels=gender_counts.index, autopct='%1.1f%%', startangle=90, colors=['skyblue', 'lightcoral']) plt.title('Distribution of Gender Among Students') plt.show() For example 2 : english query : generate python code for histogram of age based on name python code : # Plotting the histogram plt.hist(df['Age'], bins=10, edgecolor='black') plt.xlabel('Age') plt.ylabel('Frequency') plt.title('Histogram of Age based on Names') plt.grid(True) plt.show() """ # get response from model response=get_gemini_response(question2,prompt2) response = response.replace('`','') response = response.replace('python','') print(response) st.subheader("Statiscal analysis of data") # st.text(response) st.set_option('deprecation.showPyplotGlobalUse', False) # show the plot on the user side try: fig = exec(response) st.pyplot(fig) except Exception as e: print(f"Error executing code: {e}")