Spaces:
Build error
Build error
| import os | |
| import pandas as pd | |
| import google.generativeai as genai | |
| ## Configure Genai Key | |
| from dotenv import load_dotenv | |
| ## load all the environemnt variables | |
| load_dotenv() | |
| import matplotlib.pyplot as plt | |
| import streamlit as st | |
| from pytrie import SortedStringTrie | |
| import sys | |
| from io import StringIO | |
| output_stream = StringIO() | |
| sys.stdout = output_stream | |
| ## Configure Genai Key | |
| genai.configure(api_key=os.getenv("GOOGLE_API_KEY")) | |
| # build auto complete model | |
| class TextSuggestionTool: | |
| def __init__(self): | |
| self.trie = SortedStringTrie() | |
| def add_word(self, word): | |
| if word not in self.trie: | |
| self.trie[word] = True | |
| def build_from_corpus(self, corpus): | |
| for word in corpus: | |
| sub_word = "" | |
| for letter in word: | |
| sub_word += letter | |
| self.add_word(sub_word) | |
| def auto_words(self, prefix): | |
| suggestions = [] | |
| for key, value in self.trie.items(prefix=prefix): | |
| suggestions.append(key) | |
| return suggestions | |
| ## gernerate response of the user query | |
| def get_gemini_response(question,prompt): | |
| model=genai.GenerativeModel('gemini-pro') | |
| response=model.generate_content([prompt,question]) | |
| return response.text | |
| ## Streamlit App | |
| df= pd.DataFrame() | |
| suggestion_tool = TextSuggestionTool() | |
| string_of_csv = [] | |
| auto_complete_corpus = [] | |
| st.set_page_config(page_title="I can Retrieve Any SQL query") | |
| st.header("Data analysis") | |
| # upload data file | |
| uploaded_file = st.file_uploader("Upload data file", type=["csv"], key="uploaded_file") | |
| if uploaded_file is not None: | |
| df = pd.read_csv(uploaded_file) | |
| df = df.dropna(axis=1, how="all") | |
| df = df.dropna(axis=0, how="all") | |
| df = df.fillna('nan') | |
| df = df.map(lambda x: x.lower() if isinstance(x, str) else x) | |
| object_columns = df.select_dtypes(include='object') | |
| lists_of_columns = [df[column].tolist() for column in object_columns.columns] | |
| string_of_csv = [element for sublist in lists_of_columns for element in sublist] | |
| for element in string_of_csv: | |
| auto_complete_corpus.append(str(element)) | |
| df.columns = df.columns.str.lower() | |
| df.columns = df.columns.str.strip() | |
| # all columns append in list | |
| column_names = df.columns.tolist() | |
| column = ', '.join(column_names) | |
| columns_and_types = dict(zip(df.columns, df.dtypes)) | |
| columns_and_types = ', '.join(columns_and_types) | |
| # buid auto complete data structure | |
| suggestion_tool.build_from_corpus(auto_complete_corpus) | |
| # show the dataframe columns | |
| st.text(column) | |
| #dashboard split into two part | |
| col1, col2 = st.columns([1,1]) | |
| with col1: | |
| st.subheader("User Input for data extraction") | |
| question1=st.text_input("Input: ",key="input_extraction") | |
| # generated suggest word | |
| autocomplete1 = suggestion_tool.auto_words(question1.lower()) | |
| autocomplete1.reverse() | |
| # show suggested word | |
| st.subheader("Suggessions") | |
| if autocomplete1: | |
| for word in autocomplete1[:5]: | |
| st.write(word) | |
| else: | |
| st.write("No suggestions found.") | |
| # submit button 1 | |
| submit1=st.button("Data Extract") | |
| with col2: | |
| st.subheader("User Input for data visualization") | |
| question2=st.text_input("Input: ",key="input_visualization") | |
| # generated suggest word | |
| autocomplete2 = suggestion_tool.auto_words(question2.lower()) | |
| # show suggested word | |
| st.subheader("Suggessions") | |
| autocomplete2.reverse() | |
| if autocomplete2: | |
| for word in autocomplete2[:5]: | |
| st.write(word) | |
| else: | |
| st.write("No suggestions found.") | |
| #submit button 2 | |
| submit2=st.button("Data Data Visualization") | |
| if submit1: | |
| prompt1 = f""" | |
| You are an expert in generating pandas code from a pandas dataframe to user question. | |
| now your task is to generate pandas code from given dataframe based on english questions without any variable name. | |
| The given pandas DataFrame name is df and the columns are {column}, | |
| now generate pandas code from the df dataframe based on englsih question | |
| please add print all the time | |
| For example : | |
| For example 1 : | |
| english query : how many rows ? | |
| python code : print(len(df.index)) | |
| For example 2 : | |
| english query : how many columns ? | |
| python code : print(len(df.columns)) | |
| """ | |
| # get response from model | |
| response=get_gemini_response(question1,prompt1) | |
| response = response.replace('`','') | |
| response = response.replace('python','') | |
| st.subheader("Statiscal analysis of data") | |
| # st.text(response) | |
| st.set_option('deprecation.showPyplotGlobalUse', False) | |
| # show the plot on the user side | |
| try: | |
| exec(response, globals(), locals()) | |
| result = output_stream.getvalue() | |
| st.code(result) | |
| except Exception as e: | |
| st.write("Error:", e) | |
| finally: | |
| # Reset sys.stdout to its original value | |
| sys.stdout = sys.__stdout__ | |
| if submit2: | |
| prompt2 = f""" | |
| You are an expert in generating python code for statistical chart like bar, pie, histogram, scatter from a pandas dataframe. | |
| now your task is to generate python code for statistical chart from given dataframe based on english questions. | |
| The given pandas DataFrame name is df and the columns are {column} and columns data types are {columns_and_types} | |
| now generate Python code from the df dataframe based on englsih question | |
| For example : | |
| For example 1 : | |
| english query : generate code for pie chart of gender distribution | |
| python code : gender_counts = df['gender'].value_counts() | |
| # Plotting a Pie Chart | |
| plt.figure(figsize=(8, 8)) | |
| plt.pie(gender_counts, labels=gender_counts.index, autopct='%1.1f%%', startangle=90, colors=['skyblue', 'lightcoral']) | |
| plt.title('Distribution of Gender Among Students') | |
| plt.show() | |
| For example 2 : | |
| english query : generate python code for histogram of age based on name | |
| python code : | |
| # Plotting the histogram | |
| plt.hist(df['Age'], bins=10, edgecolor='black') | |
| plt.xlabel('Age') | |
| plt.ylabel('Frequency') | |
| plt.title('Histogram of Age based on Names') | |
| plt.grid(True) | |
| plt.show() | |
| """ | |
| # get response from model | |
| response=get_gemini_response(question2,prompt2) | |
| response = response.replace('`','') | |
| response = response.replace('python','') | |
| print(response) | |
| st.subheader("Statiscal analysis of data") | |
| # st.text(response) | |
| st.set_option('deprecation.showPyplotGlobalUse', False) | |
| # show the plot on the user side | |
| try: | |
| fig = exec(response) | |
| st.pyplot(fig) | |
| except Exception as e: | |
| print(f"Error executing code: {e}") |