-
Notifications
You must be signed in to change notification settings - Fork 0
/
CSVxplore.py
107 lines (91 loc) · 3.94 KB
/
CSVxplore.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import base64
from io import BytesIO, StringIO

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import streamlit as st
from pandas_profiling import ProfileReport
# Heading rendered at the top of the Streamlit page.
st.title("CSV and Excel File Viewer")
# Cache uploaded file
@st.cache_data
def load_data(uploaded_file):
    """Read an uploaded CSV or Excel file into a DataFrame.

    The reader is chosen from the extension of ``uploaded_file.name``;
    the result is memoised by ``st.cache_data``.

    Args:
        uploaded_file: File-like object exposing a ``name`` attribute,
            as returned by ``st.file_uploader``.

    Returns:
        The DataFrame produced by ``pd.read_csv`` (``.csv``) or
        ``pd.read_excel`` (``.xls`` / ``.xlsx``).

    Raises:
        ValueError: If the file name has none of the supported extensions.
    """
    # Compare case-insensitively so names such as "REPORT.CSV" or
    # "Book.XLSX" are routed to the right reader instead of being rejected.
    filename = uploaded_file.name.lower()
    if filename.endswith('.csv'):
        return pd.read_csv(uploaded_file)
    if filename.endswith(('.xls', '.xlsx')):
        return pd.read_excel(uploaded_file)
    raise ValueError("Invalid file format. Please upload a CSV or Excel file.")
# Generate download link
def generate_download_link(df, file_format='csv'):
    """Build an HTML anchor that downloads ``df`` from an inline data URI.

    The frame is serialised without its index, base64-encoded and embedded
    directly in the ``href`` of the returned ``<a>`` tag.

    Args:
        df: DataFrame to export.
        file_format: ``'csv'`` (default) or ``'excel'``.

    Returns:
        The ``<a>`` tag as a string; the suggested file name is
        ``data.csv`` or ``data.xlsx`` respectively.

    Raises:
        ValueError: If ``file_format`` is neither ``'csv'`` nor ``'excel'``.
    """
    if file_format == 'csv':
        csv_text = df.to_csv(index=False)
        b64 = base64.b64encode(csv_text.encode()).decode()
        return f'<a href="data:file/csv;base64,{b64}" download="data.csv">Download CSV File</a>'

    if file_format == 'excel':
        output = BytesIO()
        # Use the writer as a context manager: leaving the block finalises
        # the workbook into ``output``. ``ExcelWriter.save()`` no longer
        # exists in pandas 2.x, so calling it would raise AttributeError.
        with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
            df.to_excel(writer, index=False, sheet_name='Sheet1')
        b64 = base64.b64encode(output.getvalue()).decode()
        return f'<a href="data:file/xlsx;base64,{b64}" download="data.xlsx">Download Excel File</a>'

    raise ValueError("Unsupported file format for download.")
# Upload file
uploaded_file = st.file_uploader("Choose a file", type=['csv', 'xls', 'xlsx'])

if uploaded_file is not None:
    # Top-level UI boundary: any failure while loading or rendering is shown
    # in the page via st.error rather than as a raw traceback.
    try:
        # Load data
        df = load_data(uploaded_file)

        # Show dataframe
        st.write('## Data Preview')
        st.dataframe(df.head(10))

        # Show descriptive statistics
        st.write('## Descriptive Statistics')
        st.write(df.describe())

        # Plot data
        st.write('## Data Visualization')
        columns = st.multiselect('Select columns for plotting', df.columns.tolist())
        plot_type = st.selectbox('Select plot type', ['Histogram', 'Boxplot', 'Line', 'Scatter'])
        if columns:
            if plot_type == 'Scatter':
                # A scatter plot pairs two columns, so it is handled once for
                # the whole selection instead of once per column.
                if len(columns) != 2:
                    st.warning("Scatter plot requires two columns.")
                else:
                    x_column, y_column = columns
                    fig, ax = plt.subplots(figsize=(10, 6))
                    try:
                        ax.set_title(f'{x_column} vs {y_column} - {plot_type}')
                        ax.scatter(df[x_column], df[y_column])
                        ax.set_xlabel(str(x_column))
                        ax.set_ylabel(str(y_column))
                        st.pyplot(fig)
                    finally:
                        # Streamlit reruns the script on every interaction;
                        # unclosed figures would pile up in pyplot's registry.
                        plt.close(fig)
            else:
                for column in columns:
                    # Draw on an explicit figure and hand that figure to
                    # Streamlit, rather than relying on pyplot's global state.
                    fig, ax = plt.subplots(figsize=(10, 6))
                    try:
                        ax.set_title(f'{column} - {plot_type}')
                        if plot_type == 'Histogram':
                            # Missing values cannot be binned; drop them first.
                            ax.hist(df[column].dropna(), bins=30, edgecolor='black')
                        elif plot_type == 'Boxplot':
                            ax.boxplot(df[column].dropna(), vert=False)
                        elif plot_type == 'Line':
                            ax.plot(df[column])
                        st.pyplot(fig)
                    finally:
                        plt.close(fig)

        # Download data options
        st.write('## Download Data')
        download_format = st.selectbox('Select download format', ['CSV', 'Excel'])
        if st.button('Generate Download Link'):
            href = generate_download_link(df, file_format=download_format.lower())
            # The anchor is raw HTML, so it must be rendered unescaped.
            st.markdown(href, unsafe_allow_html=True)

        # Additional functionality
        with st.expander('Additional Data Insights'):
            st.write('### Data Summary')
            # DataFrame.info writes text, so it needs a text buffer; a
            # BytesIO here raises TypeError.
            info_buffer = StringIO()
            df.info(buf=info_buffer)
            st.text(info_buffer.getvalue())

            st.write('### Correlation Matrix')
            # Restrict to numeric columns: correlating text columns raises
            # on recent pandas versions.
            st.write(df.select_dtypes(include='number').corr())

            st.write('### Missing Values')
            st.write(df.isnull().sum())

        # Pandas Profiling
        with st.expander('Pandas Profiling Report'):
            profile = ProfileReport(df, title='Pandas Profiling Report', explorative=True)
            st_profile_report = profile.to_html()
            st.components.v1.html(st_profile_report, height=800, scrolling=True)
    except Exception as e:
        st.error(f"An error occurred: {e}")
else:
    st.write("Please upload a file to get started.")