Python 数据可视化 boxplot
Python 数据可视化 boxplot
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

# Down-sampling levels plotted by default, in display order.
_DEFAULT_LEVELS = ("ori", '40', '36', '32', '28', '24', '20')

# Load the tab-separated results table.
df = pd.read_csv('result.tsv', sep='\t')

# Split rows by sample type. na=False treats a missing sample_name as
# "no match" instead of producing a NaN mask, which boolean indexing rejects.
normal_df = df[df["sample_name"].str.contains("normal", na=False)]
tumor_df = df[df["sample_name"].str.contains("tumor", na=False)]


def box_plot_1(df: pd.DataFrame) -> None:
    """Draw one box per (sample_name, down_level) pair of loci_median_depth.

    Reads the ``sample_name``, ``down_level`` and ``loci_median_depth``
    columns of *df*, saves the figure to ``boxplot.png`` and shows it.
    """
    # sample -> level -> list of depth values, kept in first-seen order so
    # the boxes appear in the same order as the rows of df.
    sample_data = {}
    columns = (df['sample_name'], df['down_level'], df['loci_median_depth'])
    for sample, level, depth in zip(*columns):
        sample_data.setdefault(sample, {}).setdefault(level, []).append(depth)

    fig = plt.figure(figsize=(100, 60))

    # One boxplot call per (sample, level), each at its own x position.
    # NOTE(review): newer Matplotlib releases prefer `tick_labels` over
    # `labels`; `labels` is kept here for compatibility with older versions.
    position = 1
    for sample, levels in sample_data.items():
        for level, data in levels.items():
            label = f"{sample} - {level}"
            plt.boxplot(data, positions=[position], labels=[label])
            position += 1

    plt.ylabel('Depth')
    plt.title('Box Plot of Depth Data by Sample and Level')
    plt.grid(True)
    plt.xticks(rotation=45)

    plt.savefig('boxplot.png')
    plt.show()
    # Release the figure once it has been saved and shown.
    plt.close(fig)


def box_plot_2(df: pd.DataFrame, target_header_list, levels=None) -> None:
    """Draw one figure per down-sampling level, one box per depth column.

    Args:
        df: Table with a ``down_level`` column and every column named in
            *target_header_list*.
        target_header_list: Column names to draw as side-by-side boxes.
        levels: ``down_level`` values to plot, one figure each. Defaults to
            ``"ori", '40', '36', '32', '28', '24', '20'``.

    Each figure is saved as ``boxplot_<level>.png`` and then shown.
    """
    if levels is None:
        levels = _DEFAULT_LEVELS

    for level_to_plot in levels:
        filtered_df = df[df['down_level'] == level_to_plot]

        fig = plt.figure(figsize=(20, 15))
        plt.boxplot(
            [filtered_df[col] for col in target_header_list],
            labels=target_header_list,
        )
        plt.ylabel('Depth')
        plt.title(f'Box Plot of Depth Data for {level_to_plot} Level')
        plt.grid(True)
        plt.xticks(rotation=45)

        # Overlay the individual data points on each box; boxes sit at
        # x = 1, 2, ... in the order of target_header_list.
        for x_pos, col in enumerate(target_header_list, start=1):
            values = filtered_df[col]
            plt.plot([x_pos] * len(values), values, 'ro', alpha=0.5)

        plt.savefig(f'boxplot_{level_to_plot}.png')
        plt.show()
        # Close each figure so they do not pile up across loop iterations.
        plt.close(fig)


def box_plot_3(df: pd.DataFrame, target_header_list) -> None:
    """Draw grouped boxes of several depth columns against down_level.

    The columns in *target_header_list* are melted into long form
    (``Depth_Type`` / ``Depth``) and drawn with seaborn, one hue per column.
    The figure is saved to ``boxplot.png``; it is not shown or closed here.
    """
    # Long format: one row per (down_level, Depth_Type, Depth) observation.
    melted_df = pd.melt(
        df,
        id_vars=['down_level'],
        value_vars=target_header_list,
        var_name='Depth_Type',
        value_name='Depth',
    )

    plt.figure(figsize=(12, 8))
    sns.boxplot(x='down_level', y='Depth', hue='Depth_Type', data=melted_df, dodge=True)
    plt.xlabel('Down Level (G)')
    plt.ylabel('Depth')
    plt.title('Box Plot of Depth Data by Down Level')
    plt.legend(title='Depth Type', loc='upper right')
    plt.grid(True)
    plt.savefig('boxplot.png')
    # NOTE(review): bare print() kept from the original; it only emits a
    # blank line. Its original placement (here vs. module level) is unclear.
    print()


lvl_list = list(_DEFAULT_LEVELS)
target_header_list = [
    "loci_median_depth",
    "loci_average_depth",
    "dedup_loci_median_depth",
    "dedup_loci_average_depth",
    "average_depth",
    "median_depth",
    "dedup_average_depth",
    "dedup_median_depth",
]

# Alternative views: box_plot_1(normal_df) or
# box_plot_2(normal_df, target_header_list, lvl_list).
box_plot_3(normal_df, target_header_list)
参考:
https://blog.csdn.net/Artoria_QZH/article/details/102790740
R:https://www.modb.pro/db/451162
文章版权声明:除非注明,否则均为主机测评原创文章,转载或复制请以超链接形式注明出处。