Python 数据可视化 boxplot
Python 数据可视化 boxplot
# Box-plot helpers for the depth columns of ``result.tsv``.
#
# The table is expected to contain ``sample_name``, ``down_level`` and the
# depth columns listed in ``target_header_list``.

from typing import Optional, Sequence

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Values of the ``down_level`` column that box_plot_2 draws by default.
lvl_list = ["ori", '40', '36', '32', '28', '24', '20']

# Depth columns that are compared side by side in box_plot_2 / box_plot_3.
target_header_list = [
    "loci_median_depth",
    "loci_average_depth",
    "dedup_loci_median_depth",
    "dedup_loci_average_depth",
    "average_depth",
    "median_depth",
    "dedup_average_depth",
    "dedup_median_depth",
]


def _select_samples(df: pd.DataFrame, keyword: str) -> pd.DataFrame:
    """Return the rows of *df* whose ``sample_name`` contains *keyword*.

    The keyword is matched literally (``regex=False``). Rows with a missing
    sample name are excluded (``na=False``); without that flag the mask would
    contain NaN and boolean indexing would raise.
    """
    mask = df["sample_name"].str.contains(keyword, regex=False, na=False)
    return df[mask]


def box_plot_1(
    df: pd.DataFrame,
    value_column: str = 'loci_median_depth',
    output_path: str = 'boxplot.png',
) -> None:
    """Draw one box per (sample, level) pair and save/show the figure.

    Args:
        df: Table with ``sample_name``, ``down_level`` and *value_column*.
        value_column: Column whose values are summarised by each box.
        output_path: File the figure is written to.
    """
    # Group values by sample, then by level. Plain dicts keep insertion
    # order, so boxes appear in the order samples/levels first occur in df.
    grouped = {}
    for sample, level, value in zip(
        df['sample_name'], df['down_level'], df[value_column]
    ):
        grouped.setdefault(sample, {}).setdefault(level, []).append(value)

    labels = []
    series = []
    for sample, levels in grouped.items():
        for level, values in levels.items():
            labels.append(f"{sample} - {level}")
            series.append(values)

    fig = plt.figure(figsize=(100, 60))

    if series:
        # One call draws every box at positions 1..N. Tick labels are set
        # through xticks rather than boxplot's ``labels`` keyword, which was
        # renamed to ``tick_labels`` in Matplotlib 3.9.
        plt.boxplot(series)
        plt.xticks(range(1, len(series) + 1), labels, rotation=45)

    plt.ylabel('Depth')
    plt.title('Box Plot of Depth Data by Sample and Level')
    plt.grid(True)

    # Save before show(): some backends discard the figure once it is shown.
    plt.savefig(output_path)
    plt.show()
    plt.close(fig)


def box_plot_2(
    df: pd.DataFrame,
    target_header_list: Sequence[str],
    levels: Optional[Sequence[str]] = None,
) -> None:
    """Draw one figure per down level, with one box per depth column.

    Every figure is saved as ``boxplot_<level>.png`` and then shown. The
    individual data points are drawn on top of each box as red dots.

    Args:
        df: Table with ``down_level`` and the columns in *target_header_list*.
        target_header_list: Columns to draw, one box each.
        levels: Down levels to plot; defaults to the module-level ``lvl_list``.
            Levels with no matching rows are skipped.
    """
    if levels is None:
        levels = lvl_list

    # Compare as strings so the match also works if pandas parsed the column
    # as integers (e.g. a file that has no "ori" rows).
    level_labels = df['down_level'].astype(str)
    tick_positions = range(1, len(target_header_list) + 1)

    for level_to_plot in levels:
        filtered_df = df[level_labels == str(level_to_plot)]
        if filtered_df.empty:
            continue

        columns = [filtered_df[col].to_numpy() for col in target_header_list]

        fig = plt.figure(figsize=(20, 15))
        plt.boxplot(columns)
        plt.xticks(tick_positions, target_header_list, rotation=45)
        plt.ylabel('Depth')
        plt.title(f'Box Plot of Depth Data for {level_to_plot} Level')
        plt.grid(True)

        # Overlay every data point on its box.
        for position, values in zip(tick_positions, columns):
            plt.plot([position] * len(values), values, 'ro', alpha=0.5)

        plt.savefig(f'boxplot_{level_to_plot}.png')
        plt.show()
        # Release the figure so repeated iterations do not accumulate them.
        plt.close(fig)


def box_plot_3(
    df: pd.DataFrame,
    target_header_list: Sequence[str],
    output_path: str = 'boxplot.png',
) -> None:
    """Draw a grouped seaborn box plot: down level on x, one hue per column.

    Args:
        df: Table with ``down_level`` and the columns in *target_header_list*.
        target_header_list: Depth columns to compare within each down level.
        output_path: File the figure is written to.
    """
    # Reshape to long format: one row per (down_level, column, value).
    melted_df = pd.melt(
        df,
        id_vars=['down_level'],
        value_vars=list(target_header_list),
        var_name='Depth_Type',
        value_name='Depth',
    )

    plt.figure(figsize=(12, 8))
    sns.boxplot(
        x='down_level',
        y='Depth',
        hue='Depth_Type',
        data=melted_df,
        dodge=True,
    )
    plt.xlabel('Down Level (G)')
    plt.ylabel('Depth')
    plt.title('Box Plot of Depth Data by Down Level')
    plt.legend(title='Depth Type', loc='upper right')
    plt.grid(True)
    plt.savefig(output_path)


def main() -> None:
    """Read ``result.tsv`` and draw the grouped box plot for normal samples."""
    df = pd.read_csv('result.tsv', sep='\t')
    subsets = {
        name: _select_samples(df, name) for name in ("normal", "tumor")
    }

    # box_plot_1 / box_plot_2 and subsets["tumor"] can be swapped in here to
    # produce the other views of the same table.
    box_plot_3(subsets["normal"], target_header_list)


if __name__ == "__main__":
    main()
参考:
https://blog.csdn.net/Artoria_QZH/article/details/102790740
R:https://www.modb.pro/db/451162
免责声明:我们致力于保护作者版权,注重分享,被刊用文章因无法核实真实出处,未能及时与作者取得联系,或有版权异议的,请联系管理员,我们会立即处理! 部分文章是来自自研大数据AI进行生成,内容摘自(百度百科,百度知道,头条百科,中国民法典,刑法,牛津词典,新华词典,汉语词典,国家院校,科普平台)等数据,内容仅供学习参考,不准确的地方请联系删除处理! 图片声明:本站部分配图来自人工智能系统AI生成,觅知网授权图片,PxHere摄影无版权图库和百度,360,搜狗等多家搜索引擎自动关键词搜索配图,如有侵权的图片,请第一时间联系我们,邮箱:ciyunidc@ciyunshuju.com。本站只作为美观性配图使用,无任何非法侵犯第三方意图,一切解释权归图片著作权方,本站不承担任何责任。如有恶意碰瓷者,必当奉陪到底严惩不贷!