In [8]:
import pandas as pd
import numpy as np

First you import pandas as numpy

In [9]:
data=pd.read_excel("gtex_integrin_7_organs.xlsx")

this you read in the excel file that was provided, which contained Gene expression for Integrins (27 integrins) from Gtex data.
the gene expression is measured from the tissues taken from 7 different sites, brain, lung, ovary etc.

In [10]:
data
Out[10]:
Unnamed: 0 primary_site ITGA10 ITGAD ITGAM ITGA3 ITGBL1 ITGAE ITGA2 ITGB3 ... ITGA6 ITGA2B ITGB1 ITGAL ITGA9 ITGB5 ITGA8 ITGA4 ITGA1 ITGA11
0 GTEX-13QIC-0011-R1a-SM-5O9CJ Brain 0.5763 -6.5064 2.2573 0.7832 1.0363 4.6035 2.5731 -2.8262 ... 2.8562 1.3846 5.8430 1.1316 -0.7108 3.5387 -0.0725 -0.4521 0.2029 -2.8262
1 GTEX-1399S-1726-SM-5L3DI Lung 4.9137 -3.6259 4.7307 7.1584 1.7702 4.9556 1.9149 2.6067 ... 4.2412 4.1211 7.7256 4.4900 2.9281 6.1483 5.1867 2.6185 4.7856 -0.0277
2 GTEX-PWCY-1326-SM-48TCU Ovary 2.3953 -5.0116 1.4547 4.2593 -0.7346 4.4149 0.2642 1.5216 ... 3.6816 1.5465 7.2964 -0.9406 2.7742 5.0414 2.0325 0.7579 2.2573 1.2516
3 GTEX-QXCU-0626-SM-2TC69 Lung 4.0541 -2.3147 4.5053 7.5651 4.1788 4.1772 5.3695 1.8444 ... 4.9631 1.9149 7.9947 3.3911 2.8462 6.7683 4.1636 2.7951 5.3284 1.2147
4 GTEX-ZA64-1526-SM-5CVMD Breast 2.0569 -2.4659 3.3993 3.1311 3.0074 4.4977 -1.7809 2.7139 ... 4.7340 0.6332 7.3496 -0.9406 2.5338 6.5696 1.7229 -0.6416 3.1195 1.1050
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1982 GTEX-QMRM-0826-SM-3NB33 Lung 5.3067 -3.8160 4.9065 7.5810 5.8714 4.7345 2.6185 3.1095 ... 5.6080 3.7324 8.2849 4.6201 3.6440 6.7052 5.1094 3.3364 5.8153 1.6604
1983 GTEX-YFCO-1626-SM-4W1Z3 Prostate 2.9581 -4.6082 1.1641 4.6938 1.5902 5.8625 -0.5125 1.7617 ... 3.8798 -1.4699 7.5163 -0.3752 2.9562 5.3035 4.4304 -0.9406 3.6136 0.4233
1984 GTEX-1117F-2826-SM-5GZXL Breast 4.3184 -6.5064 1.0433 4.8440 3.5498 4.6809 1.0293 3.3478 ... 5.3256 -0.0725 7.7516 1.1382 2.1411 7.1132 0.3796 0.0854 3.8650 1.0151
1985 GTEX-Q2AG-2826-SM-2HMJQ Brain 3.4622 -5.5735 1.5013 5.4835 1.7702 4.7517 0.6790 -3.1714 ... 1.1960 4.1740 4.3002 0.5470 -0.9971 3.7982 -0.2498 1.4808 -0.5125 -0.5125
1986 GTEX-XV7Q-0426-SM-4BRVN Lung 2.5585 -1.7809 6.7916 6.5865 2.7051 4.9519 4.3618 3.1892 ... 3.5779 2.8974 7.7685 4.8294 1.9149 5.9989 2.4117 2.4198 4.2080 1.0007

1987 rows × 29 columns

In [ ]:
 
In [11]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
In [34]:
data= pd.read_excel("gtex_integrin_7_organs.xlsx")
In [35]:
data
Out[35]:
Unnamed: 0 primary_site ITGA10 ITGAD ITGAM ITGA3 ITGBL1 ITGAE ITGA2 ITGB3 ... ITGA6 ITGA2B ITGB1 ITGAL ITGA9 ITGB5 ITGA8 ITGA4 ITGA1 ITGA11
0 GTEX-13QIC-0011-R1a-SM-5O9CJ Brain 0.5763 -6.5064 2.2573 0.7832 1.0363 4.6035 2.5731 -2.8262 ... 2.8562 1.3846 5.8430 1.1316 -0.7108 3.5387 -0.0725 -0.4521 0.2029 -2.8262
1 GTEX-1399S-1726-SM-5L3DI Lung 4.9137 -3.6259 4.7307 7.1584 1.7702 4.9556 1.9149 2.6067 ... 4.2412 4.1211 7.7256 4.4900 2.9281 6.1483 5.1867 2.6185 4.7856 -0.0277
2 GTEX-PWCY-1326-SM-48TCU Ovary 2.3953 -5.0116 1.4547 4.2593 -0.7346 4.4149 0.2642 1.5216 ... 3.6816 1.5465 7.2964 -0.9406 2.7742 5.0414 2.0325 0.7579 2.2573 1.2516
3 GTEX-QXCU-0626-SM-2TC69 Lung 4.0541 -2.3147 4.5053 7.5651 4.1788 4.1772 5.3695 1.8444 ... 4.9631 1.9149 7.9947 3.3911 2.8462 6.7683 4.1636 2.7951 5.3284 1.2147
4 GTEX-ZA64-1526-SM-5CVMD Breast 2.0569 -2.4659 3.3993 3.1311 3.0074 4.4977 -1.7809 2.7139 ... 4.7340 0.6332 7.3496 -0.9406 2.5338 6.5696 1.7229 -0.6416 3.1195 1.1050
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1982 GTEX-QMRM-0826-SM-3NB33 Lung 5.3067 -3.8160 4.9065 7.5810 5.8714 4.7345 2.6185 3.1095 ... 5.6080 3.7324 8.2849 4.6201 3.6440 6.7052 5.1094 3.3364 5.8153 1.6604
1983 GTEX-YFCO-1626-SM-4W1Z3 Prostate 2.9581 -4.6082 1.1641 4.6938 1.5902 5.8625 -0.5125 1.7617 ... 3.8798 -1.4699 7.5163 -0.3752 2.9562 5.3035 4.4304 -0.9406 3.6136 0.4233
1984 GTEX-1117F-2826-SM-5GZXL Breast 4.3184 -6.5064 1.0433 4.8440 3.5498 4.6809 1.0293 3.3478 ... 5.3256 -0.0725 7.7516 1.1382 2.1411 7.1132 0.3796 0.0854 3.8650 1.0151
1985 GTEX-Q2AG-2826-SM-2HMJQ Brain 3.4622 -5.5735 1.5013 5.4835 1.7702 4.7517 0.6790 -3.1714 ... 1.1960 4.1740 4.3002 0.5470 -0.9971 3.7982 -0.2498 1.4808 -0.5125 -0.5125
1986 GTEX-XV7Q-0426-SM-4BRVN Lung 2.5585 -1.7809 6.7916 6.5865 2.7051 4.9519 4.3618 3.1892 ... 3.5779 2.8974 7.7685 4.8294 1.9149 5.9989 2.4117 2.4198 4.2080 1.0007

1987 rows × 29 columns

In [22]:
 
In [36]:
brain_data = data[data['primary_site'] == 'Brain']
In [37]:
brain_data
Out[37]:
Unnamed: 0 primary_site ITGA10 ITGAD ITGAM ITGA3 ITGBL1 ITGAE ITGA2 ITGB3 ... ITGA6 ITGA2B ITGB1 ITGAL ITGA9 ITGB5 ITGA8 ITGA4 ITGA1 ITGA11
0 GTEX-13QIC-0011-R1a-SM-5O9CJ Brain 0.5763 -6.5064 2.2573 0.7832 1.0363 4.6035 2.5731 -2.8262 ... 2.8562 1.3846 5.8430 1.1316 -0.7108 3.5387 -0.0725 -0.4521 0.2029 -2.8262
8 GTEX-N7MS-2526-SM-26GMA Brain 2.2960 -9.9658 0.6608 5.2840 0.4233 4.8510 -0.2671 -0.1031 ... 1.5415 4.6623 3.4687 0.5666 -0.0130 3.0654 0.7916 1.0433 -0.7346 -0.7588
10 GTEX-N7MS-2526-SM-26GMR Brain -0.2498 -9.9658 -0.8863 3.1685 -1.6394 2.8158 -0.4719 -1.1488 ... 1.6045 0.9268 2.8055 -0.5973 0.4657 1.8918 0.3460 0.3907 -1.9942 -1.5522
12 GTEX-NPJ7-0011-R6a-SM-2I3G7 Brain 1.6045 -6.5064 2.3193 3.6335 -2.3147 5.0670 -0.8863 -0.8084 ... 3.2018 1.7575 4.6894 0.4125 -0.6643 3.6916 -0.6193 -2.2447 1.2023 -1.9942
14 GTEX-132Q8-3026-SM-5PNVG Brain 2.8974 -6.5064 1.9601 4.1836 -0.8084 4.5892 -0.5543 0.3460 ... 3.6018 2.7931 4.7274 -0.0574 1.2271 4.3793 0.8488 -0.2159 2.1378 -0.6416
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1977 GTEX-13G51-0011-R6b-SM-5LZX4 Brain -0.3383 -6.5064 1.6234 2.7487 -2.2447 5.2415 -0.8863 -2.9324 ... 2.1988 0.4016 4.5142 -1.1811 -0.8084 3.9983 -1.0862 -3.1714 -0.7588 -1.9379
1978 GTEX-YFC4-0011-R10a-SM-4SOK5 Brain 0.4447 -5.5735 0.3231 3.5237 -1.5105 4.9016 0.9419 -2.7274 ... 2.8178 1.3567 4.4621 -0.2845 1.0222 3.3336 0.1903 -1.0559 0.0300 -0.4719
1980 GTEX-13112-0011-R4b-SM-5DUXL Brain 0.6969 -6.5064 -0.9686 2.3760 -2.2447 4.0739 -0.6193 -4.0350 ... 2.7357 1.5806 4.6882 -0.9971 -0.5756 3.5136 0.9343 -1.0862 0.4340 -2.2447
1981 GTEX-1313W-0011-R1b-SM-5EQ4A Brain 0.1124 -5.0116 2.2482 2.8897 -0.5125 4.6445 0.3115 -3.6259 ... 2.1147 0.9716 5.1202 0.6608 0.4761 3.2343 0.8408 -0.0574 -0.1828 -2.5479
1985 GTEX-Q2AG-2826-SM-2HMJQ Brain 3.4622 -5.5735 1.5013 5.4835 1.7702 4.7517 0.6790 -3.1714 ... 1.1960 4.1740 4.3002 0.5470 -0.9971 3.7982 -0.2498 1.4808 -0.5125 -0.5125

1152 rows × 29 columns

In [38]:
#making a violin plot 
# Step 2: Select all ITG gene columns
itgs = [col for col in brain_df.columns if col.startswith("ITG")]

# Step 3: Melt the DataFrame into long format
melted = brain_data.melt(id_vars=["Unnamed: 0", "primary_site"],
                       value_vars=itgs,
                       var_name='Gene', value_name='Expression')

# Step 4: Create the violin plot
plt.figure(figsize=(16, 6))
sns.violinplot(data=melted, x='Gene', y='Expression', scale='width', inner='box')
plt.xticks(rotation=90)
plt.title('Expression of All ITG Genes in Brain Samples')
plt.tight_layout()
plt.show()
/var/folders/y1/vp0_kzn53l53f5jp4xdgycp00000gn/T/ipykernel_20969/1264212371.py:12: FutureWarning: 

The `scale` parameter has been renamed and will be removed in v0.15.0. Pass `density_norm='width'` for the same effect.
  sns.violinplot(data=melted, x='Gene', y='Expression', scale='width', inner='box')
No description has been provided for this image
In [46]:
data['primary_site'].unique()
Out[46]:
array(['Brain', 'Lung', 'Ovary', 'Breast', 'Liver', 'Bone Marrow',
       'Prostate'], dtype=object)
In [39]:
data['primary_site'].value_counts()
Out[39]:
primary_site
Brain          1152
Lung            288
Breast          179
Liver           110
Prostate        100
Ovary            88
Bone Marrow      70
Name: count, dtype: int64
In [15]:
data_Lung=df[df['primary_site']=='Lung']
In [53]:
data_Lung
Out[53]:
Unnamed: 0 primary_site ITGA10 ITGAD ITGAM ITGA3 ITGBL1 ITGAE ITGA2 ITGB3 ... ITGA6 ITGA2B ITGB1 ITGAL ITGA9 ITGB5 ITGA8 ITGA4 ITGA1 ITGA11
1 GTEX-1399S-1726-SM-5L3DI Lung 4.9137 -3.6259 4.7307 7.1584 1.7702 4.9556 1.9149 2.6067 ... 4.2412 4.1211 7.7256 4.4900 2.9281 6.1483 5.1867 2.6185 4.7856 -0.0277
3 GTEX-QXCU-0626-SM-2TC69 Lung 4.0541 -2.3147 4.5053 7.5651 4.1788 4.1772 5.3695 1.8444 ... 4.9631 1.9149 7.9947 3.3911 2.8462 6.7683 4.1636 2.7951 5.3284 1.2147
5 GTEX-11EI6-0826-SM-5985V Lung 6.0732 -2.4659 3.9901 7.3945 4.7688 5.1157 4.3356 2.3366 ... 3.7378 4.7247 7.5016 5.1396 2.5036 6.5443 4.6531 3.8136 5.8679 0.7407
6 GTEX-S341-0326-SM-2XCAU Lung 4.2510 -5.0116 3.3076 6.1715 3.1129 5.2954 2.2960 1.1184 ... 4.7104 2.7530 7.5022 4.0730 2.6325 6.0483 5.0562 2.6962 5.1611 0.9343
7 GTEX-WY7C-0426-SM-3NB3C Lung 3.3633 -2.5479 4.8340 6.6864 3.0585 4.8294 2.6464 0.7999 ... 5.1190 1.5013 8.0260 3.6635 3.2435 5.8503 5.2991 2.8076 4.7571 -0.1345
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1962 GTEX-Q2AH-0426-SM-2I3EP Lung 5.9644 -1.3921 5.1061 6.9470 3.8973 4.8630 3.6089 3.9765 ... 5.1115 4.9041 7.9145 4.5559 3.7138 6.5782 4.7512 2.9710 5.0777 1.8444
1970 GTEX-RWS6-0226-SM-2XCA9 Lung 6.0830 -0.5756 4.3889 6.7302 4.6053 5.1065 2.8321 0.9716 ... 5.8176 2.5437 7.7929 4.9012 2.7993 6.7510 5.2204 2.8422 5.0951 -0.3201
1975 GTEX-131XE-0726-SM-5HL9K Lung 3.7971 -1.9379 4.8555 6.4052 3.9561 5.4263 3.2959 4.5199 ... 4.6697 6.5777 7.5114 5.2130 2.3816 6.6225 3.7389 3.7248 5.6809 0.8488
1982 GTEX-QMRM-0826-SM-3NB33 Lung 5.3067 -3.8160 4.9065 7.5810 5.8714 4.7345 2.6185 3.1095 ... 5.6080 3.7324 8.2849 4.6201 3.6440 6.7052 5.1094 3.3364 5.8153 1.6604
1986 GTEX-XV7Q-0426-SM-4BRVN Lung 2.5585 -1.7809 6.7916 6.5865 2.7051 4.9519 4.3618 3.1892 ... 3.5779 2.8974 7.7685 4.8294 1.9149 5.9989 2.4117 2.4198 4.2080 1.0007

288 rows × 29 columns

In [57]:
#making a violin plot 
# Step 2: Select all ITG gene columns
itgs = [col for col in data_Lung.columns if col.startswith("ITG")]

# Step 3: Melt the DataFrame into long format
melted = data_Lung.melt(id_vars=["Unnamed: 0", "primary_site"],
                       value_vars=itgs,
                       var_name='Gene', value_name='Expression')

gene_order_lung = melted.groupby('Gene')['Expression'].mean().sort_values(ascending=False).index
# Step 4: Create the violin plot
plt.figure(figsize=(16, 6))
sns.violinplot(data=melted, x='Gene', y='Expression', order=gene_order_lung,scale='width', inner='box')
plt.xticks(rotation=90)
plt.title('Expression of All ITG Genes in Lung Samples')
plt.tight_layout()
plt.show()
/var/folders/y1/vp0_kzn53l53f5jp4xdgycp00000gn/T/ipykernel_15024/2840735527.py:13: FutureWarning: 

The `scale` parameter has been renamed and will be removed in v0.15.0. Pass `density_norm='width'` for the same effect.
  sns.violinplot(data=melted, x='Gene', y='Expression', order=gene_order_lung,scale='width', inner='box')
No description has been provided for this image
In [1]:
import matplotlib.pyplot as plt
In [16]:
plt.figure(figsize = (16, 6))
sns.violinplot(data = data_Lung)
plt.title("Integrin Genes of the lung")
plt.xlabel("Integrin Genes")
plt.ylabel("Gene Expressions Levels")
plt.show()
No description has been provided for this image
In [31]:
brain_Lung_data = data[data['primary_site'].isin(['Brain', 'Lung'])]     #filter data by organ, display both brain and liver data

#rearrange data
brain_Lung_vertical = brain_Lung_expression_only.melt(id_vars = 'primary_site', var_name = 'integrin_gene', value_name = 'expression_levels')
In [29]:
brain_Lung_expression_only=brain_Lung_data.iloc[:,1:]
In [32]:
brain_Lung_vertical
Out[32]:
primary_site integrin_gene expression_levels
0 Brain ITGA10 0.5763
1 Lung ITGA10 4.9137
2 Lung ITGA10 4.0541
3 Lung ITGA10 6.0732
4 Lung ITGA10 4.2510
... ... ... ...
38875 Brain ITGA11 -2.2447
38876 Brain ITGA11 -2.5479
38877 Lung ITGA11 1.6604
38878 Brain ITGA11 -0.5125
38879 Lung ITGA11 1.0007

38880 rows × 3 columns

In [33]:
plt.figure(figsize=(16, 6))
sns.violinplot(x = 'integrin_gene', y = 'expression_levels', hue = 'primary_site', data = brain_Lung_vertical, split = True, inner = 'quartile')
plt.title("Integrin Genes of the Brain vs. the Lung")
plt.xlabel("Integrin Gene")
plt.ylabel("Gene Expression Levels")
plt.legend(title = 'primary_site')
plt.show()
No description has been provided for this image
In [ ]: