In [8]:
import pandas as pd
import numpy as np
First you import pandas as numpy
In [9]:
data=pd.read_excel("gtex_integrin_7_organs.xlsx")
this you read in the excel file that was provided, which contained Gene expression for Integrins (27 integrins) from Gtex data.
the gene expression is measured from the tissues taken from 7 different sites, brain, lung, ovary etc.
In [10]:
data
Out[10]:
Unnamed: 0 | primary_site | ITGA10 | ITGAD | ITGAM | ITGA3 | ITGBL1 | ITGAE | ITGA2 | ITGB3 | ... | ITGA6 | ITGA2B | ITGB1 | ITGAL | ITGA9 | ITGB5 | ITGA8 | ITGA4 | ITGA1 | ITGA11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | GTEX-13QIC-0011-R1a-SM-5O9CJ | Brain | 0.5763 | -6.5064 | 2.2573 | 0.7832 | 1.0363 | 4.6035 | 2.5731 | -2.8262 | ... | 2.8562 | 1.3846 | 5.8430 | 1.1316 | -0.7108 | 3.5387 | -0.0725 | -0.4521 | 0.2029 | -2.8262 |
1 | GTEX-1399S-1726-SM-5L3DI | Lung | 4.9137 | -3.6259 | 4.7307 | 7.1584 | 1.7702 | 4.9556 | 1.9149 | 2.6067 | ... | 4.2412 | 4.1211 | 7.7256 | 4.4900 | 2.9281 | 6.1483 | 5.1867 | 2.6185 | 4.7856 | -0.0277 |
2 | GTEX-PWCY-1326-SM-48TCU | Ovary | 2.3953 | -5.0116 | 1.4547 | 4.2593 | -0.7346 | 4.4149 | 0.2642 | 1.5216 | ... | 3.6816 | 1.5465 | 7.2964 | -0.9406 | 2.7742 | 5.0414 | 2.0325 | 0.7579 | 2.2573 | 1.2516 |
3 | GTEX-QXCU-0626-SM-2TC69 | Lung | 4.0541 | -2.3147 | 4.5053 | 7.5651 | 4.1788 | 4.1772 | 5.3695 | 1.8444 | ... | 4.9631 | 1.9149 | 7.9947 | 3.3911 | 2.8462 | 6.7683 | 4.1636 | 2.7951 | 5.3284 | 1.2147 |
4 | GTEX-ZA64-1526-SM-5CVMD | Breast | 2.0569 | -2.4659 | 3.3993 | 3.1311 | 3.0074 | 4.4977 | -1.7809 | 2.7139 | ... | 4.7340 | 0.6332 | 7.3496 | -0.9406 | 2.5338 | 6.5696 | 1.7229 | -0.6416 | 3.1195 | 1.1050 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1982 | GTEX-QMRM-0826-SM-3NB33 | Lung | 5.3067 | -3.8160 | 4.9065 | 7.5810 | 5.8714 | 4.7345 | 2.6185 | 3.1095 | ... | 5.6080 | 3.7324 | 8.2849 | 4.6201 | 3.6440 | 6.7052 | 5.1094 | 3.3364 | 5.8153 | 1.6604 |
1983 | GTEX-YFCO-1626-SM-4W1Z3 | Prostate | 2.9581 | -4.6082 | 1.1641 | 4.6938 | 1.5902 | 5.8625 | -0.5125 | 1.7617 | ... | 3.8798 | -1.4699 | 7.5163 | -0.3752 | 2.9562 | 5.3035 | 4.4304 | -0.9406 | 3.6136 | 0.4233 |
1984 | GTEX-1117F-2826-SM-5GZXL | Breast | 4.3184 | -6.5064 | 1.0433 | 4.8440 | 3.5498 | 4.6809 | 1.0293 | 3.3478 | ... | 5.3256 | -0.0725 | 7.7516 | 1.1382 | 2.1411 | 7.1132 | 0.3796 | 0.0854 | 3.8650 | 1.0151 |
1985 | GTEX-Q2AG-2826-SM-2HMJQ | Brain | 3.4622 | -5.5735 | 1.5013 | 5.4835 | 1.7702 | 4.7517 | 0.6790 | -3.1714 | ... | 1.1960 | 4.1740 | 4.3002 | 0.5470 | -0.9971 | 3.7982 | -0.2498 | 1.4808 | -0.5125 | -0.5125 |
1986 | GTEX-XV7Q-0426-SM-4BRVN | Lung | 2.5585 | -1.7809 | 6.7916 | 6.5865 | 2.7051 | 4.9519 | 4.3618 | 3.1892 | ... | 3.5779 | 2.8974 | 7.7685 | 4.8294 | 1.9149 | 5.9989 | 2.4117 | 2.4198 | 4.2080 | 1.0007 |
1987 rows × 29 columns
In [ ]:
In [11]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
In [34]:
data= pd.read_excel("gtex_integrin_7_organs.xlsx")
In [35]:
data
Out[35]:
Unnamed: 0 | primary_site | ITGA10 | ITGAD | ITGAM | ITGA3 | ITGBL1 | ITGAE | ITGA2 | ITGB3 | ... | ITGA6 | ITGA2B | ITGB1 | ITGAL | ITGA9 | ITGB5 | ITGA8 | ITGA4 | ITGA1 | ITGA11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | GTEX-13QIC-0011-R1a-SM-5O9CJ | Brain | 0.5763 | -6.5064 | 2.2573 | 0.7832 | 1.0363 | 4.6035 | 2.5731 | -2.8262 | ... | 2.8562 | 1.3846 | 5.8430 | 1.1316 | -0.7108 | 3.5387 | -0.0725 | -0.4521 | 0.2029 | -2.8262 |
1 | GTEX-1399S-1726-SM-5L3DI | Lung | 4.9137 | -3.6259 | 4.7307 | 7.1584 | 1.7702 | 4.9556 | 1.9149 | 2.6067 | ... | 4.2412 | 4.1211 | 7.7256 | 4.4900 | 2.9281 | 6.1483 | 5.1867 | 2.6185 | 4.7856 | -0.0277 |
2 | GTEX-PWCY-1326-SM-48TCU | Ovary | 2.3953 | -5.0116 | 1.4547 | 4.2593 | -0.7346 | 4.4149 | 0.2642 | 1.5216 | ... | 3.6816 | 1.5465 | 7.2964 | -0.9406 | 2.7742 | 5.0414 | 2.0325 | 0.7579 | 2.2573 | 1.2516 |
3 | GTEX-QXCU-0626-SM-2TC69 | Lung | 4.0541 | -2.3147 | 4.5053 | 7.5651 | 4.1788 | 4.1772 | 5.3695 | 1.8444 | ... | 4.9631 | 1.9149 | 7.9947 | 3.3911 | 2.8462 | 6.7683 | 4.1636 | 2.7951 | 5.3284 | 1.2147 |
4 | GTEX-ZA64-1526-SM-5CVMD | Breast | 2.0569 | -2.4659 | 3.3993 | 3.1311 | 3.0074 | 4.4977 | -1.7809 | 2.7139 | ... | 4.7340 | 0.6332 | 7.3496 | -0.9406 | 2.5338 | 6.5696 | 1.7229 | -0.6416 | 3.1195 | 1.1050 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1982 | GTEX-QMRM-0826-SM-3NB33 | Lung | 5.3067 | -3.8160 | 4.9065 | 7.5810 | 5.8714 | 4.7345 | 2.6185 | 3.1095 | ... | 5.6080 | 3.7324 | 8.2849 | 4.6201 | 3.6440 | 6.7052 | 5.1094 | 3.3364 | 5.8153 | 1.6604 |
1983 | GTEX-YFCO-1626-SM-4W1Z3 | Prostate | 2.9581 | -4.6082 | 1.1641 | 4.6938 | 1.5902 | 5.8625 | -0.5125 | 1.7617 | ... | 3.8798 | -1.4699 | 7.5163 | -0.3752 | 2.9562 | 5.3035 | 4.4304 | -0.9406 | 3.6136 | 0.4233 |
1984 | GTEX-1117F-2826-SM-5GZXL | Breast | 4.3184 | -6.5064 | 1.0433 | 4.8440 | 3.5498 | 4.6809 | 1.0293 | 3.3478 | ... | 5.3256 | -0.0725 | 7.7516 | 1.1382 | 2.1411 | 7.1132 | 0.3796 | 0.0854 | 3.8650 | 1.0151 |
1985 | GTEX-Q2AG-2826-SM-2HMJQ | Brain | 3.4622 | -5.5735 | 1.5013 | 5.4835 | 1.7702 | 4.7517 | 0.6790 | -3.1714 | ... | 1.1960 | 4.1740 | 4.3002 | 0.5470 | -0.9971 | 3.7982 | -0.2498 | 1.4808 | -0.5125 | -0.5125 |
1986 | GTEX-XV7Q-0426-SM-4BRVN | Lung | 2.5585 | -1.7809 | 6.7916 | 6.5865 | 2.7051 | 4.9519 | 4.3618 | 3.1892 | ... | 3.5779 | 2.8974 | 7.7685 | 4.8294 | 1.9149 | 5.9989 | 2.4117 | 2.4198 | 4.2080 | 1.0007 |
1987 rows × 29 columns
In [22]:
In [36]:
brain_data = data[data['primary_site'] == 'Brain']
In [37]:
brain_data
Out[37]:
Unnamed: 0 | primary_site | ITGA10 | ITGAD | ITGAM | ITGA3 | ITGBL1 | ITGAE | ITGA2 | ITGB3 | ... | ITGA6 | ITGA2B | ITGB1 | ITGAL | ITGA9 | ITGB5 | ITGA8 | ITGA4 | ITGA1 | ITGA11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | GTEX-13QIC-0011-R1a-SM-5O9CJ | Brain | 0.5763 | -6.5064 | 2.2573 | 0.7832 | 1.0363 | 4.6035 | 2.5731 | -2.8262 | ... | 2.8562 | 1.3846 | 5.8430 | 1.1316 | -0.7108 | 3.5387 | -0.0725 | -0.4521 | 0.2029 | -2.8262 |
8 | GTEX-N7MS-2526-SM-26GMA | Brain | 2.2960 | -9.9658 | 0.6608 | 5.2840 | 0.4233 | 4.8510 | -0.2671 | -0.1031 | ... | 1.5415 | 4.6623 | 3.4687 | 0.5666 | -0.0130 | 3.0654 | 0.7916 | 1.0433 | -0.7346 | -0.7588 |
10 | GTEX-N7MS-2526-SM-26GMR | Brain | -0.2498 | -9.9658 | -0.8863 | 3.1685 | -1.6394 | 2.8158 | -0.4719 | -1.1488 | ... | 1.6045 | 0.9268 | 2.8055 | -0.5973 | 0.4657 | 1.8918 | 0.3460 | 0.3907 | -1.9942 | -1.5522 |
12 | GTEX-NPJ7-0011-R6a-SM-2I3G7 | Brain | 1.6045 | -6.5064 | 2.3193 | 3.6335 | -2.3147 | 5.0670 | -0.8863 | -0.8084 | ... | 3.2018 | 1.7575 | 4.6894 | 0.4125 | -0.6643 | 3.6916 | -0.6193 | -2.2447 | 1.2023 | -1.9942 |
14 | GTEX-132Q8-3026-SM-5PNVG | Brain | 2.8974 | -6.5064 | 1.9601 | 4.1836 | -0.8084 | 4.5892 | -0.5543 | 0.3460 | ... | 3.6018 | 2.7931 | 4.7274 | -0.0574 | 1.2271 | 4.3793 | 0.8488 | -0.2159 | 2.1378 | -0.6416 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1977 | GTEX-13G51-0011-R6b-SM-5LZX4 | Brain | -0.3383 | -6.5064 | 1.6234 | 2.7487 | -2.2447 | 5.2415 | -0.8863 | -2.9324 | ... | 2.1988 | 0.4016 | 4.5142 | -1.1811 | -0.8084 | 3.9983 | -1.0862 | -3.1714 | -0.7588 | -1.9379 |
1978 | GTEX-YFC4-0011-R10a-SM-4SOK5 | Brain | 0.4447 | -5.5735 | 0.3231 | 3.5237 | -1.5105 | 4.9016 | 0.9419 | -2.7274 | ... | 2.8178 | 1.3567 | 4.4621 | -0.2845 | 1.0222 | 3.3336 | 0.1903 | -1.0559 | 0.0300 | -0.4719 |
1980 | GTEX-13112-0011-R4b-SM-5DUXL | Brain | 0.6969 | -6.5064 | -0.9686 | 2.3760 | -2.2447 | 4.0739 | -0.6193 | -4.0350 | ... | 2.7357 | 1.5806 | 4.6882 | -0.9971 | -0.5756 | 3.5136 | 0.9343 | -1.0862 | 0.4340 | -2.2447 |
1981 | GTEX-1313W-0011-R1b-SM-5EQ4A | Brain | 0.1124 | -5.0116 | 2.2482 | 2.8897 | -0.5125 | 4.6445 | 0.3115 | -3.6259 | ... | 2.1147 | 0.9716 | 5.1202 | 0.6608 | 0.4761 | 3.2343 | 0.8408 | -0.0574 | -0.1828 | -2.5479 |
1985 | GTEX-Q2AG-2826-SM-2HMJQ | Brain | 3.4622 | -5.5735 | 1.5013 | 5.4835 | 1.7702 | 4.7517 | 0.6790 | -3.1714 | ... | 1.1960 | 4.1740 | 4.3002 | 0.5470 | -0.9971 | 3.7982 | -0.2498 | 1.4808 | -0.5125 | -0.5125 |
1152 rows × 29 columns
In [38]:
#making a violin plot
# Step 2: Select all ITG gene columns
itgs = [col for col in brain_df.columns if col.startswith("ITG")]
# Step 3: Melt the DataFrame into long format
melted = brain_data.melt(id_vars=["Unnamed: 0", "primary_site"],
value_vars=itgs,
var_name='Gene', value_name='Expression')
# Step 4: Create the violin plot
plt.figure(figsize=(16, 6))
sns.violinplot(data=melted, x='Gene', y='Expression', scale='width', inner='box')
plt.xticks(rotation=90)
plt.title('Expression of All ITG Genes in Brain Samples')
plt.tight_layout()
plt.show()
/var/folders/y1/vp0_kzn53l53f5jp4xdgycp00000gn/T/ipykernel_20969/1264212371.py:12: FutureWarning: The `scale` parameter has been renamed and will be removed in v0.15.0. Pass `density_norm='width'` for the same effect. sns.violinplot(data=melted, x='Gene', y='Expression', scale='width', inner='box')
In [46]:
data['primary_site'].unique()
Out[46]:
array(['Brain', 'Lung', 'Ovary', 'Breast', 'Liver', 'Bone Marrow', 'Prostate'], dtype=object)
In [39]:
data['primary_site'].value_counts()
Out[39]:
primary_site Brain 1152 Lung 288 Breast 179 Liver 110 Prostate 100 Ovary 88 Bone Marrow 70 Name: count, dtype: int64
In [15]:
data_Lung=df[df['primary_site']=='Lung']
In [53]:
data_Lung
Out[53]:
Unnamed: 0 | primary_site | ITGA10 | ITGAD | ITGAM | ITGA3 | ITGBL1 | ITGAE | ITGA2 | ITGB3 | ... | ITGA6 | ITGA2B | ITGB1 | ITGAL | ITGA9 | ITGB5 | ITGA8 | ITGA4 | ITGA1 | ITGA11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | GTEX-1399S-1726-SM-5L3DI | Lung | 4.9137 | -3.6259 | 4.7307 | 7.1584 | 1.7702 | 4.9556 | 1.9149 | 2.6067 | ... | 4.2412 | 4.1211 | 7.7256 | 4.4900 | 2.9281 | 6.1483 | 5.1867 | 2.6185 | 4.7856 | -0.0277 |
3 | GTEX-QXCU-0626-SM-2TC69 | Lung | 4.0541 | -2.3147 | 4.5053 | 7.5651 | 4.1788 | 4.1772 | 5.3695 | 1.8444 | ... | 4.9631 | 1.9149 | 7.9947 | 3.3911 | 2.8462 | 6.7683 | 4.1636 | 2.7951 | 5.3284 | 1.2147 |
5 | GTEX-11EI6-0826-SM-5985V | Lung | 6.0732 | -2.4659 | 3.9901 | 7.3945 | 4.7688 | 5.1157 | 4.3356 | 2.3366 | ... | 3.7378 | 4.7247 | 7.5016 | 5.1396 | 2.5036 | 6.5443 | 4.6531 | 3.8136 | 5.8679 | 0.7407 |
6 | GTEX-S341-0326-SM-2XCAU | Lung | 4.2510 | -5.0116 | 3.3076 | 6.1715 | 3.1129 | 5.2954 | 2.2960 | 1.1184 | ... | 4.7104 | 2.7530 | 7.5022 | 4.0730 | 2.6325 | 6.0483 | 5.0562 | 2.6962 | 5.1611 | 0.9343 |
7 | GTEX-WY7C-0426-SM-3NB3C | Lung | 3.3633 | -2.5479 | 4.8340 | 6.6864 | 3.0585 | 4.8294 | 2.6464 | 0.7999 | ... | 5.1190 | 1.5013 | 8.0260 | 3.6635 | 3.2435 | 5.8503 | 5.2991 | 2.8076 | 4.7571 | -0.1345 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1962 | GTEX-Q2AH-0426-SM-2I3EP | Lung | 5.9644 | -1.3921 | 5.1061 | 6.9470 | 3.8973 | 4.8630 | 3.6089 | 3.9765 | ... | 5.1115 | 4.9041 | 7.9145 | 4.5559 | 3.7138 | 6.5782 | 4.7512 | 2.9710 | 5.0777 | 1.8444 |
1970 | GTEX-RWS6-0226-SM-2XCA9 | Lung | 6.0830 | -0.5756 | 4.3889 | 6.7302 | 4.6053 | 5.1065 | 2.8321 | 0.9716 | ... | 5.8176 | 2.5437 | 7.7929 | 4.9012 | 2.7993 | 6.7510 | 5.2204 | 2.8422 | 5.0951 | -0.3201 |
1975 | GTEX-131XE-0726-SM-5HL9K | Lung | 3.7971 | -1.9379 | 4.8555 | 6.4052 | 3.9561 | 5.4263 | 3.2959 | 4.5199 | ... | 4.6697 | 6.5777 | 7.5114 | 5.2130 | 2.3816 | 6.6225 | 3.7389 | 3.7248 | 5.6809 | 0.8488 |
1982 | GTEX-QMRM-0826-SM-3NB33 | Lung | 5.3067 | -3.8160 | 4.9065 | 7.5810 | 5.8714 | 4.7345 | 2.6185 | 3.1095 | ... | 5.6080 | 3.7324 | 8.2849 | 4.6201 | 3.6440 | 6.7052 | 5.1094 | 3.3364 | 5.8153 | 1.6604 |
1986 | GTEX-XV7Q-0426-SM-4BRVN | Lung | 2.5585 | -1.7809 | 6.7916 | 6.5865 | 2.7051 | 4.9519 | 4.3618 | 3.1892 | ... | 3.5779 | 2.8974 | 7.7685 | 4.8294 | 1.9149 | 5.9989 | 2.4117 | 2.4198 | 4.2080 | 1.0007 |
288 rows × 29 columns
In [57]:
#making a violin plot
# Step 2: Select all ITG gene columns
itgs = [col for col in data_Lung.columns if col.startswith("ITG")]
# Step 3: Melt the DataFrame into long format
melted = data_Lung.melt(id_vars=["Unnamed: 0", "primary_site"],
value_vars=itgs,
var_name='Gene', value_name='Expression')
gene_order_lung = melted.groupby('Gene')['Expression'].mean().sort_values(ascending=False).index
# Step 4: Create the violin plot
plt.figure(figsize=(16, 6))
sns.violinplot(data=melted, x='Gene', y='Expression', order=gene_order_lung,scale='width', inner='box')
plt.xticks(rotation=90)
plt.title('Expression of All ITG Genes in Lung Samples')
plt.tight_layout()
plt.show()
/var/folders/y1/vp0_kzn53l53f5jp4xdgycp00000gn/T/ipykernel_15024/2840735527.py:13: FutureWarning: The `scale` parameter has been renamed and will be removed in v0.15.0. Pass `density_norm='width'` for the same effect. sns.violinplot(data=melted, x='Gene', y='Expression', order=gene_order_lung,scale='width', inner='box')
In [1]:
import matplotlib.pyplot as plt
In [16]:
plt.figure(figsize = (16, 6))
sns.violinplot(data = data_Lung)
plt.title("Integrin Genes of the lung")
plt.xlabel("Integrin Genes")
plt.ylabel("Gene Expressions Levels")
plt.show()
In [31]:
brain_Lung_data = data[data['primary_site'].isin(['Brain', 'Lung'])] #filter data by organ, display both brain and liver data
#rearrange data
brain_Lung_vertical = brain_Lung_expression_only.melt(id_vars = 'primary_site', var_name = 'integrin_gene', value_name = 'expression_levels')
In [29]:
brain_Lung_expression_only=brain_Lung_data.iloc[:,1:]
In [32]:
brain_Lung_vertical
Out[32]:
primary_site | integrin_gene | expression_levels | |
---|---|---|---|
0 | Brain | ITGA10 | 0.5763 |
1 | Lung | ITGA10 | 4.9137 |
2 | Lung | ITGA10 | 4.0541 |
3 | Lung | ITGA10 | 6.0732 |
4 | Lung | ITGA10 | 4.2510 |
... | ... | ... | ... |
38875 | Brain | ITGA11 | -2.2447 |
38876 | Brain | ITGA11 | -2.5479 |
38877 | Lung | ITGA11 | 1.6604 |
38878 | Brain | ITGA11 | -0.5125 |
38879 | Lung | ITGA11 | 1.0007 |
38880 rows × 3 columns
In [33]:
plt.figure(figsize=(16, 6))
sns.violinplot(x = 'integrin_gene', y = 'expression_levels', hue = 'primary_site', data = brain_Lung_vertical, split = True, inner = 'quartile')
plt.title("Integrin Genes of the Brain vs. the Lung")
plt.xlabel("Integrin Gene")
plt.ylabel("Gene Expression Levels")
plt.legend(title = 'primary_site')
plt.show()
In [ ]: