In [1]:
import pandas as pd
import numpy as np
In [2]:
import os
print(os.getcwd())
/Users/levilei/classAi
In [4]:
CCLE_all=pd.read_csv("Model.csv")
In [5]:
CCLE_all
Out[5]:
ModelID PatientID CellLineName StrippedCellLineName DepmapModelType OncotreeLineage OncotreePrimaryDisease OncotreeSubtype OncotreeCode PatientSubtypeFeatures ... PublicComments CCLEName HCMIID PediatricModelType ModelAvailableInDbgap ModelSubtypeFeatures WTSIMasterCellID SangerModelID COSMICID ModelIDAlias
0 ACH-000001 PT-gj46wT NIH:OVCAR-3 NIHOVCAR3 HGSOC Ovary/Fallopian Tube Ovarian Epithelial Tumor High-Grade Serous Ovarian Cancer HGSOC NaN ... NaN NIHOVCAR3_OVARY NaN False Approved for public sharing - CCLE NaN 2201.0 SIDM00105 905933.0 NaN
1 ACH-000002 PT-5qa3uk HL-60 HL60 AMLMRC Myeloid Acute Myeloid Leukemia AML with Myelodysplasia-Related Changes AMLMRC TP53(del), CDKN2A and NRAS mutations [PubMed=2... ... NaN HL60_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE NaN True Approved for public sharing - CCLE NRAS, BCOR and CDKN2A 55.0 SIDM00829 905938.0 NaN
2 ACH-000003 PT-puKIyc CACO2 CACO2 COAD Bowel Colorectal Adenocarcinoma Colon Adenocarcinoma COAD NaN ... NaN CACO2_LARGE_INTESTINE NaN False Approved for public sharing - CCLE NaN NaN SIDM00891 NaN NaN
3 ACH-000004 PT-q4K2cp HEL HEL AMLNOS Myeloid Acute Myeloid Leukemia AML, NOS AMLNOS JAK2 and TP53 mutations ... NaN HEL_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE NaN True Approved for public sharing - CCLE JAK2 and TP53 783.0 SIDM00594 907053.0 NaN
4 ACH-000005 PT-q4K2cp HEL 92.1.7 HEL9217 AML Myeloid Acute Myeloid Leukemia Acute Myeloid Leukemia AML JAK2 and TP53 mutations ... NaN HEL9217_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE NaN True Approved for public sharing - CCLE NaN NaN SIDM00593 NaN NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2111 ACH-003473 PT-fG5tCh CCLF_PEDS_0013_T CCLFPEDS0013T ERMS Soft Tissue Rhabdomyosarcoma Embryonal Rhabdomyosarcoma ERMS NaN ... NaN NaN HCM-BROD-0006-C49 True NaN NaN NaN NaN NaN NaN
2112 ACH-003474 PT-WxfjG3 CCLF_HNSC_0001_T CCLFHNSC0001T HNSC Head and Neck Head and Neck Squamous Cell Carcinoma Head and Neck Squamous Cell Carcinoma HNSC NaN ... NaN NaN HCM-BROD-1131-C06 False NaN NaN NaN NaN NaN NaN
2113 ACH-003475 PT-ce6oqw CCLF_HNSC_0003_T CCLFHNSC0003T HNSC Head and Neck Head and Neck Squamous Cell Carcinoma Head and Neck Squamous Cell Carcinoma HNSC NaN ... NaN NaN NaN False NaN NaN NaN NaN NaN NaN
2114 ACH-003476 PT-ce6oqw CCLF_HNSC_0002_T CCLFHNSC0002T ESCC Esophagus/Stomach Esophageal Squamous Cell Carcinoma Esophageal Squamous Cell Carcinoma ESCC NaN ... NaN NaN HCM-BROD-1130-C06 False NaN NaN NaN NaN NaN NaN
2115 ACH-003480 PT-D6v5Dz CCLF_THYR_0001_T CCLFTHYR0001T THAP Thyroid Anaplastic Thyroid Cancer Anaplastic Thyroid Cancer THAP NaN ... NaN NaN NaN False NaN NaN NaN NaN NaN NaN

2116 rows × 49 columns

In [8]:
CCLE_prostate=CCLE_all[CCLE_all['OncotreeLineage']=='Prostate']
In [9]:
CCLE_prostate
Out[9]:
ModelID PatientID CellLineName StrippedCellLineName DepmapModelType OncotreeLineage OncotreePrimaryDisease OncotreeSubtype OncotreeCode PatientSubtypeFeatures ... PublicComments CCLEName HCMIID PediatricModelType ModelAvailableInDbgap ModelSubtypeFeatures WTSIMasterCellID SangerModelID COSMICID ModelIDAlias
88 ACH-000090 PT-j0lUB5 PC-3 PC3 PRAD Prostate Prostate Adenocarcinoma Prostate Adenocarcinoma PRAD NaN ... NaN PC3_PROSTATE NaN False Approved for public sharing - CCLE NaN 911.0 SIDM00088 905934.0 NaN
113 ACH-000115 PT-O15HlH VCaP VCAP PRAD Prostate Prostate Adenocarcinoma Prostate Adenocarcinoma PRAD NaN ... NaN VCAP_PROSTATE NaN False Approved for public sharing - CCLE NaN 2154.0 SIDM01077 1299075.0 NaN
168 ACH-000170 PT-fkfBUv PrEC LH PRECLH ZIMMEPCP Prostate Non-Cancerous Immortalized Epithelial Cells, Prostate NaN NaN ... NaN PRECLH_PROSTATE NaN False Approved for public sharing - CCLE NaN NaN NaN NaN NaN
175 ACH-000177 PT-Noh10Y NCI-H660 NCIH660 PRSCC Prostate Prostate Small Cell Carcinoma Prostate Small Cell Carcinoma PRSCC NaN ... NaN NCIH660_PROSTATE NaN False Approved for public sharing - CCLE NaN 87.0 SIDM01123 1330975.0 NaN
948 ACH-000952 PT-4e8Oiw MDA PCa 2b MDAPCA2B PRAD Prostate Prostate Adenocarcinoma Prostate Adenocarcinoma PRAD NaN ... NaN MDAPCA2B_PROSTATE NaN False Approved for public sharing - CCLE NaN NaN NaN NaN NaN
952 ACH-000956 PT-FFSOW0 22Rv1 22RV1 PRAD Prostate Prostate Adenocarcinoma Prostate Adenocarcinoma PRAD NaN ... NaN 22RV1_PROSTATE NaN False Approved for public sharing - CCLE NaN 1027.0 SIDM00499 924100.0 NaN
973 ACH-000977 PT-tY34fU LNCaP clone FGC LNCAPCLONEFGC PRAD Prostate Prostate Adenocarcinoma Prostate Adenocarcinoma PRAD NaN ... NaN LNCAPCLONEFGC_PROSTATE NaN False Approved for public sharing - CCLE NaN 2080.0 SIDM00683 907788.0 NaN
975 ACH-000979 PT-05dldv DU 145 DU145 PRAD Prostate Prostate Adenocarcinoma Prostate Adenocarcinoma PRAD NaN ... NaN DU145_PROSTATE NaN False Approved for public sharing - CCLE NaN 872.0 SIDM00120 905935.0 NaN
1210 ACH-001383 PT-w5hVvu PWR-1E PWR1E ZIMMPRO Prostate Non-Cancerous Immortalized Prostate Cells NaN NaN ... NaN PWR1E_PROSTATE NaN False Approved for dbGaP - HMB MDS NaN 23.0 SIDM01114 1330993.0 NaN
1244 ACH-001422 PT-EhPRzn WPE1-NA22 WPE1NA22 ZIMMEPCP Prostate Non-Cancerous Immortalized Epithelial Cells, Prostate NaN NaN ... NaN WPE1NA22_PROSTATE NaN False Approved for dbGaP - HMB MDS NaN NaN NaN NaN NaN
1265 ACH-001453 PT-pIyNqk BPH-1 BPH1 ZHYPP Prostate Non-Cancerous Hyperplasia, Prostate NaN NaN ... NaN BPH1_PROSTATE NaN False Approved for dbGaP - HMB MDS NaN 1223.0 SIDM00964 924105.0 NaN
1367 ACH-001627 PT-a2ltr5 P4E6 P4E6 ZIMMEPCP Prostate Non-Cancerous Immortalized Epithelial Cells, Prostate NaN NaN ... NaN P4E6_PROSTATE NaN False Approved for dbGaP - HMB MDS NaN NaN NaN NaN NaN
1380 ACH-001648 PT-F8MgcH Shmac 4 SHMAC4 ZIMMEPCP Prostate Non-Cancerous Immortalized Epithelial Cells, Prostate NaN NaN ... NaN SHMAC4_PROSTATE NaN False Approved for dbGaP - HMB MDS NaN NaN NaN NaN NaN
1381 ACH-001649 PT-iy858F Shmac 5 SHMAC5 ZIMMEPCP Prostate Non-Cancerous Immortalized Epithelial Cells, Prostate NaN NaN ... NaN SHMAC5_PROSTATE NaN False Approved for dbGaP - HMB MDS NaN NaN NaN NaN NaN
2029 ACH-003019 PT-mdeGej LASCPC-01 LASCPC01 PRNE Prostate Prostate Neuroendocrine Carcinoma Prostate Neuroendocrine Carcinoma PRNE NaN ... NaN LASCPC01_Prostate NaN False Approved for dbGaP - HMB MDS NaN NaN NaN NaN NaN

15 rows × 49 columns

In [10]:
CCLE_breast=CCLE_all[CCLE_all['OncotreeLineage']=='Breast']
In [11]:
CCLE_breast
Out[11]:
ModelID PatientID CellLineName StrippedCellLineName DepmapModelType OncotreeLineage OncotreePrimaryDisease OncotreeSubtype OncotreeCode PatientSubtypeFeatures ... PublicComments CCLEName HCMIID PediatricModelType ModelAvailableInDbgap ModelSubtypeFeatures WTSIMasterCellID SangerModelID COSMICID ModelIDAlias
15 ACH-000017 PT-8CE6ah SK-BR-3 SKBR3 BRCA Breast Invasive Breast Carcinoma Invasive Breast Carcinoma BRCA NaN ... NaN SKBR3_BREAST NaN False Approved for public sharing - CCLE HER2+ NaN SIDM00897 NaN NaN
17 ACH-000019 PT-viJKnw MCF7 MCF7 BRCA Breast Invasive Breast Carcinoma Invasive Breast Carcinoma BRCA NaN ... NaN MCF7_BREAST NaN False Approved for public sharing - CCLE ER+ 588.0 SIDM00148 905946.0 NaN
26 ACH-000028 PT-viJKnw KPL-1 KPL1 BRCA Breast Invasive Breast Carcinoma Invasive Breast Carcinoma BRCA NaN ... SNP fingerprinting confirms that this line is ... KPL1_BREAST NaN False Approved for public sharing - CCLE ER+ NaN SIDM00147 NaN NaN
42 ACH-000044 PT-HMBfbj MDA-MB-134-VI MDAMB134VI ILC Breast Invasive Breast Carcinoma Breast Invasive Lobular Carcinoma ILC NaN ... NaN MDAMB134VI_BREAST NaN False Approved for public sharing - CCLE luminal ER+ NaN SIDM00005 NaN NaN
95 ACH-000097 PT-k1TO7o ZR-75-1 ZR751 IDC Breast Invasive Breast Carcinoma Breast Invasive Ductal Carcinoma IDC NaN ... NaN ZR751_BREAST NaN False Approved for public sharing - CCLE luminal ER, PR+ NaN SIDM00314 NaN NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2003 ACH-002884 PT-N65Hvq IPM-BO-053R IPMBO053R ILC Breast Invasive Breast Carcinoma Breast Invasive Lobular Carcinoma ILC NaN ... NaN NaN NaN False NaN HER2+ NaN NaN NaN NaN
2004 ACH-002885 PT-pPJgGC IPM-BO-056 IPMBO056 IDC Breast Invasive Breast Carcinoma Breast Invasive Ductal Carcinoma IDC NaN ... NaN NaN NaN False NaN ER, PR+ NaN NaN NaN NaN
2006 ACH-002921 PT-NvrCXN BCK4 BCK4 ILC Breast Invasive Breast Carcinoma Breast Invasive Lobular Carcinoma ILC NaN ... NaN NaN NaN False In Progress NaN NaN NaN NaN NaN
2018 ACH-002950 PT-hxORJf NH93T NH93T BRCA Breast Invasive Breast Carcinoma Invasive Breast Carcinoma BRCA NaN ... NaN NaN NaN False NaN TNBC NaN NaN NaN NaN
2019 ACH-002951 PT-URKT9Y NH84T NH84T BRCA Breast Invasive Breast Carcinoma Invasive Breast Carcinoma BRCA NaN ... NaN NaN NaN False NaN TNBC NaN NaN NaN NaN

96 rows × 49 columns

In [13]:
CCLE_prostate['OncotreeSubtype'].value_counts()
Out[13]:
OncotreeSubtype
Prostate Adenocarcinoma                    6
Immortalized Epithelial Cells, Prostate    5
Prostate Small Cell Carcinoma              1
Immortalized Prostate Cells                1
Hyperplasia, Prostate                      1
Prostate Neuroendocrine Carcinoma          1
Name: count, dtype: int64
In [14]:
drug_screen=pd.read_csv("Drug_sensitivity_replicate-level_dose_(Sanger_GDSC2)_subsetted.csv")
/var/folders/y1/vp0_kzn53l53f5jp4xdgycp00000gn/T/ipykernel_994/1001691071.py:1: DtypeWarning: Columns (5) have mixed types. Specify dtype option on import or set low_memory=False.
  drug_screen=pd.read_csv("Drug_sensitivity_replicate-level_dose_(Sanger_GDSC2)_subsetted.csv")
In [15]:
drug_screen
Out[15]:
depmap_id cell_line_display_name lineage_1 lineage_2 lineage_3 lineage_6 lineage_4 (+)-CAMPTOTHECIN (GDSC2:1003) 0.1μM rep1 PD 0325901 (GDSC2:1060) 0.25μM rep1 ENTINOSTAT (GDSC2:1593) 10.0μM rep1 ... KU-57788 (GDSC2:1038) 5.0μM rep35 KU-57788 (GDSC2:1038) 10.0μM rep35 KU-57788 (GDSC2:1038) 2.5μM rep36 KU-57788 (GDSC2:1038) 5.0μM rep36 KU-57788 (GDSC2:1038) 10.0μM rep36 GSK 269962A (GDSC2:1192) 0.0390625μM rep5 GSK 269962A (GDSC2:1192) 0.0390625μM rep6 ENTINOSTAT (GDSC2:1593) 0.03125μM rep8 ENTINOSTAT (GDSC2:1593) 0.5μM rep8 RO-3306 (GDSC2:1052) 0.0390625μM rep7
0 ACH-000973 639V Bladder/Urinary Tract Urethral Cancer Urethral Urothelial Carcinoma NaN NaN 0.083677 0.919947 0.848779 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1 ACH-000757 A427 Lung Non-Small Cell Lung Cancer Lung Adenocarcinoma NaN NaN 0.664791 0.637017 0.861281 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2 ACH-002209 ALLPO Lymphoid B-Cell Acute Lymphoblastic Leukemia B-Cell Acute Lymphoblastic Leukemia KMT2A NaN 0.145044 0.748603 0.014993 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3 ACH-002210 ARH77 Myeloid Non-Cancerous Immortalized Blood NaN NaN 0.074499 0.626689 0.145854 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4 ACH-000248 AU565 Breast Invasive Breast Carcinoma Invasive Breast Carcinoma HER2+ NaN 0.292193 0.986368 0.107727 ... 0.629204 0.379062 0.844173 0.664378 0.300903 NaN NaN NaN NaN NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
784 ACH-000452 TE8 Esophagus/Stomach Esophageal Squamous Cell Carcinoma Esophageal Squamous Cell Carcinoma NaN NaN 0.440621 0.843134 0.335483 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
785 ACH-000030 PC14 Lung Non-Small Cell Lung Cancer Lung Adenocarcinoma NaN NaN 0.508382 0.860545 0.421495 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
786 ACH-000626 U266B1 Lymphoid Mature B-Cell Neoplasms Plasma Cell Myeloma NaN NaN 0.671349 0.595165 0.176174 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
787 ACH-000835 GCT Soft Tissue Undifferentiated Pleomorphic Sarcoma/Malignant... Undifferentiated Pleomorphic Sarcoma/Malignant... NaN NaN 0.406913 0.768039 0.334517 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
788 ACH-000579 UACC257 Skin Melanoma Melanoma NaN NaN 0.999540 0.369727 0.290306 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

789 rows × 30261 columns

In [ ]: