Skip to content

Commit 86fc3cd

Browse files
authored
Finish uniprot.py (#21)
* Update uniprot.py * Update uniprot.py * Update uniprot.py * Update uniprot.py * Update uniprot.py
1 parent 0031801 commit 86fc3cd

File tree

1 file changed

+20
-36
lines changed

1 file changed

+20
-36
lines changed

opendata/uniprot.py

+20-36
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,21 @@
1+
import pandas as pd
2+
# Read every column as text. The identifier column appears to mix
# numeric-looking and alphanumeric IDs (e.g. GI / GeneID vs. PDB); letting
# pandas infer dtypes chunk by chunk can coerce some of them and raises a
# mixed-type DtypeWarning on large files. Missing values are still parsed as NaN.
df = pd.read_csv('ECOLI_83333_idmapping.dat', sep='\t', header=None, dtype=str)
3+
14
thisdata2identifiersorg = {
2-
'UniProtKB-ID':None,
3-
'Gene_Name':None,
4-
'Gene_OrderedLocusName':None,
5-
'GI':None,
6-
'UniRef100':None,
7-
'UniRef90':None,
8-
'UniRef50':None,
9-
'UniParc':'uniparc',
10-
'EMBL':None,
11-
'EMBL-CDS':None,
12-
'NCBI_TaxID':'taxonomy',
13-
'RefSeq':'refseq',
14-
'RefSeq_NT':None,
15-
'PDB':'pdb',
16-
'BioGRID':'biogrid',
17-
'DIP':'dip',
18-
'STRING':'string',
19-
'ChEMBL':'chembl',
20-
'DrugBank':'drugbank',
21-
'EnsemblGenome':'ensembl',
22-
'EnsemblGenome_TRS':None,
23-
'EnsemblGenome_PRO':None,
24-
'GeneID':None,
25-
'KEGG':None,
26-
'PATRIC':None,
27-
'EchoBASE':'echobase',
28-
'eggNOG':'eggnog',
29-
'HOGENOM':'hogenom',
30-
'OMA':'oma.protein',
31-
'BioCyc':'biocyc',
32-
'CRC64':None,
33-
'Gene_Synonym':None,
34-
'UniPathway':None,
35-
'Gene_ORFName':None,
36-
'MEROPS':'merops',
37-
'TCDB':'tcdb',}
5+
'UniProtKB-ID':None, 'Gene_Name':None,'Gene_OrderedLocusName':None, 'GI':None, 'UniRef100':None,
6+
'UniRef90':None, 'UniRef50':None, 'UniParc':'uniparc', 'EMBL':None, 'EMBL-CDS':None,
7+
'NCBI_TaxID':'taxonomy', 'RefSeq':'refseq', 'RefSeq_NT':None, 'PDB':'pdb', 'BioGRID':'biogrid',
8+
'DIP':'dip', 'STRING':'string', 'ChEMBL':'chembl', 'DrugBank':'drugbank', 'EnsemblGenome':'ensembl',
9+
'EnsemblGenome_TRS':None, 'EnsemblGenome_PRO':None, 'GeneID':None, 'KEGG':None, 'PATRIC':None,
10+
'EchoBASE':'echobase', 'eggNOG':'eggnog', 'HOGENOM':'hogenom', 'OMA':'oma.protein', 'BioCyc':'biocyc',
11+
'CRC64':None, 'Gene_Synonym':None, 'UniPathway':None, 'Gene_ORFName':None, 'MEROPS':'merops', 'TCDB':'tcdb',
12+
'ComplexPortal':'complexportal', 'DNASU':None, 'MINT':None, 'ESTHER':None, 'PeroxiBase':'peroxibase', 'CollecTF':None,
13+
'PHI-base':None, 'Allergome':'allergome', 'DisProt':'disprot', 'REBASE':'rebase', 'SwissLipids':None, 'Reactome':'reactome'
14+
}
15+
16+
# Translate the ID-type labels in column 1 into identifiers.org prefixes.
# Series.map yields NaN for any label that is absent from the mapping, so
# unknown ID types are discarded together with the ones explicitly mapped to
# None, instead of leaking their raw label into the prefix column.
df[1] = df[1].map(thisdata2identifiersorg)

# Keep only rows that resolved to a prefix, give the columns their output
# names, and tag the first identifier of every pair with the 'uniprot' prefix.
df = (
    df.dropna(subset=[1])
    .rename(columns={0: 'id1', 1: 'identifiersorgprefix2', 2: 'id2'})
    .assign(identifiersorgprefix1='uniprot')
)

# Fix the column order of the exported file.
df = df[['identifiersorgprefix1', 'id1', 'identifiersorgprefix2', 'id2']]
df.to_csv('uniprot.csv', index=False)

0 commit comments

Comments
 (0)