29
29
#================================================================================
30
30
31
31
import os
32
+ import subprocess
32
33
import sys
33
34
import time
34
35
print ("using python version %s" % sys .version )
55
56
projectFolder = '/grail/projects/chordoma/'
56
57
57
58
59
+ # Path to the data folder on the server
60
+ # torta.bcm.genialis.com -> /grail/genialis/data/
61
+ # bcm.genialis.com -> /storage/genialis/bcm.genialis.com/data/
62
+ data_folder_path = '/grail/genialis/data/'
63
+ # Address used to connect to server via ssh
64
+ # It can also contain a username (e.g. <username>@<hostname>)
65
+ ssh_hostname = 'torta.bcmt.bcm.edu'
66
+
67
+
58
68
#================================================================================
59
69
#===========================DEFINING THE CLASSES=================================
60
70
#================================================================================
@@ -73,6 +83,9 @@ class ResCollection(object):
73
83
#: list of processing objects that need to be downloaded
74
84
to_download = []
75
85
86
+ #: ssh connection to the server
87
+ ssh_connection = None
88
+
76
89
# this __init__section is called from the get go
77
90
def __init__ (self , collection_slug , genome , relationship_file = None ):
78
91
print ('Loading collection %s' % (collection_slug ))
@@ -190,6 +203,57 @@ def download(self, output=''):
190
203
191
204
time .sleep (1 )
192
205
206
+ def _create_local_link (self , src , dest ):
207
+ dest_dir = os .path .dirname (dest )
208
+ if not os .path .isdir (dest_dir ):
209
+ os .makedirs (dest_dir )
210
+
211
+ if os .path .isfile (dest ):
212
+ os .remove (dest )
213
+
214
+ os .symlink (src , dest )
215
+
216
+ def _create_ssh_link (self , src , dest ):
217
+ if self .ssh_connection is None :
218
+ self .ssh_connection = subprocess .Popen (
219
+ ['ssh' , '-tt' , ssh_hostname ],
220
+ stdin = subprocess .PIPE ,
221
+ stdout = subprocess .PIPE ,
222
+ universal_newlines = True ,
223
+ bufsize = 0
224
+ )
225
+
226
+ dest_dir = os .path .dirname (dest )
227
+
228
+ self .ssh_connection .stdin .write ('mkdir -p "{}"\n ' .format (dest_dir ))
229
+ self .ssh_connection .stdin .write ('ln -sf "{}" "{}"\n ' .format (src , dest ))
230
+
231
def create_links(self, links=None, path='resdk_results'):
    """Create symbolic links to result files of the collection's data.

    For every entry in ``links``, the collection's data objects with
    status ``OK`` and the given type are looked up, and each file in the
    given field is linked under ``path``. A file found on the local
    file system is linked directly; otherwise the link commands are sent
    over ssh.

    :param links: iterable of dicts with the keys ``type`` (data type
        used as filter), ``field`` (field holding the files) and
        ``subfolder`` (folder below ``path`` that receives the links);
        ``None`` means no links
    :param path: folder under which the links are created
    """
    print('Linking results...')
    try:
        for link in links or ():
            for data in self._collection.data.filter(status='OK', type=link['type']):
                for file_name in data.files(field_name=link['field']):
                    file_path = os.path.join(data_folder_path, str(data.id), file_name)

                    # NOTE(review): ``genome`` is not defined in this
                    # method; presumably a module-level name - confirm.
                    link_name = '{:05}_{}_{}'.format(
                        data.id,
                        data.sample.slug if data.sample else data.slug,
                        genome
                    )
                    # Keep everything after the first dot so that
                    # multi-part extensions survive the rename.
                    if '.' in file_name:
                        link_extension = file_name.split('.', 1)[1]
                        link_name = '{}.{}'.format(link_name, link_extension)

                    link_path = os.path.join(path, link['subfolder'], link_name)

                    if os.path.isfile(file_path):
                        self._create_local_link(file_path, link_path)
                    else:
                        self._create_ssh_link(file_path, link_path)
    finally:
        # Close the session even when linking fails, and forget it so
        # that a later call starts a new one instead of writing to a
        # closed pipe.
        connection = self.ssh_connection
        if connection is not None:
            self.ssh_connection = None
            connection.stdin.close()
256
+
193
257
def run_macs (self , sample_name , useBackground = True , p_value = '1e-9' , watch = False ):
194
258
sample = self ._sample_dict [sample_name ]['sample' ]
195
259
@@ -394,8 +458,20 @@ def main():
394
458
res_collection .run_bamplot (sample_names = h3k27ac_list , input_region = gff_region , watch = True , title = 'h3k27ac_list' )
395
459
res_collection .run_cuffnorm (sample_names = all_samples , watch = True )
396
460
397
- res_collection .download (output = '/grail/genialis/pipeline_resdk' )
398
-
461
+ # Wait for the analyses started with `watch=True` to finish and download
462
+ # their results.
463
+ # res_collection.download(output='/grail/genialis/pipeline_resdk')
464
+
465
+ # Links can only be created when analysis is finished. So you have
466
+ # to wait before running this step or run the script one more time
467
+ # when all analyses are finished.
468
+ res_collection .create_links ([
469
+ {'type' : 'data:alignment:bam:bowtie2:' , 'field' : 'bam' , 'subfolder' : 'bams' },
470
+ {'type' : 'data:alignment:bam:bowtie2:' , 'field' : 'bai' , 'subfolder' : 'bams' },
471
+ {'type' : 'data:chipseq:macs14:' , 'field' : 'peaks_bed' , 'subfolder' : 'macs' },
472
+ {'type' : 'data:chipseq:macs14:' , 'field' : 'peaks_xls' , 'subfolder' : 'macs' },
473
+ {'type' : 'data:chipseq:rose2:' , 'field' : 'all_enhancers' , 'subfolder' : 'roses' },
474
+ ])
399
475
400
476
#retrieve an arbitrary macs output
401
477
#macs_list = res_collection.get;acs(sample_name)
0 commit comments