Skip to content

Commit 08618d6

Browse files
committed
Add create_links method
1 parent a4cfe38 commit 08618d6

File tree

1 file changed

+78
-2
lines changed

1 file changed

+78
-2
lines changed

pipeline_resdk/pipeline_resdk.py pipeline_resdk.py

+78-2
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#================================================================================
3030

3131
import os
32+
import subprocess
3233
import sys
3334
import time
3435
print("using python version %s" % sys.version)
@@ -55,6 +56,15 @@
5556
projectFolder = '/grail/projects/chordoma/'
5657

5758

59+
# Path to the data folder on the server
60+
# torta.bcm.genialis.com -> /grail/genialis/data/
61+
# bcm.genialis.com -> /storage/genialis/bcm.genialis.com/data/
62+
data_folder_path = '/grail/genialis/data/'
63+
# Address used to connect to server via ssh
64+
# It can also contain a username (i.e. <username>@<hostname>)
65+
ssh_hostname = 'torta.bcmt.bcm.edu'
66+
67+
5868
#================================================================================
5969
#===========================DEFINING THE CLASSES=================================
6070
#================================================================================
@@ -73,6 +83,9 @@ class ResCollection(object):
7383
#: list of processing objects that need to be downloaded
7484
to_download = []
7585

86+
#: ssh connection to the server
87+
ssh_connection = None
88+
7689
# this __init__section is called from the get go
7790
def __init__(self, collection_slug, genome, relationship_file=None):
7891
print('Loading collection %s' % (collection_slug))
@@ -190,6 +203,57 @@ def download(self, output=''):
190203

191204
time.sleep(1)
192205

206+
def _create_local_link(self, src, dest):
207+
dest_dir = os.path.dirname(dest)
208+
if not os.path.isdir(dest_dir):
209+
os.makedirs(dest_dir)
210+
211+
if os.path.isfile(dest):
212+
os.remove(dest)
213+
214+
os.symlink(src, dest)
215+
216+
def _create_ssh_link(self, src, dest):
217+
if self.ssh_connection is None:
218+
self.ssh_connection = subprocess.Popen(
219+
['ssh', '-tt', ssh_hostname],
220+
stdin=subprocess.PIPE,
221+
stdout=subprocess.PIPE,
222+
universal_newlines=True,
223+
bufsize=0
224+
)
225+
226+
dest_dir = os.path.dirname(dest)
227+
228+
self.ssh_connection.stdin.write('mkdir -p "{}"\n'.format(dest_dir))
229+
self.ssh_connection.stdin.write('ln -sf "{}" "{}"\n'.format(src, dest))
230+
231+
def create_links(self, links=None, path='resdk_results'):
    """Symlink result files of the collection's data objects under ``path``.

    For every entry in ``links`` the collection's data objects with status
    ``OK`` and the entry's ``type`` are selected, and each file of the
    entry's ``field`` is linked as
    ``<path>/<subfolder>/<id>_<slug>_<genome>[.<extension>]``, where
    ``<id>`` is zero-padded to five digits, ``<slug>`` is the sample slug
    (or the data slug when there is no sample) and ``<extension>`` is
    everything after the first dot of the file name.

    If the source file exists on this machine the link is created
    locally, otherwise the link commands are sent to the remote host.

    :param links: iterable of dicts with the keys ``type``, ``field`` and
        ``subfolder``; ``None`` (the default) creates no links
    :param path: base directory in which the links are created
    """
    print('Linking results...')
    # ``None`` replaces the former mutable ``[]`` default.
    for link in links or ():
        matching = self._collection.data.filter(status='OK', type=link['type'])
        for data in matching:
            for file_name in data.files(field_name=link['field']):
                file_path = os.path.join(data_folder_path, str(data.id), file_name)

                # NOTE(review): ``genome`` is not defined in this method; it
                # has to resolve to a module-level name -- confirm.
                link_name = '{:05}_{}_{}'.format(
                    data.id,
                    data.sample.slug if data.sample else data.slug,
                    genome
                )
                if '.' in file_name:
                    # Keep compound extensions such as "bam.bai" intact.
                    link_extension = file_name.split('.', 1)[1]
                    link_name = '{}.{}'.format(link_name, link_extension)

                link_path = os.path.join(path, link['subfolder'], link_name)

                if os.path.isfile(file_path):
                    self._create_local_link(file_path, link_path)
                else:
                    self._create_ssh_link(file_path, link_path)

    if self.ssh_connection is not None:
        self.ssh_connection.stdin.close()
        # Drop the closed connection so a later call opens a fresh one
        # instead of writing to a closed pipe.
        self.ssh_connection = None
256+
193257
def run_macs(self, sample_name, useBackground=True, p_value='1e-9', watch=False):
194258
sample = self._sample_dict[sample_name]['sample']
195259

@@ -394,8 +458,20 @@ def main():
394458
res_collection.run_bamplot(sample_names=h3k27ac_list, input_region=gff_region, watch=True, title='h3k27ac_list')
395459
res_collection.run_cuffnorm(sample_names=all_samples, watch=True)
396460

397-
res_collection.download(output='/grail/genialis/pipeline_resdk')
398-
461+
# Wait for the analyses run with `watch=True` to finish, then download
462+
# their results.
463+
# res_collection.download(output='/grail/genialis/pipeline_resdk')
464+
465+
# Links can only be created when analysis is finished. So you have
466+
# to wait before running this step or run the script one more time
467+
# when all analyses are finished.
468+
res_collection.create_links([
469+
{'type': 'data:alignment:bam:bowtie2:', 'field': 'bam', 'subfolder': 'bams'},
470+
{'type': 'data:alignment:bam:bowtie2:', 'field': 'bai', 'subfolder': 'bams'},
471+
{'type': 'data:chipseq:macs14:', 'field': 'peaks_bed', 'subfolder': 'macs'},
472+
{'type': 'data:chipseq:macs14:', 'field': 'peaks_xls', 'subfolder': 'macs'},
473+
{'type': 'data:chipseq:rose2:', 'field': 'all_enhancers', 'subfolder': 'roses'},
474+
])
399475

400476
#retrieve an arbitrary macs output
401477
#macs_list = res_collection.get;acs(sample_name)

0 commit comments

Comments
 (0)