Skip to content

Commit a4cfe38

Browse files
committedJan 24, 2017
Add cuffnorm and cuffquant
1 parent 7b6e85e commit a4cfe38

File tree

1 file changed

+82
-12
lines changed

1 file changed

+82
-12
lines changed
 

‎pipeline_resdk.py ‎pipeline_resdk/pipeline_resdk.py

+82-12
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,10 @@ def getBam(self, name):
163163
sample = self._sample_dict[name]['sample']
164164
return sample.get_bam()
165165

166+
def getCuffquant(self, name):
167+
sample = self._sample_dict[name]['sample']
168+
return sample.get_cuffquant()
169+
166170
def download(self, output=''):
167171
print("Waiting for analysis to finish...")
168172
while True:
@@ -263,6 +267,61 @@ def run_bamplot(self, sample_names, input_gff=None, input_region=None, stretch_i
263267

264268
return bamplot
265269

270+
def run_cuffquant(self, sample_name, gff, watch=False):
271+
sample = self._sample_dict[sample_name]['sample']
272+
273+
cuffquant = sample.run_cuffquant(gff)
274+
275+
if watch:
276+
self.to_download.append(cuffquant)
277+
278+
self._collection.add_data(cuffquant)
279+
280+
return cuffquant
281+
282+
def run_cuffnorm(self, sample_names, watch=False):
283+
284+
replicates = []
285+
labels = []
286+
cuffquants = []
287+
288+
dict_replicates = {}
289+
290+
annotation = None
291+
292+
for sample_name in sample_names:
293+
cuffquant = self.getCuffquant(sample_name)
294+
group = self.getGroup(sample_name)
295+
296+
if annotation is None:
297+
annotation = cuffquant.input['gff']
298+
elif annotation != cuffquant.input['gff']:
299+
raise RuntimeError('Cuffquants objects have different annotations, '
300+
'please select cuffquants with same annotation.')
301+
302+
if group not in dict_replicates:
303+
dict_replicates[group] = str(len(dict_replicates))
304+
labels.append(group)
305+
306+
replicates.append(dict_replicates[group])
307+
cuffquants.append(cuffquant)
308+
309+
inputs = {
310+
'cuffquant': cuffquants,
311+
'annotation': annotation,
312+
'replicates': replicates,
313+
'labels': labels,
314+
}
315+
316+
cuffnorm = res.run_cuffnorm(**inputs)
317+
318+
if watch:
319+
self.to_download.append(cuffnorm)
320+
321+
self._collection.add_data(cuffnorm)
322+
323+
return cuffnorm
324+
266325

267326
def main():
268327

@@ -281,36 +340,38 @@ def main():
281340
# all of the datasets that we have from the k27ac group
282341
h3k27ac_list = [name for name in res_collection.names() if res_collection.getGroup(name) == 'H3K27AC']
283342

343+
all_samples = [name for name in res_collection.names()]
344+
284345
#=======================
285346
#schema for how we want to run, record and get back data for any processor
286-
sample_name = h3k27ac_list[0]
287-
print(sample_name)
347+
#sample_name = h3k27ac_list[0]
348+
#print(sample_name)
288349
#test macs on this and make sure the collection sample dict is appropriately updated
289350
#this makes sure you can return the macs data id when you run it
290351
#also want to make sure that the collection gets updated appropriately
291-
macs = res_collection.run_macs(sample_name,useBackground=True,p_value='1e-9', watch=True)
292-
rose = res_collection.run_rose2(sample_name, useBackground=True, tss=0, stitch=None, macs_params={'p_value': '1e-9'}, watch=True)
293-
res_collection.download(output='/grail/genialis/pipeline_resdk')
352+
#macs = res_collection.run_macs(sample_name,useBackground=True,p_value='1e-9', watch=True)
353+
#rose = res_collection.run_rose2(sample_name, useBackground=True, tss=0, stitch=None, macs_params={'p_value': '1e-9'}, watch=True)
354+
#res_collection.download(output='/grail/genialis/pipeline_resdk')
294355

295356
#gff = res.run('upload-gtf', input={'src':'<path/to/gff'}) # upload gff file
296357
#gff = res.data.get() # get gff file onece it is uploaded
297358
#bed = res.run('upload-bed', input={'src':'<path/to/bed>'})
298359
gff_region = 'chr17:+:41468594-41566948'
299-
bamplot = res_collection.run_bamplot(sample_names=h3k27ac_list, input_region=gff_region, watch=True)
300-
res_collection.download(output='/grail/genialis/pipeline_resdk')
360+
#bamplot = res_collection.run_bamplot(sample_names=h3k27ac_list, input_region=gff_region, watch=True)
361+
#res_collection.download(output='/grail/genialis/pipeline_resdk')
301362

302363
# if we would like to use bams that are not part of the res_collection
303364
#bam = res.run('upload-bam', input='src':'<path/to/bam>')
304365
#bam1 = res.run('upload-bam', input='src':'<path/to/bam1>')
305366
#bamplot = res.run_bamplot(bam=[bam.id, bam1.id], genome='',... )
306367

307-
print(macs.id)
308-
print(rose.id)
309-
print(res_collection._sample_dict[sample_name])
368+
#print(macs.id)
369+
#print(rose.id)
370+
#print(res_collection._sample_dict[sample_name])
310371

311372
#now we should be able to retrieve the macs output easily by doing
312373

313-
print(macs.files())
374+
#print(macs.files())
314375

315376
#========================
316377
# #run macs on everybody w/ background at p of 1e-9 and download to a folder
@@ -320,10 +381,19 @@ def main():
320381
# #macs_folder = utils.formatFolder('%s%s_MACS14/' % (macs_parent_folder,sample_name),True)
321382
# #res_collection = run_macs14(res_collection,sample_name,useBackground=True,p_value='1e-9',output=macs_folder)
322383
# res_collection = run_macs14(res_collection,sample_name,useBackground=True,p_value='1e-9')
384+
# gtf = res.run('upload-gtf', input={'src':'/grail/genialis/Homo_sapiens.GRCh38.86.gtf.gz', 'source':'NCBI'}) # upload gff file
385+
# add hg19 annotation file
386+
gtf = res.data.get('hg19gtf-3')
323387

324388
for sample_name in h3k27ac_list:
325389
res_collection.run_rose2(sample_name, useBackground=True, tss=0, stitch=None, macs_params={'p_value': '1e-9'}, watch=True)
326-
res_collection.run_bamplot(sample_names=h3k27ac_list, input_region=gff_region, watch=True, title='h3k27ac_list')
390+
391+
for sample_name in all_samples:
392+
res_collection.run_cuffquant(sample_name, gff=gtf, watch=True)
393+
394+
res_collection.run_bamplot(sample_names=h3k27ac_list, input_region=gff_region, watch=True, title='h3k27ac_list')
395+
res_collection.run_cuffnorm(sample_names=all_samples, watch=True)
396+
327397
res_collection.download(output='/grail/genialis/pipeline_resdk')
328398

329399

0 commit comments

Comments
 (0)