RNAlib-2.4.17
Python Examples

MFE Prediction (flat interface)

import RNA
# The RNA sequence
seq = "GAGUAGUGGAACCAGGCUAUGUUUGUGACUCGCAGACUAACA"
# compute minimum free energy (MFE) and corresponding structure
(ss, mfe) = RNA.fold(seq)
# print output
print "%s\n%s [ %6.2f ]" % (seq, ss, mfe)

MFE Prediction (object oriented interface)

import RNA;
sequence = "CGCAGGGAUACCCGCG"
# create new fold_compound object
fc = RNA.fold_compound(sequence)
# compute minimum free energy (mfe) and corresponding structure
(ss, mfe) = fc.mfe()
# print output
print "%s [ %6.2f ]" % (ss, mfe)

Suboptimal Structure Prediction

import RNA
sequence = "GGGGAAAACCCC"
# Set global switch for unique ML decomposition
RNA.cvar.uniq_ML = 1
subopt_data = { 'counter' : 1, 'sequence' : sequence }
# Print a subopt result as FASTA record
def print_subopt_result(structure, energy, data):
if not structure == None:
print ">subopt %d" % data['counter']
print "%s" % data['sequence']
print "%s [%6.2f]" % (structure, energy)
# increase structure counter
data['counter'] = data['counter'] + 1
# Create a 'fold_compound' for our sequence
a = RNA.fold_compound(sequence)
# Enumerate all structures 500 dacal/mol = 5 kcal/mol arround
# the MFE and print each structure using the function above
a.subopt_cb(500, print_subopt_result, subopt_data);

Boltzmann Sampling (a.k.a. Probabilistic Backtracing)

import RNA
sequence = "UGGGAAUAGUCUCUUCCGAGUCUCGCGGGCGACGGGCGAUCUUCGAAAGUGGAAUCCGUACUUAUACCGCCUGUGCGGACUACUAUCCUGACCACAUAGU"
"""
A simple callback function that stores
a structure sample into a list
"""
def store_structure(s, data):
if s:
data.append(s)
"""
First we prepare a fold_compound object
"""
# create model details
md = RNA.md()
# activate unique multibranch loop decomposition
md.uniq_ML = 1
# create fold compound object
fc = RNA.fold_compound(sequence, md)
# compute MFE
(ss, mfe) = fc.mfe()
# rescale Boltzmann factors according to MFE
fc.exp_params_rescale(mfe)
# compute partition function to fill DP matrices
fc.pf()
"""
Now we are ready to perform Boltzmann sampling
"""
# 1. backtrace a single sub-structure of length 10
print "%s" % fc.pbacktrack5(10)
# 2. backtrace a single sub-structure of length 50
print "%s" % fc.pbacktrack5(50)
# 3. backtrace multiple sub-structures of length 10 at once
for s in fc.pbacktrack5(20, 10):
print "%s" % s
# 4. backtrace multiple sub-structures of length 50 at once
for s in fc.pbacktrack5(100, 50):
print "%s" % s
# 5. backtrace a single structure (full length)
print "%s" % fc.pbacktrack()
# 6. backtrace multiple structures at once
for s in fc.pbacktrack(100):
print "%s" % s
# 7. backtrace multiple structures non-redundantly
for s in fc.pbacktrack(100, RNA.PBACKTRACK_NON_REDUNDANT):
print "%s" % s
# 8. backtrace multiple structures non-redundantly (with resume option)
num_samples = 500
iterations = 15
d = None # pbacktrack memory object
s_list = list()
for i in range(0, iterations):
d, ss = fc.pbacktrack(num_samples, d, RNA.PBACKTRACK_NON_REDUNDANT)
s_list = s_list + list(ss)
for s in s_list:
print "%s" % s
# 9. backtrace multiple sub-structures of length 50 in callback mode
ss = list()
i = fc.pbacktrack5(100, 50, store_structure, ss)
for s in ss:
print "%s" % s
# 10. backtrace multiple full-length structures in callback mode
ss = list()
i = fc.pbacktrack(100, store_structure, ss)
for s in ss:
print "%s" % s
# 11. non-redundantly backtrace multiple full-length structures in callback mode
ss = list()
i = fc.pbacktrack(100, store_structure, ss, RNA.PBACKTRACK_NON_REDUNDANT)
for s in ss:
print "%s" % s
# 12. non-redundantly backtrace multiple full length structures
# in callback mode with resume option
ss = list()
d = None # pbacktrack memory object
for i in range(0, iterations):
d, i = fc.pbacktrack(num_samples, store_structure, ss, d, RNA.PBACKTRACK_NON_REDUNDANT)
for s in ss:
print "%s" % s

RNAfold -p –MEA equivalent

#!/usr/bin/python
#
import RNA
seq = "AUUUCCACUAGAGAAGGUCUAGAGUGUUUGUCGUUUGUCAGAAGUCCCUAUUCCAGGUACGAACACGGUGGAUAUGUUCGACGACAGGAUCGGCGCACUA"
# create fold_compound data structure (required for all subsequently applied algorithms)
fc = RNA.fold_compound(seq)
# compute MFE and MFE structure
(mfe_struct, mfe) = fc.mfe()
# rescale Boltzmann factors for partition function computation
fc.exp_params_rescale(mfe)
# compute partition function
(pp, pf) = fc.pf()
# compute centroid structure
(centroid_struct, dist) = fc.centroid()
# compute free energy of centroid structure
centroid_en = fc.eval_structure(centroid_struct)
# compute MEA structure
(MEA_struct, MEA) = fc.MEA()
# compute free energy of MEA structure
MEA_en = fc.eval_structure(MEA_struct)
# print everything like RNAfold -p --MEA
print("%s\n%s (%6.2f)" % (seq, mfe_struct, mfe))
print("%s [%6.2f]" % (pp, pf))
print("%s {%6.2f d=%.2f}" % (centroid_struct, centroid_en, dist))
print("%s {%6.2f MEA=%.2f}" % (MEA_struct, MEA_en, MEA))
print(" frequency of mfe structure in ensemble %g; ensemble diversity %-6.2f" % (fc.pr_structure(mfe_struct), fc.mean_bp_distance()))

Fun with Soft Constraints

import RNA
seq1 = "CUCGUCGCCUUAAUCCAGUGCGGGCGCUAGACAUCUAGUUAUCGCCGCAA"
# Turn-off dangles globally
RNA.cvar.dangles = 0
# Data structure that will be passed to our MaximumMatching() callback with two components:
# 1. a 'dummy' fold_compound to evaluate loop energies w/o constraints, 2. a fresh set of energy parameters
mm_data = { 'dummy': RNA.fold_compound(seq1), 'params': RNA.param() }
# Nearest Neighbor Parameter reversal functions
revert_NN = {
RNA.DECOMP_PAIR_HP: lambda i, j, k, l, f, p: - f.eval_hp_loop(i, j) - 100,
RNA.DECOMP_PAIR_IL: lambda i, j, k, l, f, p: - f.eval_int_loop(i, j, k, l) - 100,
RNA.DECOMP_PAIR_ML: lambda i, j, k, l, f, p: - p.MLclosing - p.MLintern[0] - (j - i - k + l - 2) * p.MLbase - 100,
RNA.DECOMP_ML_ML_STEM: lambda i, j, k, l, f, p: - p.MLintern[0] - (l - k - 1) * p.MLbase,
RNA.DECOMP_ML_STEM: lambda i, j, k, l, f, p: - p.MLintern[0] - (j - i - k + l) * p.MLbase,
RNA.DECOMP_ML_ML: lambda i, j, k, l, f, p: - (j - i - k + l) * p.MLbase,
RNA.DECOMP_ML_ML_ML: lambda i, j, k, l, f, p: 0,
RNA.DECOMP_ML_UP: lambda i, j, k, l, f, p: - (j - i + 1) * p.MLbase,
RNA.DECOMP_EXT_STEM: lambda i, j, k, l, f, p: - f.E_ext_loop(k, l),
RNA.DECOMP_EXT_EXT: lambda i, j, k, l, f, p: 0,
RNA.DECOMP_EXT_STEM_EXT: lambda i, j, k, l, f, p: - f.E_ext_loop(i, k),
RNA.DECOMP_EXT_EXT_STEM: lambda i, j, k, l, f, p: - f.E_ext_loop(l, j),
RNA.DECOMP_EXT_EXT_STEM1: lambda i, j, k, l, f, p: - f.E_ext_loop(l, j-1),
}
# Maximum Matching callback function (will be called by RNAlib in each decomposition step)
def MaximumMatching(i, j, k, l, d, data):
return revert_NN[d](i, j, k, l, data['dummy'], data['params'])
# Create a 'fold_compound' for our sequence
fc = RNA.fold_compound(seq1)
# Add maximum matching soft-constraints
fc.sc_add_f(MaximumMatching)
fc.sc_add_data(mm_data, None)
# Call MFE algorithm
(s, mm) = fc.mfe()
# print result
print "%s\n%s (MM: %d)\n" % (seq1, s, -mm)