-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathprotein_degradation.py
238 lines (192 loc) · 7.9 KB
/
protein_degradation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
"""
===================
Protein Degradation
===================
This process accounts for the degradation of protein monomers.
Specific proteins to be degraded are selected as a Poisson process.
TODO:
- protein complexes
- add protease functionality
"""
import numpy as np
from vivarium.core.composition import simulate_process
from ecoli.library.data_predicates import (
monotonically_increasing,
monotonically_decreasing,
all_nonnegative,
)
from ecoli.library.schema import numpy_schema, counts, bulk_name_to_idx
from ecoli.processes.registries import topology_registry
from ecoli.processes.partition import PartitionedProcess
# Register default topology for this process, associating it with process name
NAME = "ecoli-protein-degradation"
TOPOLOGY = {"bulk": ("bulk",), "timestep": ("timestep",)}
topology_registry.register(NAME, TOPOLOGY)
class ProteinDegradation(PartitionedProcess):
"""Protein Degradation PartitionedProcess"""
name = NAME
topology = TOPOLOGY
defaults = {
"raw_degradation_rate": [],
"water_id": "h2o",
"amino_acid_ids": [],
"amino_acid_counts": [],
"protein_ids": [],
"protein_lengths": [],
"seed": 0,
"time_step": 1,
}
# Constructor
def __init__(self, parameters=None):
super().__init__(parameters)
self.raw_degradation_rate = self.parameters["raw_degradation_rate"]
self.water_id = self.parameters["water_id"]
self.amino_acid_ids = self.parameters["amino_acid_ids"]
self.amino_acid_counts = self.parameters["amino_acid_counts"]
self.metabolite_ids = self.amino_acid_ids + [self.water_id]
self.amino_acid_indexes = np.arange(0, len(self.amino_acid_ids))
self.water_index = self.metabolite_ids.index(self.water_id)
# Build protein IDs for S matrix
self.protein_ids = self.parameters["protein_ids"]
self.protein_lengths = self.parameters["protein_lengths"]
self.seed = self.parameters["seed"]
self.random_state = np.random.RandomState(seed=self.seed)
self.metabolite_idx = None
# Build S matrix
self.degradation_matrix = np.zeros(
(len(self.metabolite_ids), len(self.protein_ids)), np.int64
)
self.degradation_matrix[self.amino_acid_indexes, :] = np.transpose(
self.amino_acid_counts
)
# Assuming N-1 H2O is required per peptide chain length N
self.degradation_matrix[self.water_index, :] = -(
np.sum(self.degradation_matrix[self.amino_acid_indexes, :], axis=0) - 1
)
def ports_schema(self):
return {
"bulk": numpy_schema("bulk"),
"timestep": {"_default": self.parameters["time_step"]},
}
def calculate_request(self, timestep, states):
# In first timestep, convert all strings to indices
if self.metabolite_idx is None:
self.water_idx = bulk_name_to_idx(self.water_id, states["bulk"]["id"])
self.protein_idx = bulk_name_to_idx(self.protein_ids, states["bulk"]["id"])
self.metabolite_idx = bulk_name_to_idx(
self.metabolite_ids, states["bulk"]["id"]
)
protein_data = counts(states["bulk"], self.protein_idx)
# Determine how many proteins to degrade based on the degradation rates
# and counts of each protein
nProteinsToDegrade = np.fmin(
self.random_state.poisson(
self._proteinDegRates(states["timestep"]) * protein_data
),
protein_data,
)
# Determine the number of hydrolysis reactions
# TODO(vivarium): Missing asNumber() and other unit-related things
nReactions = np.dot(self.protein_lengths, nProteinsToDegrade)
# Determine the amount of water required to degrade the selected proteins
# Assuming one N-1 H2O is required per peptide chain length N
requests = {
"bulk": [
(self.protein_idx, nProteinsToDegrade),
(self.water_idx, nReactions - np.sum(nProteinsToDegrade)),
]
}
return requests
def evolve_state(self, timestep, states):
# Degrade selected proteins, release amino acids from those proteins
# back into the cell, and consume H_2O that is required for the
# degradation process
allocated_proteins = counts(states["bulk"], self.protein_idx)
metabolites_delta = np.dot(self.degradation_matrix, allocated_proteins)
update = {
"bulk": [
(self.metabolite_idx, metabolites_delta),
(self.protein_idx, -allocated_proteins),
]
}
return update
def _proteinDegRates(self, timestep):
return self.raw_degradation_rate * timestep
def test_protein_degradation(return_data=False):
test_config = {
"raw_degradation_rate": np.array([0.05, 0.08, 0.13, 0.21]),
"water_id": "H2O",
"amino_acid_ids": ["A", "B", "C"],
"amino_acid_counts": np.array([[5, 7, 13], [1, 3, 5], [4, 4, 4], [13, 11, 5]]),
"protein_ids": ["w", "x", "y", "z"],
"protein_lengths": np.array([25, 9, 12, 29]),
}
protein_degradation = ProteinDegradation(test_config)
state = {
"bulk": np.array(
[
("A", 10),
("B", 20),
("C", 30),
("w", 50),
("x", 60),
("y", 70),
("z", 80),
("H2O", 10000),
],
dtype=[("id", "U40"), ("count", int)],
)
}
settings = {"total_time": 100, "initial_state": state}
data = simulate_process(protein_degradation, settings)
# Assertions =======================================================
bulk_timeseries = np.array(data["bulk"])
protein_data = bulk_timeseries[:, 3:7]
protein_delta = protein_data[1:] - protein_data[:-1]
aa_data = bulk_timeseries[:, :3]
aa_delta = aa_data[1:] - aa_data[:-1]
h2o_data = bulk_timeseries[:, 7]
h2o_delta = h2o_data[1:] - h2o_data[:-1]
# Proteins are monotonically decreasing, never <0:
for i in range(protein_data.shape[1]):
assert monotonically_decreasing(
protein_data[:, i]
), f"Protein {test_config['protein_ids'][i]} is not monotonically decreasing."
assert all_nonnegative(
protein_data
), f"Protein {test_config['protein_ids'][i]} falls below 0."
# Amino acids are monotonically increasing
for i in range(aa_data.shape[1]):
assert monotonically_increasing(
aa_data[:, i]
), f"Amino acid {test_config['amino_acid_ids'][i]} is not monotonically increasing."
# H2O is monotonically decreasing, never < 0
assert monotonically_decreasing(h2o_data), "H2O is not monotonically decreasing."
assert all_nonnegative(h2o_data), "H2O falls below 0."
# Amino acids are released in specified numbers whenever a protein is degraded
aa_delta_expected = map(
lambda i: [test_config["amino_acid_counts"].T @ -protein_delta[i, :]],
range(protein_delta.shape[0]),
)
aa_delta_expected = np.concatenate(list(aa_delta_expected))
np.testing.assert_array_equal(
aa_delta,
aa_delta_expected,
"Mismatch between expected release of amino acids, and counts actually released.",
)
# N-1 molecules H2O is consumed whenever a protein of length N is degraded
h2o_delta_expected = (protein_delta * (test_config["protein_lengths"] - 1)).T
h2o_delta_expected = np.sum(h2o_delta_expected, axis=0)
np.testing.assert_array_equal(
h2o_delta,
h2o_delta_expected,
(
"Mismatch between number of water molecules consumed\n"
"and expected to be consumed in degradation."
),
)
print("Passed all tests.")
if return_data:
return data
if __name__ == "__main__":
test_protein_degradation()