DeepPrime: pegRNA efficiency prediction

2023-10-17

DeepPrime

Installation

## Create and activate virtual environment
conda create -n dprime python=3.8
conda activate dprime

## Install required Python packages
pip install tensorflow==2.8.0     # use the pip from the conda env above
pip install torch==1.10.0+cu113 torchvision==0.11.1+cu113 torchaudio==0.10.0+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html
pip install biopython==1.78 
pip install pandas regex silence-tensorflow 

## Install ViennaRNA
pip install ViennaRNA

## Download source code
wget https://github.com/hkimlab/DeepPrime/archive/main.zip
unzip main.zip
cd DeepPrime-main

python DeepPrime.py -h
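
## Optional: verify the environment; a minimal check against the versions pinned above
## (note: the ViennaRNA pip package is imported as 'RNA')
python - <<'EOF'
import tensorflow as tf, torch, Bio, RNA
print('tensorflow', tf.__version__)      # expect 2.8.0
print('torch', torch.__version__)        # expect 1.10.0+cu113
print('biopython', Bio.__version__)      # expect 1.78
print('CUDA available:', torch.cuda.is_available())
EOF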

Test

python DeepPrime.py -f ./example_input/dp_code_test.csv

Usage

python DeepPrime.py 
[-h] 
[-f INPUT_FILE] 
[-n NAME] 
[-p {PE2,PE2max,PE2max-e,PE4max,PE4max-e,NRCH_PE2,NRCH_PE2max,NRCH_PE4max,PE-off}] 
[--cell_type {HEK293T,A549,DLD1,HCT116,HeLa,MDA-MB-231,NIH3T3}] 
[--pbs_min PBS_MIN] 
[--pbs_max PBS_MAX]
[--jobs JOBS] 
[--progress]

optional arguments:
  -h, --help            show this help message and exit
  -f INPUT_FILE, --input_file INPUT_FILE
                        Input file containing target sequence and edit type
  -n NAME, --name NAME  Sample name for your input (default='Sample')
  -p {PE2,PE2max,PE2max-e,PE4max,PE4max-e,NRCH_PE2,NRCH_PE2max,NRCH_PE4max,PE-off}, --pe_type {PE2,PE2max,PE2max-e,PE4max,PE4max-e,NRCH_PE2,NRCH_PE2max,NRCH_PE4max,PE-off}
                        PE type parameter (default=PE2max)
  --cell_type {HEK293T,A549,DLD1,HCT116,HeLa,MDA-MB-231,NIH3T3}
                        Cell type parameter. (default=HEK293T)
  --pbs_min PBS_MIN     PBS minimum length parameter (default=1)
  --pbs_max PBS_MAX     PBS maximum length parameter (default=17)
  --jobs JOBS           Number of cores for computing (default=1)
  --progress            Show processing message

Input

ID,RefSeq,Edited Seq,EditType
BRCA1e17_pos34_tat_CAT,AATCCTTTGAGTGTTTTTCATTCTGCAGATGCTGAGTTTGTGTGTGAACGGACACTGAAATATTTTCTAGGAATTGCGGGAGGAAAATGGGTAGTTAGCTATTTCTGTAAGTATAATACTA,AATCCTTTGAGTGTTTTTCATTCTGCAGATGCTGAGTTTGTGTGTGAACGGACACTGAAACATTTTCTAGGAATTGCGGGAGGAAAATGGGTAGTTAGCTATTTCTGTAAGTATAATACTA,sub1
BRCA1e17_pos34_tat_CCA,AATCCTTTGAGTGTTTTTCATTCTGCAGATGCTGAGTTTGTGTGTGAACGGACACTGAAATATTTTCTAGGAATTGCGGGAGGAAAATGGGTAGTTAGCTATTTCTGTAAGTATAATACTA,AATCCTTTGAGTGTTTTTCATTCTGCAGATGCTGAGTTTGTGTGTGAACGGACACTGAAACCATTTCTAGGAATTGCGGGAGGAAAATGGGTAGTTAGCTATTTCTGTAAGTATAATACTA,sub3
BRCA1e17_pos34
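
Each row holds an ID, the wild-type sequence (RefSeq), the desired edited sequence, and the edit type; in the example above, sub1 and sub3 are 1-nt and 3-nt substitutions. A minimal standalone check, not part of DeepPrime, that a subN label matches a sequence pair:

def count_substitutions(ref, edited):
    # substitution edits keep the length; count mismatched positions
    assert len(ref) == len(edited)
    return sum(a != b for a, b in zip(ref, edited))

print(count_substitutions('ACGTACGT', 'ACGAACGT'))  # 1 -> EditType 'sub1'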

Output

Scripts

for ct in HEK293T A549
do
    echo ${ct}
    python DeepPrime.py \
        -f input.csv \
        -p PE4max-e \
        --pbs_min 8 \
        --pbs_max 15 \
        --cell_type ${ct} \
        -n ${ct} \
        --jobs 8 >> log_${ct}
done
Training data: PE systems and their corresponding cell lines

Error & Correction 记录

File "/home/vg3/yijia/DeepPrime-main/src/dspcas9.py", line 145, in calculate_DeepSpCas9_score
    sess.run(tf.comapt.v1.global_variables_initializer())
AttributeError: module 'tensorflow' has no attribute 'comapt'
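
The root cause is a misspelling of compat in the shipped src/dspcas9.py. A one-line in-place patch, assuming the typo is the only problem:

python - <<'EOF'
from pathlib import Path
p = Path('src/dspcas9.py')
p.write_text(p.read_text().replace('tf.comapt', 'tf.compat'))
EOF

If that is not enough, replace the whole file with the corrected version below.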

Corrected src/dspcas9.py:

import os, sys
import numpy as np
from src.utils import preprocess_seq
from silence_tensorflow import silence_tensorflow
silence_tensorflow()

import tensorflow as tf

tf.compat.v1.disable_eager_execution()  # TF1-style graph code below; eager must be off under TF 2.x


class Deep_SpCas9(object):
    def __init__(self, filter_size, filter_num, node_1=80, node_2=60, l_rate=0.005):
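        # Architecture: three parallel conv branches over the one-hot 30-nt input
        # (filter sizes 3/5/7 with 100/70/40 filters in the released checkpoint),
        # concatenated and fed through two fully connected layers to a single
        # regression output.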
        length = 30
        self.inputs      = tf.compat.v1.placeholder(tf.float32, [None, 1, length, 4])
        self.targets     = tf.compat.v1.placeholder(tf.float32, [None, 1])
        self.is_training = tf.compat.v1.placeholder(tf.bool)

        def create_new_conv_layer(input_data, num_input_channels, num_filters, filter_shape, pool_shape, name):
            # set up the filter input shape for tf.nn.conv2d
            conv_filt_shape = [filter_shape[0], filter_shape[1], num_input_channels,
                               num_filters]

            # initialise weights and bias for the filter
            w = tf.compat.v1.Variable(tf.compat.v1.truncated_normal(conv_filt_shape, stddev=0.03), name=name + '_W')
            b = tf.compat.v1.Variable(tf.compat.v1.truncated_normal([num_filters]), name=name + '_b')

            # setup the convolutional layer operation
            out_layer = tf.nn.conv2d(input_data, w, [1, 1, 1, 1], padding='VALID')

            # add the bias
            out_layer += b

            # apply a ReLU activation followed by dropout
            out_layer = tf.keras.layers.Dropout(rate=0.3)(tf.nn.relu(out_layer))

            # now perform average pooling
            ksize     = [1, pool_shape[0], pool_shape[1], 1]
            strides   = [1, 1, 2, 1]
            out_layer = tf.nn.avg_pool(out_layer, ksize=ksize, strides=strides, padding='SAME')

            return out_layer

        # def end: create_new_conv_layer

        L_pool_0 = create_new_conv_layer(self.inputs, 4, filter_num[0], [1, filter_size[0]], [1, 2], name='conv1')
        L_pool_1 = create_new_conv_layer(self.inputs, 4, filter_num[1], [1, filter_size[1]], [1, 2], name='conv2')
        L_pool_2 = create_new_conv_layer(self.inputs, 4, filter_num[2], [1, filter_size[2]], [1, 2], name='conv3')

        with tf.compat.v1.variable_scope('Fully_Connected_Layer1'):
            layer_node_0 = int((length - filter_size[0]) / 2) + 1
            node_num_0   = layer_node_0 * filter_num[0]
            layer_node_1 = int((length - filter_size[1]) / 2) + 1
            node_num_1   = layer_node_1 * filter_num[1]
            layer_node_2 = int((length - filter_size[2]) / 2) + 1
            node_num_2   = layer_node_2 * filter_num[2]

            L_flatten_0  = tf.reshape(L_pool_0, [-1, node_num_0])
            L_flatten_1  = tf.reshape(L_pool_1, [-1, node_num_1])
            L_flatten_2  = tf.reshape(L_pool_2, [-1, node_num_2])
            L_flatten    = tf.concat([L_flatten_0, L_flatten_1, L_flatten_2], 1, name='concat')

            node_num     = node_num_0 + node_num_1 + node_num_2
            W_fcl1       = tf.compat.v1.get_variable("W_fcl1", shape=[node_num, node_1])
            B_fcl1       = tf.compat.v1.get_variable("B_fcl1", shape=[node_1])
            L_fcl1_pre   = tf.nn.bias_add(tf.matmul(L_flatten, W_fcl1), B_fcl1)
            L_fcl1       = tf.nn.relu(L_fcl1_pre)
            L_fcl1_drop  = tf.keras.layers.Dropout(rate=0.3)(L_fcl1)

        with tf.compat.v1.variable_scope('Fully_Connected_Layer2'):
            W_fcl2       = tf.compat.v1.get_variable("W_fcl2", shape=[node_1, node_2])
            B_fcl2       = tf.compat.v1.get_variable("B_fcl2", shape=[node_2])
            L_fcl2_pre   = tf.nn.bias_add(tf.matmul(L_fcl1_drop, W_fcl2), B_fcl2)
            L_fcl2       = tf.nn.relu(L_fcl2_pre)
            L_fcl2_drop  = tf.keras.layers.Dropout(rate=0.3)(L_fcl2)

        with tf.compat.v1.variable_scope('Output_Layer'):
            W_out        = tf.compat.v1.get_variable("W_out", shape=[node_2, 1])
            B_out        = tf.compat.v1.get_variable("B_out", shape=[1])
            self.outputs = tf.nn.bias_add(tf.matmul(L_fcl2_drop, W_out), B_out)

        # Define loss function and optimizer
        self.obj_loss    = tf.reduce_mean(tf.square(self.targets - self.outputs))
        self.optimizer   = tf.compat.v1.train.AdamOptimizer(l_rate).minimize(self.obj_loss)

    # def end: def __init__
# class end: Deep_SpCas9


def Model_Finaltest(sess, TEST_X, model):
    # run inference in fixed-size batches of 500 and collect predictions
    test_batch = 500
    TEST_Z = np.zeros((TEST_X.shape[0], 1), dtype=float)

    for i in range(int(np.ceil(float(TEST_X.shape[0]) / float(test_batch)))):
        Dict = {model.inputs: TEST_X[i * test_batch:(i + 1) * test_batch], model.is_training: False}
        TEST_Z[i * test_batch:(i + 1) * test_batch] = sess.run([model.outputs], feed_dict=Dict)[0]

    list_score = sum(TEST_Z.tolist(), [])

    return list_score


# def end: Model_Finaltest


def calculate_DeepSpCas9_score(sBase_DIR, list_target30):
    # TensorFlow config
    conf = tf.compat.v1.ConfigProto()
    conf.gpu_options.allow_growth = True
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    TEST_X = preprocess_seq(list_target30, 30)

    best_model_path = '%s/models/DeepSpCas9' % sBase_DIR
    best_model = 'PreTrain-Final-3-5-7-100-70-40-0.001-550-80-60'
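    # the checkpoint name encodes hyperparameters; the fields after 'PreTrain-Final'
    # are parsed below: filter sizes (3, 5, 7), filter counts (100, 70, 40),
    # l_rate, load_episode, and FC layer widths (80, 60)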
    valuelist = best_model.split('-')
    fulllist = []

    for value in valuelist:
        if value == 'True':
            value = True
        elif value == 'False':
            value = False
        else:
            try:
                value = int(value)
            except:
                try:
                    value = float(value)
                except:
                    pass
        fulllist.append(value)
    # loop end: value

    (filter_size_1, filter_size_2, filter_size_3, filter_num_1, filter_num_2,
     filter_num_3, l_rate, load_episode, node_1, node_2) = fulllist[2:]
    filter_size = [filter_size_1, filter_size_2, filter_size_3]
    filter_num = [filter_num_1, filter_num_2, filter_num_3]
    tf.compat.v1.reset_default_graph()
    with tf.compat.v1.Session(config=conf) as sess:
        # build the graph first: the initializer only covers variables that
        # already exist, and the saver then restores the pretrained weights
        model = Deep_SpCas9(filter_size, filter_num, node_1, node_2, l_rate)
        sess.run(tf.compat.v1.global_variables_initializer())

        saver = tf.compat.v1.train.Saver()
        saver.restore(sess, best_model_path + '/' + best_model)
        list_score = Model_Finaltest(sess, TEST_X, model)

    return list_score


def main():
    print('This is DeepSpCas9 model script')


if __name__ == '__main__':
    if len(sys.argv) == 1:
        main()
    else:
        function_name = sys.argv[1]
        function_parameters = sys.argv[2:]
        if function_name in locals().keys():
            locals()[function_name](*function_parameters)
        else:
            sys.exit('ERROR: function_name=%s, parameters=%s' % (function_name, function_parameters))
    # if END: len(sys.argv)
# if END: __name__
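
With the corrected file in place, the DeepSpCas9 scorer can be exercised on its own. A minimal sketch, run from DeepPrime-main and assuming the bundled models/DeepSpCas9 checkpoint is present; the input is a list of 30-nt target contexts:

python - <<'EOF'
from src.dspcas9 import calculate_DeepSpCas9_score
targets = ['AATCCTTTGAGTGTTTTTCATTCTGCAGAT']  # first 30 nt of the example RefSeq
print(calculate_DeepSpCas9_score('.', targets))
EOF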
