Add files via upload

Add initial project files
2023-08-04 14:32:47 -04:00
parent 1aed8b655a
commit e4145441eb
31 changed files with 266571 additions and 0 deletions
--- a/src/model/clip/load.rs
+++ b/src/model/clip/load.rs
@@ -0,0 +1,86 @@
+use std::error::Error;
+use burn::tensor::ElementConversion;
+
+use burn::{
+    config::Config, 
+    module::{Module, Param},
+    nn,
+    tensor::{
+        backend::Backend,
+        Tensor,
+    },
+};
+
+use super::*;
+use crate::model::load::*;
+
+pub fn load_mlp<B: Backend>(path: &str, device: &B::Device) -> Result<MLP<B>, Box<dyn Error>> {
+    let fc1 = load_linear(&format!("{}/{}", path, "fc1"), device)?;
+    let gelu = QuickGELU::new();
+    let fc2 = load_linear(&format!("{}/{}", path, "fc2"), device)?;
+
+    let mlp = MLP {
+        fc1: fc1,
+        gelu: gelu,
+        fc2: fc2,
+    };
+
+    Ok(mlp)
+}
+
+pub fn load_multi_head_self_attention<B: Backend>(path: &str, device: &B::Device) -> Result<MultiHeadSelfAttention<B>, Box<dyn Error>> {
+    let n_head = load_usize::<B>("n_head", path, device)?;
+    let query = load_linear(&format!("{}/{}", path, "query"), device)?;
+    let key = load_linear(&format!("{}/{}", path, "key"), device)?;
+    let value = load_linear(&format!("{}/{}", path, "value"), device)?;
+    let out = load_linear(&format!("{}/{}", path, "out"), device)?;
+
+    let mhsa = MultiHeadSelfAttention {
+        n_head: n_head,
+        query: query,
+        key: key,
+        value: value,
+        out: out,
+    };
+
+    Ok(mhsa)
+}
+
+pub fn load_residual_decoder_attention_block<B: Backend>(path: &str, device: &B::Device) -> Result<ResidualDecoderAttentionBlock<B>, Box<dyn Error>> {
+    let mlp = load_mlp(&format!("{}/{}", path, "mlp"), device)?;
+    let attn = load_multi_head_self_attention(&format!("{}/{}", path, "attn"), device)?;
+    let attn_ln = load_layer_norm(&format!("{}/{}", path, "attn_ln"), device)?;
+    let mlp_ln = load_layer_norm(&format!("{}/{}", path, "mlp_ln"), device)?;
+
+    let rdab = ResidualDecoderAttentionBlock {
+        attn: attn,
+        attn_ln: attn_ln,
+        mlp: mlp,
+        mlp_ln: mlp_ln,
+    };
+
+    Ok(rdab)
+}
+
+pub fn load_clip<B: Backend>(path: &str, device: &B::Device) -> Result<CLIP<B>, Box<dyn Error>> {
+    let token_embedding = load_embedding(&format!("{}/{}", path, "token_embedding"), device)?;
+    let position_embedding = load_tensor("weight", &format!("{}/position_embedding", path), device)?.into();
+
+    let n_layer = load_usize::<B>("n_layer", path, device)?;
+    let mut blocks = (0..n_layer)
+        .into_iter()
+        .map(|i| {
+            load_residual_decoder_attention_block::<B>(&format!("{}/blocks/{}", path, i), device)
+        }).collect::<Result<Vec<_>, _>>()?;
+    
+    let layer_norm = load_layer_norm(&format!("{}/{}", path, "layer_norm"), device)?;
+
+    let clip = CLIP {
+        token_embedding: token_embedding,
+        position_embedding: position_embedding,
+        blocks: blocks,
+        layer_norm: layer_norm,
+    };
+    
+    Ok(clip)
+}
--- a/src/model/clip/mod.rs
+++ b/src/model/clip/mod.rs
@@ -0,0 +1,220 @@
+pub mod load;
+
+use burn::{
+    config::Config, 
+    module::{Module, Param},
+    nn,
+    tensor::{
+        backend::Backend,
+        activation::{softmax, sigmoid}, 
+        module::embedding, 
+        Tensor,
+        Distribution, 
+        Int, 
+    },
+};
+
+use crate::model::attention::{qkv_attention, attn_decoder_mask};
+
+
+#[derive(Config)]
+pub struct CLIPConfig {
+    n_vocab: usize, 
+    n_state: usize, 
+    n_head: usize, 
+    n_ctx: usize, 
+    n_layer: usize, 
+}
+
+impl CLIPConfig {
+    pub fn init<B: Backend>(&self) -> CLIP<B> {
+        let token_embedding = nn::EmbeddingConfig::new(self.n_vocab, self.n_state).init();
+        let position_embedding = Tensor::random([self.n_ctx, self.n_state], Distribution::Normal(0.0, 1.0)).into();
+        let blocks = (0..self.n_layer)
+            .into_iter()
+            .map(|_| ResidualDecoderAttentionBlockConfig::new(self.n_state, self.n_head).init())
+            .collect();
+        let layer_norm = nn::LayerNormConfig::new(self.n_state).init();
+
+        CLIP {
+            token_embedding, 
+            position_embedding, 
+            blocks, 
+            layer_norm, 
+        }
+    }
+}
+
+
+  
+#[derive(Module, Debug)]
+pub struct CLIP<B: Backend> {
+    token_embedding: nn::Embedding<B>, 
+    position_embedding: Param<Tensor<B, 2>>, 
+    blocks: Vec<ResidualDecoderAttentionBlock<B>>, 
+    layer_norm: nn::LayerNorm<B>, 
+}
+
+impl<B: Backend> CLIP<B> {
+    pub fn forward(&self, x: Tensor<B, 2, Int>) -> Tensor<B, 3> {
+        let [n_batch, seq_len] = x.dims();
+        
+        let mask = attn_decoder_mask(seq_len);
+
+        let embedded = self.token_embedding.forward(x) 
+            + self.position_embedding.val().slice([0..seq_len]).unsqueeze();
+        
+        let mut x = embedded;
+        for block in &self.blocks {
+            x = block.forward(x, mask.clone());
+        }
+
+        self.layer_norm.forward(x)
+    }
+}
+
+
+
+#[derive(Config)]
+pub struct ResidualDecoderAttentionBlockConfig {
+    n_state: usize, 
+    n_head: usize, 
+}
+
+impl ResidualDecoderAttentionBlockConfig {
+    pub fn init<B: Backend>(&self) -> ResidualDecoderAttentionBlock<B> {
+        let attn = MultiHeadSelfAttentionConfig::new(self.n_state, self.n_head).init();
+        let attn_ln = nn::LayerNormConfig::new(self.n_state).init();
+        
+        let mlp = MLPConfig::new(self.n_state, 4 * self.n_state).init();
+        let mlp_ln = nn::LayerNormConfig::new(self.n_state).init();
+
+        ResidualDecoderAttentionBlock {
+            attn, 
+            attn_ln, 
+            mlp, 
+            mlp_ln, 
+        }
+    }
+}
+
+#[derive(Module, Debug)]
+pub struct ResidualDecoderAttentionBlock<B: Backend> {
+    attn: MultiHeadSelfAttention<B>, 
+    attn_ln: nn::LayerNorm<B>, 
+    mlp: MLP<B>, 
+    mlp_ln: nn::LayerNorm<B>, 
+}
+
+impl<B: Backend> ResidualDecoderAttentionBlock<B> {
+    fn forward(&self, x: Tensor<B, 3>, mask: Tensor<B, 2>) -> Tensor<B, 3> {
+        let x = x.clone() + self.attn.forward(self.attn_ln.forward(x), Some(mask));
+        let x = x.clone() + self.mlp.forward(self.mlp_ln.forward(x));
+        return x;
+    }
+}
+
+#[derive(Config)]
+pub struct MultiHeadSelfAttentionConfig {
+    n_state: usize,
+    n_head: usize,  
+}
+
+impl MultiHeadSelfAttentionConfig {
+    fn init<B: Backend>(&self) -> MultiHeadSelfAttention<B> {
+        assert!(self.n_state % self.n_head == 0, "State size {} must be a multiple of head size {}", self.n_state, self.n_head);
+
+        let n_head = self.n_head;
+        let query = nn::LinearConfig::new(self.n_state, self.n_state).init();
+        let key = nn::LinearConfig::new(self.n_state, self.n_state).init();
+        let value = nn::LinearConfig::new(self.n_state, self.n_state).init();
+        let out = nn::LinearConfig::new(self.n_state, self.n_state).init();
+
+        MultiHeadSelfAttention { 
+            n_head, 
+            query, 
+            key, 
+            value, 
+            out 
+        }
+    }
+}
+
+#[derive(Module, Debug)]
+pub struct MultiHeadSelfAttention<B: Backend> {
+    n_head: usize, 
+    query: nn::Linear<B>, 
+    key: nn::Linear<B>, 
+    value: nn::Linear<B>, 
+    out: nn::Linear<B>, 
+}
+
+impl<B: Backend> MultiHeadSelfAttention<B> {
+    pub fn forward(&self, x: Tensor<B, 3>, mask: Option<Tensor<B, 2>>) -> Tensor<B, 3> {
+        let q = self.query.forward(x.clone());
+        let k = self.key.forward(x.clone());
+        let v = self.value.forward(x);
+
+        let wv = qkv_attention(q, k, v, mask, self.n_head);
+
+        return self.out.forward(wv);
+    }
+}
+
+
+
+
+
+
+
+
+#[derive(Config, Debug)]
+pub struct MLPConfig {
+    input_size: usize, 
+    hidden_size: usize, 
+}
+
+impl MLPConfig {
+    fn init<B: Backend>(&self) -> MLP<B> {
+        let fc1 = nn::LinearConfig::new(self.input_size, self.hidden_size).init();
+        let gelu = QuickGELU::new();
+        let fc2 = nn::LinearConfig::new(self.hidden_size, self.input_size).init();
+
+        MLP {
+            fc1, 
+            gelu, 
+            fc2, 
+        }
+    }
+}
+
+#[derive(Module, Debug)]
+pub struct MLP<B: Backend> {
+    fc1: nn::Linear<B>, 
+    gelu: QuickGELU, 
+    fc2: nn::Linear<B>, 
+}
+
+impl<B: Backend> MLP<B> {
+    fn forward<const D: usize>(&self, x: Tensor<B, D>) -> Tensor<B, D> {
+        let x = self.fc1.forward(x);
+        let x = self.gelu.forward(x);
+        let x = self.fc2.forward(x);
+
+        x
+    }
+}
+
+#[derive(Module, Clone, Debug)]
+pub struct QuickGELU {}
+
+impl QuickGELU {
+    fn new() -> Self {
+        Self {}
+    }
+
+    fn forward<B: Backend, const D: usize>(&self, x: Tensor<B, D>) -> Tensor<B, D> {
+        x.clone() * sigmoid(x * 1.702)
+    }
+}
+