EleutherAI
diff --git a/elk/rwkv_lm/rwkv_hf.py b/elk/rwkv_lm/rwkv_hf.py
Lines changed: 6 additions & 4 deletions
@@ -1,6 +1,8 @@
 import os
+import gc
 import torch
-from rwkv.model import RWKV
+# from rwkv.model import RWKV
+from .rwkv_hiddens import RWKV
 from huggingface_hub import hf_hub_download
 from transformers import AutoTokenizer, GPT2TokenizerFast, PreTrainedModel, PretrainedConfig
 from transformers.modeling_outputs import CausalLMOutput
@@ -12,7 +14,7 @@ class RWKVConfig(PretrainedConfig):
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
         self.hidden_size = 2048
-        self.num_hidden_layers = 120
+        self.num_hidden_layers = 25
         self.is_encoder_decoder = False
         self.architectures = ["RWKV-LM"]
 
@@ -21,7 +23,7 @@ def __init__(self):
         super().__init__(RWKVConfig())
         weights_path = "/home/kyle/HF-MODEL/rwkv-4-pile-1b5/models--BlinkDL--rwkv-4-pile-1b5/snapshots/6ea995eaa87a17af560c9b41ce1a3d92355c5a49/RWKV-4-Pile-1B5-20220903-8040.pth"
         # weights_path = "/home/kyle/HF-MODEL/rwkv-4-pile-14b/models--BlinkDL--rwkv-4-pile-14b/snapshots/939b6851f96122b7b49bd00d446b3b49481214dd/RWKV-4-Pile-14B-20230213-8019.pth"
-        self.model = RWKV(model=weights_path, strategy='cuda bf16')
+        self.model = RWKV(model=weights_path, strategy='cuda fp16')
 
     def forward(
         self,
@@ -37,7 +39,7 @@ def forward(
         token, states = self.model.forward(inputs, None)
         mock_embedding_state = states[0].clone()
         output_states = [mock_embedding_state] + states
-        response = CausalLMOutput(logits=token, hidden_states=output_states)
+        response = CausalLMOutput(logits=token.detach().clone(), hidden_states=[state.detach() for state in output_states])
         return response
 
     # @staticmethod