From 3ed14b2cec0dfdad3f4b2831f2b4a86d11aef150 Mon Sep 17 00:00:00 2001
From: Andrej Karpathy
Date: Sun, 27 Mar 2022 17:48:05 +0100
Subject: [PATCH] i know it doesn't look like much, but this kwarg was not
 used lol :D

---
 mingpt/model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mingpt/model.py b/mingpt/model.py
index b315a7f..f4cc661 100644
--- a/mingpt/model.py
+++ b/mingpt/model.py
@@ -58,7 +58,7 @@ class CausalSelfAttention(nn.Module):
                                      .view(1, 1, config.block_size, config.block_size))
         self.n_head = config.n_head
 
-    def forward(self, x, layer_past=None):
+    def forward(self, x):
         B, T, C = x.size() # batch size, sequence length, embedding dimensionality (n_embd)
 
         # calculate query, key, values for all heads in batch and move head forward to be the batch dim
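
Editor's note: for context, here is a minimal sketch of what CausalSelfAttention.forward
computes. This is illustrative, not the exact minGPT source; the hyperparameters, the
combined c_attn/c_proj projection names, and the buffer name are assumptions. The point
it demonstrates is that the forward body only ever reads x, so the old layer_past=None
parameter was dead code (likely a leftover from a KV-cache style API) and removing it
changes no behavior.

import math
import torch
import torch.nn as nn
import torch.nn.functional as F

class CausalSelfAttention(nn.Module):
    """Minimal sketch of the module the patch touches (not the exact minGPT code)."""

    def __init__(self, n_embd=128, n_head=4, block_size=64):
        super().__init__()
        assert n_embd % n_head == 0
        # combined query/key/value projection and output projection (names assumed)
        self.c_attn = nn.Linear(n_embd, 3 * n_embd)
        self.c_proj = nn.Linear(n_embd, n_embd)
        # causal mask: position t may only attend to positions <= t
        self.register_buffer("mask", torch.tril(torch.ones(block_size, block_size))
                                     .view(1, 1, block_size, block_size))
        self.n_head = n_head

    def forward(self, x):  # note: no layer_past -- nothing in the body below used it
        B, T, C = x.size()  # batch size, sequence length, embedding dimensionality
        # calculate query, key, values for all heads in batch and
        # move head forward to be the batch dim
        q, k, v = self.c_attn(x).split(C, dim=2)
        k = k.view(B, T, self.n_head, C // self.n_head).transpose(1, 2)  # (B, nh, T, hs)
        q = q.view(B, T, self.n_head, C // self.n_head).transpose(1, 2)  # (B, nh, T, hs)
        v = v.view(B, T, self.n_head, C // self.n_head).transpose(1, 2)  # (B, nh, T, hs)
        # scaled dot-product attention with the causal mask applied
        att = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(k.size(-1)))
        att = att.masked_fill(self.mask[:, :, :T, :T] == 0, float('-inf'))
        att = F.softmax(att, dim=-1)
        y = att @ v                                   # (B, nh, T, hs)
        y = y.transpose(1, 2).contiguous().view(B, T, C)
        return self.c_proj(y)

One practical consequence of the signature change: before the patch, a caller passing
layer_past got silent no-op behavior; after it, the same call raises a TypeError, which
surfaces the misuse instead of hiding it.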