// ============================================================================
// Tools.js — loads real GPT-2 vocab.json + merges.txt into a fake model
// ============================================================================
export class Tools {

    /**
     * Builds a tiny fake GPT-2-style model backed by the *real* tokenizer
     * files (vocab.json + merges.txt) fetched from the `model/` directory.
     * Every weight tensor is deterministic placeholder data; only the
     * tokenizer tables are genuine.
     *
     * @returns {Promise<object>} model with `configuration`, tokenizer tables,
     *   fake embeddings, and `fakeNumberOfLayers` fake transformer layers.
     * @throws {Error} if either tokenizer file cannot be fetched (non-2xx).
     */
    static async generateFakeModel() {
        console.log("[Tools] Loading vocab.json + merges.txt…");

        // ------------------------------------------------------------
        // Load vocabulary + merges in parallel (independent requests).
        // ------------------------------------------------------------
        const [vocabResponse, mergesResponse] = await Promise.all([
            fetch("model/vocab.json"),  // <-- adjust path
            fetch("model/merges.txt"),  // <-- adjust path
        ]);
        // Fail fast with a clear message instead of a confusing parse error.
        if (!vocabResponse.ok) {
            throw new Error(`Failed to fetch model/vocab.json: HTTP ${vocabResponse.status}`);
        }
        if (!mergesResponse.ok) {
            throw new Error(`Failed to fetch model/merges.txt: HTTP ${mergesResponse.status}`);
        }

        const vocabularyList = await vocabResponse.json();

        const mergeRuleText = await mergesResponse.text();
        const mergeRuleList = mergeRuleText
            .split(/\r?\n/) // tolerate Windows line endings
            .filter(line => line.trim().length > 0 && !line.startsWith("#"));

        const vocabularySize = Object.keys(vocabularyList).length;
        console.log("[Tools] ✓ Loaded", vocabularySize, "vocab tokens,",
                    mergeRuleList.length, "merge rules.");

        // ------------------------------------------------------------
        // Fake GPT-2 weights (hidden size = 8)
        // ------------------------------------------------------------
        const fakeHiddenSize = 8;
        const fakeIntermediateSize = fakeHiddenSize * 4;
        const fakeSequenceLength = 4;
        const fakeNumberOfLayers = 2;
        const fakeMaximumPositionCount = 2048; // shared by config + position embeddings

        // Deterministic placeholder values: cycles 0.0 … 0.6 so results
        // are reproducible across runs.
        function createFakeArray(size) {
            const arr = new Float32Array(size);
            for (let i = 0; i < size; i++) arr[i] = (i % 7) * 0.1;
            return arr;
        }

        // ------------------------------------------------------------
        // Build fake model
        // ------------------------------------------------------------
        const model = {
            configuration: {
                hiddenSize: fakeHiddenSize,
                numberOfTransformerLayers: fakeNumberOfLayers,
                numberOfAttentionHeads: 2,
                maximumSequenceLength: fakeSequenceLength,
                vocabularySize: vocabularySize,
                maximumPositionCount: fakeMaximumPositionCount
            },
            // real tokenizer data:
            vocabularyList: vocabularyList,
            mergeRuleList: mergeRuleList,
            // fake embeddings:
            tokenEmbeddingTensor: createFakeArray(vocabularySize * fakeHiddenSize),
            positionEmbeddingTensor: createFakeArray(fakeMaximumPositionCount * fakeHiddenSize),
            transformerLayerList: [],
            layerWeightFlatList: [],
            tokenIndexArray: null
        };

        // ------------------------------------------------------------
        // Create fake transformer layers
        // ------------------------------------------------------------
        for (let layerIndex = 0; layerIndex < fakeNumberOfLayers; layerIndex++) {
            const layer = {
                firstNormalizationWeightTensor: createFakeArray(fakeHiddenSize),
                firstNormalizationBiasTensor: createFakeArray(fakeHiddenSize),
                queryWeightTensor: createFakeArray(fakeHiddenSize * fakeHiddenSize),
                keyWeightTensor: createFakeArray(fakeHiddenSize * fakeHiddenSize),
                valueWeightTensor: createFakeArray(fakeHiddenSize * fakeHiddenSize),
                queryBiasTensor: createFakeArray(fakeHiddenSize),
                keyBiasTensor: createFakeArray(fakeHiddenSize),
                valueBiasTensor: createFakeArray(fakeHiddenSize),
                attentionOutputProjectionWeightTensor: createFakeArray(fakeHiddenSize * fakeHiddenSize),
                attentionOutputProjectionBiasTensor: createFakeArray(fakeHiddenSize),
                secondNormalizationWeightTensor: createFakeArray(fakeHiddenSize),
                secondNormalizationBiasTensor: createFakeArray(fakeHiddenSize),
                feedForwardLayerOneWeightTensor: createFakeArray(fakeHiddenSize * fakeIntermediateSize),
                feedForwardLayerOneBiasTensor: createFakeArray(fakeIntermediateSize),
                feedForwardLayerTwoWeightTensor: createFakeArray(fakeIntermediateSize * fakeHiddenSize),
                feedForwardLayerTwoBiasTensor: createFakeArray(fakeHiddenSize)
            };
            model.transformerLayerList.push(layer);
        }

        console.log("[Tools] ✓ Fake model ready.");
        return model;
    }

    /**
     * Packs a 1D typed array into a 2D RGBA float texture (4 values/pixel).
     * Unused trailing channels are left as 0.
     *
     * @param {Float32Array|Uint32Array|number[]} sourceData - Input values.
     * @param {number} [texWidth=8192] - Fixed texture width in pixels.
     * @returns {{data: Float32Array, width: number, height: number, totalPixels: number}}
     *   `data` is length `width * height * 4`; `height` is the minimum number
     *   of rows needed to hold `totalPixels` pixels.
     */
    static packIntoTextureRGBA(sourceData, texWidth = 8192) {
        const totalValues = sourceData.length;
        const totalPixels = Math.ceil(totalValues / 4); // 4 channels/pixel
        const width = texWidth;
        const height = Math.ceil(totalPixels / width);

        const data = new Float32Array(width * height * 4);
        for (let i = 0; i < totalValues; i++) {
            const pixelIndex = Math.floor(i / 4);
            const channel = i % 4;
            const x = pixelIndex % width;
            const y = Math.floor(pixelIndex / width);
            data[(y * width + x) * 4 + channel] = sourceData[i];
        }
        return { data, width, height, totalPixels };
    }

    /**
     * Packs a 2D tensor (rows × cols, row-major) into a 2D RGBA float texture.
     * Validates the flat length, then delegates to {@link Tools.packIntoTextureRGBA}
     * since row-major 2D data is already flat.
     *
     * @param {Float32Array|number[]} tensor - 1D row-major data.
     * @param {number} rows - Number of rows.
     * @param {number} cols - Number of columns.
     * @param {number} [texWidth=8192] - Fixed texture width in pixels.
     * @returns {{data: Float32Array, width: number, height: number, totalPixels: number}}
     * @throws {Error} if `tensor.length !== rows * cols`.
     */
    static pack2DTensorIntoTexture(tensor, rows, cols, texWidth = 8192) {
        if (tensor.length !== rows * cols)
            throw new Error("Tensor length does not match rows*cols");
        return Tools.packIntoTextureRGBA(tensor, texWidth);
    }
}