// ============================================================================
// Tools.js — now loads real GPT-2 vocab.json + merges.txt into fake model
// ============================================================================

export class Tools {

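  /**
   * Builds a tiny stand-in GPT-2 model object: real vocab.json / merges.txt
   * tokenizer data plus deterministic placeholder weight tensors, so the rest
   * of the pipeline can be exercised without downloading real weights.
   *
   * @returns {Promise<Object>} model with `configuration`, tokenizer data,
   *   embedding tensors, and `transformerLayerList`
   */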
  static async generateFakeModel() {

    console.log("[Tools] Loading vocab.json + merges.txt…");

    // ------------------------------------------------------------
    // Load vocabulary
    // ------------------------------------------------------------
    const vocabResponse = await fetch("model/vocab.json"); // <-- adjust path
    const vocabularyList = await vocabResponse.json();

    // ------------------------------------------------------------
    // Load merges
    // ------------------------------------------------------------
    const mergesResponse = await fetch("model/merges.txt"); // <-- adjust path
    const mergeRuleText = await mergesResponse.text();

    const mergeRuleList = mergeRuleText
      .split("\n")
      .filter(line => line.trim().length > 0 && !line.startsWith("#"));
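    // Note: GPT-2's merges.txt typically starts with a "#version: ..." header
    // line; the startsWith("#") filter above drops it along with blank lines.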

    console.log("[Tools] ✓ Loaded",
      Object.keys(vocabularyList).length, "vocab tokens,",
      mergeRuleList.length, "merge rules."
    );

    // ------------------------------------------------------------
    // Fake GPT-2 weights (hidden size = 8)
    // ------------------------------------------------------------
    const fakeHiddenSize = 8;
    const fakeIntermediateSize = fakeHiddenSize * 4;
    const fakeSequenceLength = 4;
    const fakeNumberOfLayers = 2;
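    // (Deliberately tiny sizes for wiring tests; real GPT-2 small uses a
    // hidden size of 768, 12 layers, 12 attention heads, and ~50k tokens.)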

    function createFakeArray(size) {
      let arr = new Float32Array(size);
      for (let i = 0; i < size; i++) arr[i] = (i % 7) * 0.1;
      return arr;
    }

    // ------------------------------------------------------------
    // Build fake model
    // ------------------------------------------------------------
    let model = {

      configuration: {
        hiddenSize: fakeHiddenSize,
        numberOfTransformerLayers: fakeNumberOfLayers,
        numberOfAttentionHeads: 2,
        maximumSequenceLength: fakeSequenceLength,
        vocabularySize: Object.keys(vocabularyList).length,
        maximumPositionCount: 2048
      },

      // real tokenizer data:
      vocabularyList: vocabularyList,
      mergeRuleList: mergeRuleList,

      // fake embeddings:
      tokenEmbeddingTensor:
        createFakeArray(Object.keys(vocabularyList).length * fakeHiddenSize),
      positionEmbeddingTensor:
        createFakeArray(2048 * fakeHiddenSize),

      transformerLayerList: [],
      layerWeightFlatList: [],
      tokenIndexArray: null
    };

    // ------------------------------------------------------------
    // Create fake transformer layers
    // ------------------------------------------------------------
    for (let layerIndex = 0; layerIndex < fakeNumberOfLayers; layerIndex++) {

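      // Each fake layer mirrors the tensors of one GPT-2 transformer block:
      // pre-attention LayerNorm, Q/K/V projections, attention output
      // projection, a second LayerNorm, and the two feed-forward layers.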
      let layer = {

        firstNormalizationWeightTensor: createFakeArray(fakeHiddenSize),
        firstNormalizationBiasTensor: createFakeArray(fakeHiddenSize),

        queryWeightTensor: createFakeArray(fakeHiddenSize * fakeHiddenSize),
        keyWeightTensor: createFakeArray(fakeHiddenSize * fakeHiddenSize),
        valueWeightTensor: createFakeArray(fakeHiddenSize * fakeHiddenSize),

        queryBiasTensor: createFakeArray(fakeHiddenSize),
        keyBiasTensor: createFakeArray(fakeHiddenSize),
        valueBiasTensor: createFakeArray(fakeHiddenSize),

        attentionOutputProjectionWeightTensor:
          createFakeArray(fakeHiddenSize * fakeHiddenSize),
        attentionOutputProjectionBiasTensor:
          createFakeArray(fakeHiddenSize),

        secondNormalizationWeightTensor: createFakeArray(fakeHiddenSize),
        secondNormalizationBiasTensor: createFakeArray(fakeHiddenSize),

        feedForwardLayerOneWeightTensor:
          createFakeArray(fakeHiddenSize * fakeIntermediateSize),
        feedForwardLayerOneBiasTensor:
          createFakeArray(fakeIntermediateSize),

        feedForwardLayerTwoWeightTensor:
          createFakeArray(fakeIntermediateSize * fakeHiddenSize),
        feedForwardLayerTwoBiasTensor:
          createFakeArray(fakeHiddenSize)
      };

      model.transformerLayerList.push(layer);
    }

    console.log("[Tools] ✓ Fake model ready.");
    return model;
  }

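  // Usage sketch (assumes vocab.json / merges.txt are served under "model/",
  // and that this runs inside an async context):
  //
  //   const model = await Tools.generateFakeModel();
  //   console.log(model.configuration.vocabularySize,
  //               model.transformerLayerList.length);
  //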
  /**
   * Packs a 1D typed array into a 2D RGBA float texture.
   *
   * @param {TypedArray} sourceData - Input values (Float32Array, Uint32Array, etc.)
   * @param {number} texWidth - Texture width in pixels (default 8192)
   * @returns {{data: Float32Array, width: number, height: number, totalPixels: number}}
   */
  static packIntoTextureRGBA(sourceData, texWidth = 8192) {

    const totalValues = sourceData.length;
    const totalPixels = Math.ceil(totalValues / 4); // 4 channels/pixel

    const width = texWidth;
    const height = Math.ceil(totalPixels / width);

    const data = new Float32Array(width * height * 4);

    for (let i = 0; i < totalValues; i++) {
      const pixelIndex = Math.floor(i / 4);
      const channel = i % 4;

      const x = pixelIndex % width;
      const y = Math.floor(pixelIndex / width);

      data[(y * width + x) * 4 + channel] = sourceData[i];
    }

    return { data, width, height, totalPixels };
  }

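  // Upload sketch (assumes a WebGL2 context `gl` and some Float32Array
  // `weights`; NEAREST filtering is used because linear filtering of 32-bit
  // float textures requires the OES_texture_float_linear extension):
  //
  //   const { data, width, height } = Tools.packIntoTextureRGBA(weights);
  //   const texture = gl.createTexture();
  //   gl.bindTexture(gl.TEXTURE_2D, texture);
  //   gl.texImage2D(gl.TEXTURE_2D, 0, gl.RGBA32F, width, height, 0,
  //                 gl.RGBA, gl.FLOAT, data);
  //   gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MIN_FILTER, gl.NEAREST);
  //   gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MAG_FILTER, gl.NEAREST);
  //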
  /**
   * Packs a 2D tensor (rows × cols) into a 2D RGBA float texture.
   *
   * @param {TypedArray} tensor - 1D row-major data
   * @param {number} rows - number of rows
   * @param {number} cols - number of columns
   * @param {number} texWidth - texture width in pixels (default 8192)
   * @returns {{data: Float32Array, width: number, height: number, totalPixels: number}}
   */
  static pack2DTensorIntoTexture(tensor, rows, cols, texWidth = 8192) {

    const flatLength = rows * cols;
    if (tensor.length !== flatLength) {
      throw new Error("Tensor length does not match rows*cols");
    }

    const totalPixels = Math.ceil(flatLength / 4);
    const width = texWidth;
    const height = Math.ceil(totalPixels / width);

    const data = new Float32Array(width * height * 4);

    for (let index = 0; index < flatLength; index++) {
      const pixelIndex = index >> 2; // /4
      const channel = index & 3;     // %4

      const x = pixelIndex % width;
      const y = (pixelIndex / width) | 0;

      data[(y * width + x) * 4 + channel] = tensor[index];
    }

    return { data, width, height, totalPixels };
  }

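  // Lookup sketch (hypothetical: recover the element at `row`, `col` of a
  // packed 2D tensor from the texture data returned above):
  //
  //   const index = row * cols + col;   // flat row-major index
  //   const pixel = index >> 2;         // which RGBA pixel
  //   const channel = index & 3;        // which channel of that pixel
  //   const x = pixel % width;
  //   const y = (pixel / width) | 0;
  //   const value = data[(y * width + x) * 4 + channel];
  //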
}