first commit
205
framework/Tools.js
Normal file
@@ -0,0 +1,205 @@
// ============================================================================
// Tools.js — now loads real GPT-2 vocab.json + merges.txt into fake model
// ============================================================================

export class Tools {

    static async generateFakeModel() {

        console.log("[Tools] Loading vocab.json + merges.txt…");

        // ------------------------------------------------------------
        // Load vocabulary (vocab.json maps token string -> token id)
        // ------------------------------------------------------------
        const vocabResponse = await fetch("model/vocab.json"); // <-- adjust path
        const vocabularyList = await vocabResponse.json();

        // ------------------------------------------------------------
        // Load merges (merges.txt holds one BPE merge rule per line;
        // its leading "#version: …" line is dropped by the "#" filter)
        // ------------------------------------------------------------
        const mergesResponse = await fetch("model/merges.txt"); // <-- adjust path
        const mergeRuleText = await mergesResponse.text();

        const mergeRuleList = mergeRuleText
            .split("\n")
            .filter(line => line.trim().length > 0 && !line.startsWith("#"));

        console.log("[Tools] ✓ Loaded",
            Object.keys(vocabularyList).length, "vocab tokens,",
            mergeRuleList.length, "merge rules."
        );

        // ------------------------------------------------------------
        // Fake GPT-2 weights (hidden size = 8)
        // ------------------------------------------------------------
        const fakeHiddenSize = 8;
        const fakeIntermediateSize = fakeHiddenSize * 4; // GPT-2 uses a 4× FFN expansion
        const fakeSequenceLength = 4;
        const fakeNumberOfLayers = 2;

        // Deterministic filler values, so every run produces identical weights
        function createFakeArray(size) {
            const arr = new Float32Array(size);
            for (let i = 0; i < size; i++) arr[i] = (i % 7) * 0.1;
            return arr;
        }

        // ------------------------------------------------------------
        // Build fake model
        // ------------------------------------------------------------
        const model = {

            configuration: {
                hiddenSize: fakeHiddenSize,
                numberOfTransformerLayers: fakeNumberOfLayers,
                numberOfAttentionHeads: 2,
                maximumSequenceLength: fakeSequenceLength,
                vocabularySize: Object.keys(vocabularyList).length,
                maximumPositionCount: 2048
            },

            // real tokenizer data:
            vocabularyList: vocabularyList,
            mergeRuleList: mergeRuleList,

            // fake embeddings:
            tokenEmbeddingTensor:
                createFakeArray(Object.keys(vocabularyList).length * fakeHiddenSize),

            positionEmbeddingTensor:
                createFakeArray(2048 * fakeHiddenSize),

            transformerLayerList: [],
            layerWeightFlatList: [],
            tokenIndexArray: null
        };

        // ------------------------------------------------------------
        // Create fake transformer layers
        // ------------------------------------------------------------
        for (let layerIndex = 0; layerIndex < fakeNumberOfLayers; layerIndex++) {

            const layer = {

                firstNormalizationWeightTensor: createFakeArray(fakeHiddenSize),
                firstNormalizationBiasTensor: createFakeArray(fakeHiddenSize),

                queryWeightTensor: createFakeArray(fakeHiddenSize * fakeHiddenSize),
                keyWeightTensor: createFakeArray(fakeHiddenSize * fakeHiddenSize),
                valueWeightTensor: createFakeArray(fakeHiddenSize * fakeHiddenSize),

                queryBiasTensor: createFakeArray(fakeHiddenSize),
                keyBiasTensor: createFakeArray(fakeHiddenSize),
                valueBiasTensor: createFakeArray(fakeHiddenSize),

                attentionOutputProjectionWeightTensor:
                    createFakeArray(fakeHiddenSize * fakeHiddenSize),
                attentionOutputProjectionBiasTensor:
                    createFakeArray(fakeHiddenSize),

                secondNormalizationWeightTensor:
                    createFakeArray(fakeHiddenSize),
                secondNormalizationBiasTensor:
                    createFakeArray(fakeHiddenSize),

                feedForwardLayerOneWeightTensor:
                    createFakeArray(fakeHiddenSize * fakeIntermediateSize),
                feedForwardLayerOneBiasTensor:
                    createFakeArray(fakeIntermediateSize),

                feedForwardLayerTwoWeightTensor:
                    createFakeArray(fakeIntermediateSize * fakeHiddenSize),
                feedForwardLayerTwoBiasTensor:
                    createFakeArray(fakeHiddenSize)
            };

            model.transformerLayerList.push(layer);
        }

        console.log("[Tools] ✓ Fake model ready.");
        return model;
    }
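
    // Example (assumed usage, not part of the original commit; any async
    // context that can fetch the model files would work):
    //
    //   const model = await Tools.generateFakeModel();
    //   console.log(model.configuration.vocabularySize); // 50257 for stock GPT-2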

    /**
     * Packs a 1D typed array into a 2D RGBA float texture.
     *
     * @param {TypedArray} sourceData - Input values (Float32Array, Uint32Array, etc.)
     * @param {number} texWidth - Maximum texture width (e.g., 8192)
     * @returns {{data: Float32Array, width: number, height: number, totalPixels: number}}
     */
    static packIntoTextureRGBA(sourceData, texWidth = 8192) {

        const totalValues = sourceData.length;
        const totalPixels = Math.ceil(totalValues / 4); // 4 channels/pixel

        const width = texWidth;
        const height = Math.ceil(totalPixels / width);

        // Float32Array is zero-initialized, so padding past totalValues stays 0
        const data = new Float32Array(width * height * 4);

        for (let i = 0; i < totalValues; i++) {

            const pixelIndex = Math.floor(i / 4);
            const channel = i % 4;

            const x = pixelIndex % width;
            const y = Math.floor(pixelIndex / width);

            data[(y * width + x) * 4 + channel] = sourceData[i];
        }

        return { data, width, height, totalPixels };
    }
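
    // Worked example (illustrative, not from the original source): packing
    // 10 floats needs ceil(10 / 4) = 3 pixels; with texWidth = 8192 that
    // yields an 8192 × 1 texture whose first 3 pixels carry data and whose
    // remaining channels stay zero. A WebGL2 caller could then upload it via
    //
    //   gl.texImage2D(gl.TEXTURE_2D, 0, gl.RGBA32F, width, height, 0,
    //                 gl.RGBA, gl.FLOAT, data);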

    /**
     * Packs a 2D tensor (rows × cols) into a 2D RGBA float texture.
     *
     * @param {TypedArray} tensor - 1D row-major data
     * @param {number} rows - Number of rows
     * @param {number} cols - Number of columns
     * @param {number} texWidth - Maximum texture width (default 8192)
     * @returns {{data: Float32Array, width: number, height: number, totalPixels: number}}
     */
    static pack2DTensorIntoTexture(tensor, rows, cols, texWidth = 8192) {

        const flatLength = rows * cols;
        if (tensor.length !== flatLength) {
            throw new Error("Tensor length does not match rows * cols");
        }

        const totalPixels = Math.ceil(flatLength / 4);
        const width = texWidth;
        const height = Math.ceil(totalPixels / width);

        const data = new Float32Array(width * height * 4);

        for (let index = 0; index < flatLength; index++) {

            const pixelIndex = index >> 2; // integer divide by 4
            const channel = index & 3;     // modulo 4

            const x = pixelIndex % width;
            const y = (pixelIndex / width) | 0;

            data[(y * width + x) * 4 + channel] = tensor[index];
        }

        return { data, width, height, totalPixels };
    }
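
    // Example (assumed usage): one fake attention weight matrix holds
    // hiddenSize × hiddenSize = 8 × 8 = 64 values → ceil(64 / 4) = 16 pixels.
    //
    //   const layer = model.transformerLayerList[0];
    //   const tex = Tools.pack2DTensorIntoTexture(layer.queryWeightTensor, 8, 8);
    //   // tex.width === 8192, tex.height === 1, tex.totalPixels === 16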
}
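
// ----------------------------------------------------------------------------
// End-to-end sketch (assumed usage; nothing below ships in this file): load
// the fake model, then pack each flat tensor for a hypothetical GPU pipeline.
//
//   const model = await Tools.generateFakeModel();
//   const packedTokenEmbeddings =
//       Tools.packIntoTextureRGBA(model.tokenEmbeddingTensor);
//   for (const layer of model.transformerLayerList) {
//       const packedQuery = Tools.packIntoTextureRGBA(layer.queryWeightTensor);
//   }
// ----------------------------------------------------------------------------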