m96-chan Claude Opus 4.6 committed on
Commit ·
76a55fd
1
Parent(s): 360ca36
0xBitNet chat demo — npm install 0xbitnet, that's it
Browse files
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
- .gitignore +7 -22
- README.md +3 -77
- assets/index-gTdi7eoP.js +803 -0
- index.html +224 -0
- package.json +0 -39
- public/favicon.ico +0 -0
- public/index.html +0 -43
- public/logo192.png +0 -0
- public/logo512.png +0 -0
- public/manifest.json +0 -25
- public/robots.txt +0 -3
- src/App.css +0 -38
- src/App.js +0 -25
- src/App.test.js +0 -8
- src/index.css +0 -13
- src/index.js +0 -17
- src/logo.svg +0 -1
- src/reportWebVitals.js +0 -13
- src/setupTests.js +0 -5
.gitignore
CHANGED
|
@@ -1,23 +1,8 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
.
|
| 7 |
-
|
| 8 |
-
# testing
|
| 9 |
-
/coverage
|
| 10 |
-
|
| 11 |
-
# production
|
| 12 |
-
/build
|
| 13 |
-
|
| 14 |
-
# misc
|
| 15 |
.DS_Store
|
| 16 |
-
.env.local
|
| 17 |
-
.env.development.local
|
| 18 |
-
.env.test.local
|
| 19 |
-
.env.production.local
|
| 20 |
-
|
| 21 |
-
npm-debug.log*
|
| 22 |
-
yarn-debug.log*
|
| 23 |
-
yarn-error.log*
|
|
|
|
| 1 |
+
node_modules
|
| 2 |
+
dist
|
| 3 |
+
src
|
| 4 |
+
package.json
|
| 5 |
+
package-lock.json
|
| 6 |
+
tsconfig.json
|
| 7 |
+
vite.config.ts
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
.DS_Store
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
README.md
CHANGED
|
@@ -1,83 +1,9 @@
|
|
| 1 |
---
|
| 2 |
title: 0xBitNet
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: static
|
| 7 |
pinned: false
|
| 8 |
-
app_build_command: npm run build
|
| 9 |
-
app_file: build/index.html
|
| 10 |
license: mit
|
| 11 |
-
short_description: Pure WebGPU BitNet inference — run LLMs in your browser
|
| 12 |
---
|
| 13 |
-
|
| 14 |
-
# Getting Started with Create React App
|
| 15 |
-
|
| 16 |
-
This project was bootstrapped with [Create React App](https://github.com/facebook/create-react-app).
|
| 17 |
-
|
| 18 |
-
## Available Scripts
|
| 19 |
-
|
| 20 |
-
In the project directory, you can run:
|
| 21 |
-
|
| 22 |
-
### `npm start`
|
| 23 |
-
|
| 24 |
-
Runs the app in the development mode.\
|
| 25 |
-
Open [http://localhost:3000](http://localhost:3000) to view it in your browser.
|
| 26 |
-
|
| 27 |
-
The page will reload when you make changes.\
|
| 28 |
-
You may also see any lint errors in the console.
|
| 29 |
-
|
| 30 |
-
### `npm test`
|
| 31 |
-
|
| 32 |
-
Launches the test runner in the interactive watch mode.\
|
| 33 |
-
See the section about [running tests](https://facebook.github.io/create-react-app/docs/running-tests) for more information.
|
| 34 |
-
|
| 35 |
-
### `npm run build`
|
| 36 |
-
|
| 37 |
-
Builds the app for production to the `build` folder.\
|
| 38 |
-
It correctly bundles React in production mode and optimizes the build for the best performance.
|
| 39 |
-
|
| 40 |
-
The build is minified and the filenames include the hashes.\
|
| 41 |
-
Your app is ready to be deployed!
|
| 42 |
-
|
| 43 |
-
See the section about [deployment](https://facebook.github.io/create-react-app/docs/deployment) for more information.
|
| 44 |
-
|
| 45 |
-
### `npm run eject`
|
| 46 |
-
|
| 47 |
-
**Note: this is a one-way operation. Once you `eject`, you can't go back!**
|
| 48 |
-
|
| 49 |
-
If you aren't satisfied with the build tool and configuration choices, you can `eject` at any time. This command will remove the single build dependency from your project.
|
| 50 |
-
|
| 51 |
-
Instead, it will copy all the configuration files and the transitive dependencies (webpack, Babel, ESLint, etc) right into your project so you have full control over them. All of the commands except `eject` will still work, but they will point to the copied scripts so you can tweak them. At this point you're on your own.
|
| 52 |
-
|
| 53 |
-
You don't have to ever use `eject`. The curated feature set is suitable for small and middle deployments, and you shouldn't feel obligated to use this feature. However we understand that this tool wouldn't be useful if you couldn't customize it when you are ready for it.
|
| 54 |
-
|
| 55 |
-
## Learn More
|
| 56 |
-
|
| 57 |
-
You can learn more in the [Create React App documentation](https://facebook.github.io/create-react-app/docs/getting-started).
|
| 58 |
-
|
| 59 |
-
To learn React, check out the [React documentation](https://reactjs.org/).
|
| 60 |
-
|
| 61 |
-
### Code Splitting
|
| 62 |
-
|
| 63 |
-
This section has moved here: [https://facebook.github.io/create-react-app/docs/code-splitting](https://facebook.github.io/create-react-app/docs/code-splitting)
|
| 64 |
-
|
| 65 |
-
### Analyzing the Bundle Size
|
| 66 |
-
|
| 67 |
-
This section has moved here: [https://facebook.github.io/create-react-app/docs/analyzing-the-bundle-size](https://facebook.github.io/create-react-app/docs/analyzing-the-bundle-size)
|
| 68 |
-
|
| 69 |
-
### Making a Progressive Web App
|
| 70 |
-
|
| 71 |
-
This section has moved here: [https://facebook.github.io/create-react-app/docs/making-a-progressive-web-app](https://facebook.github.io/create-react-app/docs/making-a-progressive-web-app)
|
| 72 |
-
|
| 73 |
-
### Advanced Configuration
|
| 74 |
-
|
| 75 |
-
This section has moved here: [https://facebook.github.io/create-react-app/docs/advanced-configuration](https://facebook.github.io/create-react-app/docs/advanced-configuration)
|
| 76 |
-
|
| 77 |
-
### Deployment
|
| 78 |
-
|
| 79 |
-
This section has moved here: [https://facebook.github.io/create-react-app/docs/deployment](https://facebook.github.io/create-react-app/docs/deployment)
|
| 80 |
-
|
| 81 |
-
### `npm run build` fails to minify
|
| 82 |
-
|
| 83 |
-
This section has moved here: [https://facebook.github.io/create-react-app/docs/troubleshooting#npm-run-build-fails-to-minify](https://facebook.github.io/create-react-app/docs/troubleshooting#npm-run-build-fails-to-minify)
|
|
|
|
| 1 |
---
|
| 2 |
title: 0xBitNet
|
| 3 |
+
emoji: ⚡
|
| 4 |
+
colorFrom: green
|
| 5 |
+
colorTo: gray
|
| 6 |
sdk: static
|
| 7 |
pinned: false
|
|
|
|
|
|
|
| 8 |
license: mit
|
|
|
|
| 9 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
assets/index-gTdi7eoP.js
ADDED
|
@@ -0,0 +1,803 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// Minified bundle prelude (presumably Vite build output; vite.config.ts is listed in this commit's .gitignore).
// Definitions on the following lines, in order:
//  - IIFE: modulepreload polyfill. Returns early when link.relList.supports("modulepreload") is true;
//    otherwise it fetches the href of every link[rel="modulepreload"] element (those already present and
//    those added later, observed through a MutationObserver) once each (guarded by the ep flag), copying
//    integrity / referrerPolicy and deriving the credentials mode from crossOrigin.
//  - N: the GGUF magic number checked by readHeader (0x46554747).
//    oe, Q, ue, de, ce, fe, le, he, pe, me, R: GGML tensor type ids accepted by ge().
//  - ge(type): maps a GGML type id to a numeric size factor (4, 2, 1, 8, or a fraction such as 54/256);
//    presumably bytes per element - TODO confirm against callers. Throws on any other id.
//  - _e: GGUF reader over an ArrayBuffer. parse() reads the header (magic must equal N, version must be
//    2 or 3), then metadataKVCount key/value pairs and tensorCount tensor infos, and returns them with
//    tensorDataOffset = current read offset rounded up to metadata["general.alignment"] (32 when absent).
//    Every multi-byte read is little-endian; strings are a u64 byte length followed by UTF-8 bytes;
//    u64/i64 values (including tensor shape entries and tensor offsets) are returned as BigInt.
//  - V / be(buffer): safetensors header parse. The first V (8) bytes hold a little-endian u64 JSON length;
//    returns the parsed JSON with __metadata__ removed, and dataOffset = 8 + that length.
//  - we(dtype): maps F32/F16/I8/I32/U8 to the lowercase name; throws on any other dtype string.
//  - ve(header, dataOffset): one {name, dtype, shape, offset, size} record per header entry, where
//    offset = dataOffset + data_offsets[0] and size = data_offsets[1] - data_offsets[0].
//  - U(config): config.hiddenSize / config.numAttentionHeads.
//  - ye: compute-pipeline cache keyed by name (plus the JSON of the override constants when given);
//    each entry stores the pipeline and its bind group layout 0 (pipeline layout is "auto").
//  - Ue: GPU buffer pool grouped by usage flags. Requested sizes are rounded up to a multiple of
//    alignment (default 256); acquire() reuses the first free buffer that is large enough, otherwise
//    creates one; release() only clears the inUse flag; destroy() destroys every pooled buffer.
//  - G / B / v: bind-group cache backed by a Map. v() returns the cached bind group for a key only when
//    every entry's resource.buffer is the same object as on the previous call; otherwise it creates a
//    new bind group and stores it. v() reads resource.buffer on every entry, so it expects buffer bindings.
//  - O: WGSL source of the RMSNorm kernel; its template literal opens at the end of this span.
(function(){const e=document.createElement("link").relList;if(e&&e.supports&&e.supports("modulepreload"))return;for(const s of document.querySelectorAll('link[rel="modulepreload"]'))n(s);new MutationObserver(s=>{for(const r of s)if(r.type==="childList")for(const a of r.addedNodes)a.tagName==="LINK"&&a.rel==="modulepreload"&&n(a)}).observe(document,{childList:!0,subtree:!0});function t(s){const r={};return s.integrity&&(r.integrity=s.integrity),s.referrerPolicy&&(r.referrerPolicy=s.referrerPolicy),s.crossOrigin==="use-credentials"?r.credentials="include":s.crossOrigin==="anonymous"?r.credentials="omit":r.credentials="same-origin",r}function n(s){if(s.ep)return;s.ep=!0;const r=t(s);fetch(s.href,r)}})();var N=1179993927,oe=0,Q=1,ue=16,de=17,ce=18,fe=27,le=28,he=30,pe=34,me=35,R=36;function ge(i){switch(i){case oe:return 4;case Q:return 2;case ue:return 1;case de:return 2;case ce:return 4;case fe:return 8;case le:return 8;case he:return 2;case pe:return 54/256;case me:return 66/256;case R:return .25;default:throw new Error(`Unsupported GGML type: ${i}`)}}var _e=class{view;offset;textDecoder=new TextDecoder("utf-8");constructor(i){this.view=new DataView(i),this.offset=0}parse(){const i=this.readHeader(),e=this.readMetadata(Number(i.metadataKVCount)),t=this.readTensorInfos(Number(i.tensorCount)),n=e["general.alignment"]||32,s=Math.ceil(this.offset/n)*n;return{header:i,metadata:e,tensors:t,tensorDataOffset:s}}readHeader(){const i=this.readU32();if(i!==N)throw new Error(`Invalid GGUF magic: 0x${i.toString(16)} (expected 0x${N.toString(16)})`);const e=this.readU32();if(e<2||e>3)throw new Error(`Unsupported GGUF version: ${e}`);const t=this.readU64(),n=this.readU64();return{magic:i,version:e,tensorCount:t,metadataKVCount:n}}readMetadata(i){const e={};for(let t=0;t<i;t++){const n=this.readString(),s=this.readMetadataValue();e[n]=s}return e}readMetadataValue(){const i=this.readU32();return this.readValueOfType(i)}readValueOfType(i){switch(i){case 0:return this.readU8();case 
1:return this.readI8();case 2:return this.readU16();case 3:return this.readI16();case 4:return this.readU32();case 5:return this.readI32();case 6:return this.readF32();case 7:return this.readU8()!==0;case 8:return this.readString();case 10:return this.readU64();case 11:return this.readI64();case 12:return this.readF64();case 9:{const e=this.readU32(),t=Number(this.readU64()),n=[];for(let s=0;s<t;s++)n.push(this.readValueOfType(e));return n}default:throw new Error(`Unknown GGUF metadata type: ${i}`)}}readTensorInfos(i){const e=[];for(let t=0;t<i;t++){const n=this.readString(),s=this.readU32(),r=[];for(let d=0;d<s;d++)r.push(this.readU64());const a=this.readU32(),o=this.readU64();e.push({name:n,nDimensions:s,shape:r,type:a,offset:o})}return e}readU8(){const i=this.view.getUint8(this.offset);return this.offset+=1,i}readI8(){const i=this.view.getInt8(this.offset);return this.offset+=1,i}readU16(){const i=this.view.getUint16(this.offset,!0);return this.offset+=2,i}readI16(){const i=this.view.getInt16(this.offset,!0);return this.offset+=2,i}readU32(){const i=this.view.getUint32(this.offset,!0);return this.offset+=4,i}readI32(){const i=this.view.getInt32(this.offset,!0);return this.offset+=4,i}readU64(){const i=this.view.getBigUint64(this.offset,!0);return this.offset+=8,i}readI64(){const i=this.view.getBigInt64(this.offset,!0);return this.offset+=8,i}readF32(){const i=this.view.getFloat32(this.offset,!0);return this.offset+=4,i}readF64(){const i=this.view.getFloat64(this.offset,!0);return this.offset+=8,i}readString(){const i=Number(this.readU64()),e=new Uint8Array(this.view.buffer,this.offset,i);return this.offset+=i,this.textDecoder.decode(e)}},V=8;function be(i){const e=new DataView(i),t=Number(e.getBigUint64(0,!0)),n=new Uint8Array(i,V,t),s=new TextDecoder().decode(n),r=JSON.parse(s);delete r.__metadata__;const a=V+t;return{header:r,dataOffset:a}}function 
we(i){switch(i){case"F32":return"f32";case"F16":return"f16";case"I8":return"i8";case"I32":return"i32";case"U8":return"u8";default:throw new Error(`Unsupported safetensors dtype: ${i}`)}}function ve(i,e){const t=[];for(const[n,s]of Object.entries(i)){const[r,a]=s.data_offsets;t.push({name:n,dtype:we(s.dtype),shape:s.shape,offset:e+r,size:a-r})}return t}function U(i){return i.hiddenSize/i.numAttentionHeads}var ye=class{cache=new Map;device;constructor(i){this.device=i}getOrCreate(i,e,t="main",n){const s=n?`${i}:${JSON.stringify(n)}`:i,r=this.cache.get(s);if(r)return r;const a=this.device.createShaderModule({code:e}),o=this.device.createComputePipeline({layout:"auto",compute:{module:a,entryPoint:t,constants:n}}),d=o.getBindGroupLayout(0),u={pipeline:o,bindGroupLayout:d};return this.cache.set(s,u),u}clear(){this.cache.clear()}},Ue=class{device;pools=new Map;bufferToEntry=new Map;alignment;constructor(i,e=256){this.device=i,this.alignment=e}alignSize(i){return Math.ceil(i/this.alignment)*this.alignment}acquire(i,e){const t=this.alignSize(i),n=this.pools.get(e);if(n){for(const a of n)if(!a.inUse&&a.size>=t)return a.inUse=!0,a.buffer}const s=this.device.createBuffer({size:t,usage:e}),r={buffer:s,size:t,inUse:!0};return this.bufferToEntry.set(s,r),n?n.push(r):this.pools.set(e,[r]),s}release(i){const e=this.bufferToEntry.get(i);e&&(e.inUse=!1)}destroy(){for(const i of this.pools.values())for(const e of i)e.buffer.destroy();this.pools.clear(),this.bufferToEntry.clear()}};function G(){return new Map}function B(i){i.clear()}function v(i,e,t,n,s){const r=s.map(d=>d.resource.buffer),a=i.get(t);if(a&&a.bufs.length===r.length){let d=!0;for(let u=0;u<r.length;u++)if(a.bufs[u]!==r[u]){d=!1;break}if(d)return a.bg}const o=e.createBindGroup({layout:n,entries:s});return i.set(t,{bg:o,bufs:r}),o}var O=`// RMSNorm: x_i * w_i / sqrt(mean(x²) + eps)
|
| 2 |
+
//
|
| 3 |
+
// Two-pass within one dispatch:
|
| 4 |
+
// 1. Compute sum of squares (workgroup reduction)
|
| 5 |
+
// 2. Normalize: x_i * w_i * rsqrt(mean_sq + eps)
|
| 6 |
+
//
|
| 7 |
+
// Layout:
|
| 8 |
+
// input: [N, D] f32
|
| 9 |
+
// weight: [D] f32 (learnable scale)
|
| 10 |
+
// output: [N, D] f32
|
| 11 |
+
|
| 12 |
+
struct Params {
|
| 13 |
+
N: u32,
|
| 14 |
+
D: u32,
|
| 15 |
+
eps: f32,
|
| 16 |
+
}
|
| 17 |
+
|
| 18 |
+
@group(0) @binding(0) var<storage, read> input: array<f32>;
|
| 19 |
+
@group(0) @binding(1) var<storage, read> weight: array<f32>;
|
| 20 |
+
@group(0) @binding(2) var<storage, read_write> output: array<f32>;
|
| 21 |
+
@group(0) @binding(3) var<uniform> params: Params;
|
| 22 |
+
|
| 23 |
+
const WORKGROUP_SIZE: u32 = 256u;
|
| 24 |
+
|
| 25 |
+
var<workgroup> shared_sum: array<f32, 256>;
|
| 26 |
+
|
| 27 |
+
@compute @workgroup_size(256)
|
| 28 |
+
fn main(
|
| 29 |
+
@builtin(workgroup_id) wg_id: vec3<u32>,
|
| 30 |
+
@builtin(local_invocation_id) local_id: vec3<u32>,
|
| 31 |
+
) {
|
| 32 |
+
let row = wg_id.x;
|
| 33 |
+
if (row >= params.N) {
|
| 34 |
+
return;
|
| 35 |
+
}
|
| 36 |
+
|
| 37 |
+
let tid = local_id.x;
|
| 38 |
+
let row_offset = row * params.D;
|
| 39 |
+
|
| 40 |
+
// Pass 1: Sum of squares
|
| 41 |
+
var local_sum: f32 = 0.0;
|
| 42 |
+
for (var col = tid; col < params.D; col += WORKGROUP_SIZE) {
|
| 43 |
+
let val = input[row_offset + col];
|
| 44 |
+
local_sum += val * val;
|
| 45 |
+
}
|
| 46 |
+
|
| 47 |
+
shared_sum[tid] = local_sum;
|
| 48 |
+
workgroupBarrier();
|
| 49 |
+
|
| 50 |
+
for (var stride = WORKGROUP_SIZE / 2u; stride > 0u; stride >>= 1u) {
|
| 51 |
+
if (tid < stride) {
|
| 52 |
+
shared_sum[tid] += shared_sum[tid + stride];
|
| 53 |
+
}
|
| 54 |
+
workgroupBarrier();
|
| 55 |
+
}
|
| 56 |
+
|
| 57 |
+
let rms = inverseSqrt(shared_sum[0] / f32(params.D) + params.eps);
|
| 58 |
+
|
| 59 |
+
// Pass 2: Normalize
|
| 60 |
+
for (var col = tid; col < params.D; col += WORKGROUP_SIZE) {
|
| 61 |
+
output[row_offset + col] = input[row_offset + col] * rms * weight[col];
|
| 62 |
+
}
|
| 63 |
+
}
|
| 64 |
+
`,J=`// Elementwise operations: add, multiply
|
| 65 |
+
//
|
| 66 |
+
// Used for residual connections and gating.
|
| 67 |
+
//
|
| 68 |
+
// Layout:
|
| 69 |
+
// a: [N] f32
|
| 70 |
+
// b: [N] f32
|
| 71 |
+
// output: [N] f32
|
| 72 |
+
|
| 73 |
+
struct Params {
|
| 74 |
+
N: u32,
|
| 75 |
+
op: u32, // 0 = add, 1 = multiply
|
| 76 |
+
}
|
| 77 |
+
|
| 78 |
+
@group(0) @binding(0) var<storage, read> a: array<f32>;
|
| 79 |
+
@group(0) @binding(1) var<storage, read> b: array<f32>;
|
| 80 |
+
@group(0) @binding(2) var<storage, read_write> output: array<f32>;
|
| 81 |
+
@group(0) @binding(3) var<uniform> params: Params;
|
| 82 |
+
|
| 83 |
+
@compute @workgroup_size(256)
|
| 84 |
+
fn main(
|
| 85 |
+
@builtin(global_invocation_id) gid: vec3<u32>,
|
| 86 |
+
) {
|
| 87 |
+
let idx = gid.x;
|
| 88 |
+
if (idx >= params.N) {
|
| 89 |
+
return;
|
| 90 |
+
}
|
| 91 |
+
|
| 92 |
+
if (params.op == 0u) {
|
| 93 |
+
output[idx] = a[idx] + b[idx];
|
| 94 |
+
} else {
|
| 95 |
+
output[idx] = a[idx] * b[idx];
|
| 96 |
+
}
|
| 97 |
+
}
|
| 98 |
+
`,
/**
 * One pre-norm transformer block executed with WebGPU compute passes:
 *
 *   afterAttn = input     + attention(rmsnorm(input,     inputLayerNorm))
 *   output    = afterAttn + ffn      (rmsnorm(afterAttn, postAttnLayerNorm))
 *
 * Intermediate buffers come from the shared buffer pool and are handed back to
 * it as soon as the commands that consume them have been recorded. The buffer
 * returned by forward() is still marked in-use; releasing it is up to the caller.
 *
 * NOTE(review): uniform buffers made by createUniform() for the non-decode path
 * (tokenCount !== 1) are never destroyed or pooled inside this class.
 */
ke=class{
  device;
  pipelines;
  pool;
  config;
  inputLayerNorm;
  postAttnLayerNorm;
  attention;
  ffn;
  decodeNormUniform;
  decodeAddUniform;
  bgCache=G();

  /**
   * @param device            GPU device used to create buffers and bind groups.
   * @param pipelines         Pipeline cache exposing getOrCreate(name, wgslSource).
   * @param pool              Buffer pool exposing acquire(size, usage) / release(buffer).
   * @param config            Model config; hiddenSize and rmsNormEps are read here.
   * @param inputLayerNorm    Weight buffer for the RMSNorm that precedes attention.
   * @param postAttnLayerNorm Weight buffer for the RMSNorm that precedes the FFN.
   * @param attention         Attention sub-module (forward, initDecodeUniforms, clearBGCache, destroyPreAllocated).
   * @param ffn               Feed-forward sub-module (forward, initDecodeUniforms, clearBGCache).
   */
  constructor(device,pipelines,pool,config,inputLayerNorm,postAttnLayerNorm,attention,ffn){
    this.device=device;
    this.pipelines=pipelines;
    this.pool=pool;
    this.config=config;
    this.inputLayerNorm=inputLayerNorm;
    this.postAttnLayerNorm=postAttnLayerNorm;
    this.attention=attention;
    this.ffn=ffn;
  }

  /**
   * Builds the two uniform buffers reused whenever tokenCount === 1, then lets
   * the sub-modules build theirs.
   *
   * @param attentionArg Passed through unchanged to attention.initDecodeUniforms;
   *                     its meaning is not visible from this class.
   */
  initDecodeUniforms(attentionArg){
    // RMSNorm Params { N: u32 = 1, D: u32 = hiddenSize, eps: f32 }, little-endian.
    const normParams=new ArrayBuffer(12);
    const normView=new DataView(normParams);
    normView.setUint32(0,1,true);
    normView.setUint32(4,this.config.hiddenSize,true);
    normView.setFloat32(8,this.config.rmsNormEps,true);
    this.decodeNormUniform=this.createUniform(normParams);

    // Elementwise Params { N: u32 = hiddenSize, op: u32 = 0 (add) }.
    const addParams=new ArrayBuffer(8);
    const addView=new DataView(addParams);
    addView.setUint32(0,this.config.hiddenSize,true);
    addView.setUint32(4,0,true);
    this.decodeAddUniform=this.createUniform(addParams);

    this.attention.initDecodeUniforms(attentionArg);
    this.ffn.initDecodeUniforms();
  }

  /**
   * Records the whole block into the given command encoder.
   *
   * @param input        Buffer holding tokenCount * hiddenSize f32 values; not released here.
   * @param tokenCount   Number of rows (tokens) being processed.
   * @param attentionArg Third argument forwarded unchanged to attention.forward.
   * @param encoder      Command encoder that receives the compute passes.
   * @returns Pool buffer holding the block output (still marked in-use).
   */
  forward(input,tokenCount,attentionArg,encoder){
    const hiddenSize=this.config.hiddenSize;

    const attnNormed=this.dispatchRMSNorm(encoder,input,this.inputLayerNorm,tokenCount,"attnNorm");
    const attnOut=this.attention.forward(attnNormed,tokenCount,attentionArg,encoder);
    this.pool.release(attnNormed);

    const afterAttn=this.dispatchAdd(encoder,input,attnOut,tokenCount*hiddenSize,tokenCount,"attnAdd");
    this.pool.release(attnOut);

    const ffnNormed=this.dispatchRMSNorm(encoder,afterAttn,this.postAttnLayerNorm,tokenCount,"ffnNorm");
    const ffnOut=this.ffn.forward(ffnNormed,tokenCount,encoder);
    this.pool.release(ffnNormed);

    const output=this.dispatchAdd(encoder,afterAttn,ffnOut,tokenCount*hiddenSize,tokenCount,"ffnAdd");
    this.pool.release(afterAttn);
    this.pool.release(ffnOut);
    return output;
  }

  /**
   * Records one RMSNorm pass (one workgroup per row) and returns its output buffer.
   *
   * @param encoder    Command encoder to record into.
   * @param input      Buffer of tokenCount * hiddenSize f32 values.
   * @param weight     Buffer of hiddenSize f32 scale values.
   * @param tokenCount Number of rows to normalize.
   * @param cacheKey   Optional bind-group cache key; only used when tokenCount === 1.
   * @returns Pool buffer (STORAGE | COPY_SRC) of tokenCount * hiddenSize * 4 bytes.
   */
  dispatchRMSNorm(encoder,input,weight,tokenCount,cacheKey){
    const{pipeline,bindGroupLayout}=this.pipelines.getOrCreate("rmsnorm",O);
    const hiddenSize=this.config.hiddenSize;
    const output=this.pool.acquire(tokenCount*hiddenSize*4,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC);

    let uniform;
    if(tokenCount===1&&this.decodeNormUniform){
      uniform=this.decodeNormUniform;
    }else{
      const params=new ArrayBuffer(12);
      const view=new DataView(params);
      view.setUint32(0,tokenCount,true);
      view.setUint32(4,hiddenSize,true);
      view.setFloat32(8,this.config.rmsNormEps,true);
      uniform=this.createUniform(params);
    }

    const entries=[
      {binding:0,resource:{buffer:input}},
      {binding:1,resource:{buffer:weight}},
      {binding:2,resource:{buffer:output}},
      {binding:3,resource:{buffer:uniform}},
    ];
    const bindGroup=tokenCount===1&&cacheKey
      ?v(this.bgCache,this.device,cacheKey,bindGroupLayout,entries)
      :this.device.createBindGroup({layout:bindGroupLayout,entries:entries});

    // The original text had a physical line break directly after the return keyword,
    // which automatic semicolon insertion treats as "return;" - the dispatch below
    // would never run. Keep the pass recording and the return as separate statements.
    const pass=encoder.beginComputePass();
    pass.setPipeline(pipeline);
    pass.setBindGroup(0,bindGroup);
    pass.dispatchWorkgroups(tokenCount);
    pass.end();
    return output;
  }

  /**
   * Records one elementwise add pass (op = 0) over count f32 values.
   *
   * @param encoder    Command encoder to record into.
   * @param a          First operand buffer.
   * @param b          Second operand buffer.
   * @param count      Number of f32 elements to add.
   * @param tokenCount When 1 (and the decode uniform exists) the pre-built uniform,
   *                   whose N is hiddenSize, is used instead of one built from count.
   *                   NOTE(review): this assumes count === hiddenSize in that case, as in forward().
   * @param cacheKey   Optional bind-group cache key; only used when tokenCount === 1.
   * @returns Pool buffer (STORAGE | COPY_SRC) of count * 4 bytes.
   */
  dispatchAdd(encoder,a,b,count,tokenCount,cacheKey){
    const{pipeline,bindGroupLayout}=this.pipelines.getOrCreate("elementwise_0",J);
    const output=this.pool.acquire(count*4,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC);

    let uniform;
    if(tokenCount===1&&this.decodeAddUniform){
      uniform=this.decodeAddUniform;
    }else{
      const params=new ArrayBuffer(8);
      const view=new DataView(params);
      view.setUint32(0,count,true);
      view.setUint32(4,0,true);
      uniform=this.createUniform(params);
    }

    const entries=[
      {binding:0,resource:{buffer:a}},
      {binding:1,resource:{buffer:b}},
      {binding:2,resource:{buffer:output}},
      {binding:3,resource:{buffer:uniform}},
    ];
    const bindGroup=tokenCount===1&&cacheKey
      ?v(this.bgCache,this.device,cacheKey,bindGroupLayout,entries)
      :this.device.createBindGroup({layout:bindGroupLayout,entries:entries});

    const pass=encoder.beginComputePass();
    pass.setPipeline(pipeline);
    pass.setBindGroup(0,bindGroup);
    // The elementwise kernel declares @workgroup_size(256): one invocation per element.
    pass.dispatchWorkgroups(Math.ceil(count/256));
    pass.end();
    return output;
  }

  /** Drops every cached bind group held by this block and by its sub-modules. */
  clearBGCache(){
    B(this.bgCache);
    this.attention.clearBGCache();
    this.ffn.clearBGCache();
  }

  /** Delegates to attention.destroyPreAllocated(); this block's own decode uniforms are left alone. */
  destroyPreAllocated(){
    this.attention.destroyPreAllocated();
  }

  /**
   * Creates a UNIFORM | COPY_DST buffer initialised with the given bytes.
   *
   * @param data ArrayBuffer with the uniform contents.
   * @returns The unmapped GPU buffer; its size is data.byteLength rounded up to a
   *          multiple of 4, with a minimum of 4 bytes.
   */
  createUniform(data){
    const size=Math.max(Math.ceil(data.byteLength/4)*4,4);
    const buffer=this.device.createBuffer({
      size:size,
      usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,
      mappedAtCreation:true,
    });
    new Uint8Array(buffer.getMappedRange()).set(new Uint8Array(data));
    buffer.unmap();
    return buffer;
  }
},Be=`// Per-token absmax activation quantization: f32 → int8
|
| 99 |
+
//
|
| 100 |
+
// Two-pass approach:
|
| 101 |
+
// Pass 1: Compute absmax per row (token)
|
| 102 |
+
// Pass 2: Scale and round to [-127, 127]
|
| 103 |
+
//
|
| 104 |
+
// This shader combines both passes using workgroup reduction.
|
| 105 |
+
//
|
| 106 |
+
// Layout:
|
| 107 |
+
// input: [N, D] f32
|
| 108 |
+
// output: [N, D] i32 (int8 stored as i32 for compute compatibility)
|
| 109 |
+
// scales: [N] f32 (per-token absmax / 127)
|
| 110 |
+
|
| 111 |
+
struct Params {
|
| 112 |
+
N: u32, // number of tokens
|
| 113 |
+
D: u32, // hidden dimension
|
| 114 |
+
}
|
| 115 |
+
|
| 116 |
+
@group(0) @binding(0) var<storage, read> input: array<f32>;
|
| 117 |
+
@group(0) @binding(1) var<storage, read_write> output: array<i32>;
|
| 118 |
+
@group(0) @binding(2) var<storage, read_write> scales: array<f32>;
|
| 119 |
+
@group(0) @binding(3) var<uniform> params: Params;
|
| 120 |
+
|
| 121 |
+
const WORKGROUP_SIZE: u32 = 256u;
|
| 122 |
+
|
| 123 |
+
var<workgroup> shared_max: array<f32, 256>;
|
| 124 |
+
|
| 125 |
+
@compute @workgroup_size(256)
|
| 126 |
+
fn main(
|
| 127 |
+
@builtin(workgroup_id) wg_id: vec3<u32>,
|
| 128 |
+
@builtin(local_invocation_id) local_id: vec3<u32>,
|
| 129 |
+
) {
|
| 130 |
+
let row = wg_id.x;
|
| 131 |
+
if (row >= params.N) {
|
| 132 |
+
return;
|
| 133 |
+
}
|
| 134 |
+
|
| 135 |
+
let tid = local_id.x;
|
| 136 |
+
let row_offset = row * params.D;
|
| 137 |
+
|
| 138 |
+
// Pass 1: Find absmax
|
| 139 |
+
var local_max: f32 = 0.0;
|
| 140 |
+
for (var col = tid; col < params.D; col += WORKGROUP_SIZE) {
|
| 141 |
+
local_max = max(local_max, abs(input[row_offset + col]));
|
| 142 |
+
}
|
| 143 |
+
|
| 144 |
+
shared_max[tid] = local_max;
|
| 145 |
+
workgroupBarrier();
|
| 146 |
+
|
| 147 |
+
// Reduction for max
|
| 148 |
+
for (var stride = WORKGROUP_SIZE / 2u; stride > 0u; stride >>= 1u) {
|
| 149 |
+
if (tid < stride) {
|
| 150 |
+
shared_max[tid] = max(shared_max[tid], shared_max[tid + stride]);
|
| 151 |
+
}
|
| 152 |
+
workgroupBarrier();
|
| 153 |
+
}
|
| 154 |
+
|
| 155 |
+
let absmax = shared_max[0];
|
| 156 |
+
let scale = select(absmax / 127.0, 1.0, absmax == 0.0);
|
| 157 |
+
|
| 158 |
+
if (tid == 0u) {
|
| 159 |
+
scales[row] = scale;
|
| 160 |
+
}
|
| 161 |
+
|
| 162 |
+
workgroupBarrier();
|
| 163 |
+
|
| 164 |
+
// Pass 2: Quantize
|
| 165 |
+
let inv_scale = select(127.0 / absmax, 0.0, absmax == 0.0);
|
| 166 |
+
for (var col = tid; col < params.D; col += WORKGROUP_SIZE) {
|
| 167 |
+
let val = input[row_offset + col];
|
| 168 |
+
let quantized = clamp(i32(round(val * inv_scale)), -127, 127);
|
| 169 |
+
output[row_offset + col] = quantized;
|
| 170 |
+
}
|
| 171 |
+
}
|
| 172 |
+
`,Pe=`// Ternary GEMV: packed ternary weights × int8 activations → i32 accumulator
|
| 173 |
+
//
|
| 174 |
+
// Weight packing (I2_S / Eddie-Wang1120 llama.cpp fork):
|
| 175 |
+
// 128-element block interleaving for SIMD. Each 32-byte block stores 128 elements
|
| 176 |
+
// in 4 groups of 32. Byte[gp] within a block stores:
|
| 177 |
+
// bits[7:6] = element at group0 (offset 0*32 + gp)
|
| 178 |
+
// bits[5:4] = element at group1 (offset 1*32 + gp)
|
| 179 |
+
// bits[3:2] = element at group2 (offset 2*32 + gp)
|
| 180 |
+
// bits[1:0] = element at group3 (offset 3*32 + gp)
|
| 181 |
+
// code mapping: {0=-1, 1=0, 2=+1}
|
| 182 |
+
//
|
| 183 |
+
// Layout:
|
| 184 |
+
// weights: [M, K/16] u32 (packed ternary)
|
| 185 |
+
// input: [K] i32 (int8 stored as i32)
|
| 186 |
+
// scales: [M] f32 (per-row weight scale)
|
| 187 |
+
// input_scale: f32 (activation absmax scale)
|
| 188 |
+
// output: [M] f32
|
| 189 |
+
//
|
| 190 |
+
// Each workgroup processes one output row.
|
| 191 |
+
// Threads cooperatively reduce over the K dimension.
|
| 192 |
+
|
| 193 |
+
struct Params {
|
| 194 |
+
M: u32, // output rows
|
| 195 |
+
K: u32, // input dimension (unpacked)
|
| 196 |
+
K_packed: u32, // K / 16
|
| 197 |
+
}
|
| 198 |
+
|
| 199 |
+
@group(0) @binding(0) var<storage, read> weights: array<u32>;
|
| 200 |
+
@group(0) @binding(1) var<storage, read> input: array<i32>;
|
| 201 |
+
@group(0) @binding(2) var<storage, read> scales: array<f32>;
|
| 202 |
+
@group(0) @binding(3) var<uniform> params: Params;
|
| 203 |
+
@group(0) @binding(4) var<uniform> input_scale: f32;
|
| 204 |
+
@group(0) @binding(5) var<storage, read_write> output: array<f32>;
|
| 205 |
+
|
| 206 |
+
const WORKGROUP_SIZE: u32 = 256u;
|
| 207 |
+
|
| 208 |
+
var<workgroup> shared_sums: array<i32, 256>;
|
| 209 |
+
|
| 210 |
+
@compute @workgroup_size(256)
|
| 211 |
+
fn main(
|
| 212 |
+
@builtin(workgroup_id) wg_id: vec3<u32>,
|
| 213 |
+
@builtin(local_invocation_id) local_id: vec3<u32>,
|
| 214 |
+
) {
|
| 215 |
+
let row = wg_id.x;
|
| 216 |
+
if (row >= params.M) {
|
| 217 |
+
return;
|
| 218 |
+
}
|
| 219 |
+
|
| 220 |
+
let tid = local_id.x;
|
| 221 |
+
let row_offset = row * params.K_packed;
|
| 222 |
+
|
| 223 |
+
var acc: i32 = 0;
|
| 224 |
+
|
| 225 |
+
// Each thread processes a strided slice of packed u32 columns
|
| 226 |
+
for (var col = tid; col < params.K_packed; col += WORKGROUP_SIZE) {
|
| 227 |
+
let packed = weights[row_offset + col];
|
| 228 |
+
|
| 229 |
+
// I2_S block interleaving: 128 elements per 32-byte (8 u32) block
|
| 230 |
+
let block = col / 8u;
|
| 231 |
+
let base_gp = (col % 8u) * 4u;
|
| 232 |
+
|
| 233 |
+
// Unpack 16 ternary weights from this u32 and dot with input
|
| 234 |
+
for (var i = 0u; i < 16u; i++) {
|
| 235 |
+
let byte_in_u32 = i / 4u;
|
| 236 |
+
let group = i % 4u;
|
| 237 |
+
let gp = base_gp + byte_in_u32;
|
| 238 |
+
let k_idx = block * 128u + group * 32u + gp;
|
| 239 |
+
if (k_idx < params.K) {
|
| 240 |
+
let shift = byte_in_u32 * 8u + (6u - 2u * group);
|
| 241 |
+
let code = (packed >> shift) & 3u;
|
| 242 |
+
let w = i32(code) - 1;
|
| 243 |
+
acc += w * input[k_idx];
|
| 244 |
+
}
|
| 245 |
+
}
|
| 246 |
+
}
|
| 247 |
+
|
| 248 |
+
// Workgroup reduction
|
| 249 |
+
shared_sums[tid] = acc;
|
| 250 |
+
workgroupBarrier();
|
| 251 |
+
|
| 252 |
+
// Tree reduction
|
| 253 |
+
for (var stride = WORKGROUP_SIZE / 2u; stride > 0u; stride >>= 1u) {
|
| 254 |
+
if (tid < stride) {
|
| 255 |
+
shared_sums[tid] += shared_sums[tid + stride];
|
| 256 |
+
}
|
| 257 |
+
workgroupBarrier();
|
| 258 |
+
}
|
| 259 |
+
|
| 260 |
+
// Thread 0 writes the dequantized result
|
| 261 |
+
if (tid == 0u) {
|
| 262 |
+
let sum = f32(shared_sums[0]);
|
| 263 |
+
output[row] = sum * scales[row] * input_scale;
|
| 264 |
+
}
|
| 265 |
+
}
|
| 266 |
+
`,Se=`// Ternary GEMM: batched matrix multiply for prompt processing
|
| 267 |
+
// Output[N,M] = Input[N,K] × TernaryWeights[M,K]^T
|
| 268 |
+
//
|
| 269 |
+
// Weight packing (I2_S / Eddie-Wang1120 llama.cpp fork):
|
| 270 |
+
// 128-element block interleaving. Each 32-byte block stores 128 elements
|
| 271 |
+
// in 4 groups of 32. Byte[gp] within a block stores:
|
| 272 |
+
// bits[7:6] = group0 (offset 0*32+gp), bits[5:4] = group1 (offset 1*32+gp)
|
| 273 |
+
// bits[3:2] = group2 (offset 2*32+gp), bits[1:0] = group3 (offset 3*32+gp)
|
| 274 |
+
// code mapping: {0=-1, 1=0, 2=+1}
|
| 275 |
+
// Input: int8 activations stored as i32
|
| 276 |
+
// Output: f32 (dequantized)
|
| 277 |
+
//
|
| 278 |
+
// 2D tiling: 16×16 workgroup, 4×4 per-thread output tile
|
| 279 |
+
|
| 280 |
+
struct Params {
|
| 281 |
+
M: u32, // output rows (weight rows)
|
| 282 |
+
N: u32, // output cols (batch / seq_len)
|
| 283 |
+
K: u32, // inner dimension (unpacked)
|
| 284 |
+
K_packed: u32, // K / 16
|
| 285 |
+
}
|
| 286 |
+
|
| 287 |
+
@group(0) @binding(0) var<storage, read> weights: array<u32>;
|
| 288 |
+
@group(0) @binding(1) var<storage, read> input: array<i32>;
|
| 289 |
+
@group(0) @binding(2) var<storage, read> scales: array<f32>;
|
| 290 |
+
@group(0) @binding(3) var<uniform> params: Params;
|
| 291 |
+
@group(0) @binding(4) var<storage, read> input_scales: array<f32>;
|
| 292 |
+
@group(0) @binding(5) var<storage, read_write> output: array<f32>;
|
| 293 |
+
|
| 294 |
+
const TILE_M: u32 = 64u; // rows per workgroup
|
| 295 |
+
const TILE_N: u32 = 64u; // cols per workgroup
|
| 296 |
+
const TILE_K: u32 = 32u; // K-tile for shared memory (unpacked units)
|
| 297 |
+
const THREADS_M: u32 = 16u;
|
| 298 |
+
const THREADS_N: u32 = 16u;
|
| 299 |
+
const THREAD_TILE_M: u32 = 4u; // TILE_M / THREADS_M
|
| 300 |
+
const THREAD_TILE_N: u32 = 4u; // TILE_N / THREADS_N
|
| 301 |
+
|
| 302 |
+
var<workgroup> shared_w: array<i32, 2048>; // TILE_M × TILE_K
|
| 303 |
+
var<workgroup> shared_x: array<i32, 2048>; // TILE_K × TILE_N
|
| 304 |
+
|
| 305 |
+
@compute @workgroup_size(16, 16)
|
| 306 |
+
fn main(
|
| 307 |
+
@builtin(workgroup_id) wg_id: vec3<u32>,
|
| 308 |
+
@builtin(local_invocation_id) local_id: vec3<u32>,
|
| 309 |
+
) {
|
| 310 |
+
let wg_row = wg_id.x * TILE_M;
|
| 311 |
+
let wg_col = wg_id.y * TILE_N;
|
| 312 |
+
let tid_m = local_id.x;
|
| 313 |
+
let tid_n = local_id.y;
|
| 314 |
+
|
| 315 |
+
// Per-thread accumulators (4×4 tile)
|
| 316 |
+
var acc: array<i32, 16>; // THREAD_TILE_M × THREAD_TILE_N
|
| 317 |
+
for (var i = 0u; i < 16u; i++) {
|
| 318 |
+
acc[i] = 0;
|
| 319 |
+
}
|
| 320 |
+
|
| 321 |
+
// Loop over K in tiles
|
| 322 |
+
let k_tiles = (params.K + TILE_K - 1u) / TILE_K;
|
| 323 |
+
for (var kt = 0u; kt < k_tiles; kt++) {
|
| 324 |
+
let k_base = kt * TILE_K;
|
| 325 |
+
|
| 326 |
+
// Cooperatively load weight tile into shared memory
|
| 327 |
+
let linear_id = tid_m * THREADS_N + tid_n;
|
| 328 |
+
let load_count = (TILE_M * TILE_K) / (THREADS_M * THREADS_N);
|
| 329 |
+
for (var ld = 0u; ld < load_count; ld++) {
|
| 330 |
+
let idx = linear_id + ld * (THREADS_M * THREADS_N);
|
| 331 |
+
let local_row = idx / TILE_K;
|
| 332 |
+
let local_col = idx % TILE_K;
|
| 333 |
+
let global_row = wg_row + local_row;
|
| 334 |
+
let global_k = k_base + local_col;
|
| 335 |
+
|
| 336 |
+
var w_val: i32 = 0;
|
| 337 |
+
if (global_row < params.M && global_k < params.K) {
|
| 338 |
+
// I2_S 128-element block interleaving
|
| 339 |
+
let block = global_k / 128u;
|
| 340 |
+
let pos = global_k % 128u;
|
| 341 |
+
let group = pos / 32u;
|
| 342 |
+
let gp = pos % 32u;
|
| 343 |
+
let u32_idx = block * 8u + gp / 4u;
|
| 344 |
+
let byte_in_u32 = gp % 4u;
|
| 345 |
+
let shift = byte_in_u32 * 8u + (6u - 2u * group);
|
| 346 |
+
let packed = weights[global_row * params.K_packed + u32_idx];
|
| 347 |
+
let code = (packed >> shift) & 3u;
|
| 348 |
+
w_val = i32(code) - 1;
|
| 349 |
+
}
|
| 350 |
+
shared_w[local_row * TILE_K + local_col] = w_val;
|
| 351 |
+
}
|
| 352 |
+
|
| 353 |
+
// Cooperatively load input tile into shared memory
|
| 354 |
+
let load_count_x = (TILE_K * TILE_N) / (THREADS_M * THREADS_N);
|
| 355 |
+
for (var ld = 0u; ld < load_count_x; ld++) {
|
| 356 |
+
let idx = linear_id + ld * (THREADS_M * THREADS_N);
|
| 357 |
+
let local_k = idx / TILE_N;
|
| 358 |
+
let local_col = idx % TILE_N;
|
| 359 |
+
let global_k = k_base + local_k;
|
| 360 |
+
let global_col = wg_col + local_col;
|
| 361 |
+
|
| 362 |
+
var x_val: i32 = 0;
|
| 363 |
+
if (global_k < params.K && global_col < params.N) {
|
| 364 |
+
x_val = input[global_col * params.K + global_k];
|
| 365 |
+
}
|
| 366 |
+
shared_x[local_k * TILE_N + local_col] = x_val;
|
| 367 |
+
}
|
| 368 |
+
|
| 369 |
+
workgroupBarrier();
|
| 370 |
+
|
| 371 |
+
// Compute per-thread 4×4 accumulation
|
| 372 |
+
for (var k = 0u; k < TILE_K; k++) {
|
| 373 |
+
for (var tm = 0u; tm < THREAD_TILE_M; tm++) {
|
| 374 |
+
let w = shared_w[(tid_m * THREAD_TILE_M + tm) * TILE_K + k];
|
| 375 |
+
for (var tn = 0u; tn < THREAD_TILE_N; tn++) {
|
| 376 |
+
let x = shared_x[k * TILE_N + tid_n * THREAD_TILE_N + tn];
|
| 377 |
+
acc[tm * THREAD_TILE_N + tn] += w * x;
|
| 378 |
+
}
|
| 379 |
+
}
|
| 380 |
+
}
|
| 381 |
+
|
| 382 |
+
workgroupBarrier();
|
| 383 |
+
}
|
| 384 |
+
|
| 385 |
+
// Write results with dequantization
|
| 386 |
+
for (var tm = 0u; tm < THREAD_TILE_M; tm++) {
|
| 387 |
+
let global_row = wg_row + tid_m * THREAD_TILE_M + tm;
|
| 388 |
+
if (global_row >= params.M) { continue; }
|
| 389 |
+
let w_scale = scales[global_row];
|
| 390 |
+
for (var tn = 0u; tn < THREAD_TILE_N; tn++) {
|
| 391 |
+
let global_col = wg_col + tid_n * THREAD_TILE_N + tn;
|
| 392 |
+
if (global_col >= params.N) { continue; }
|
| 393 |
+
let scale = w_scale * input_scales[global_col];
|
| 394 |
+
output[global_col * params.M + global_row] = f32(acc[tm * THREAD_TILE_N + tn]) * scale;
|
| 395 |
+
}
|
| 396 |
+
}
|
| 397 |
+
}
|
| 398 |
+
`,w=class{device;pipelines;pool;packedWeights;weightScales;normWeight;inDim;outDim;kPacked;decodeNormUniform;decodeQuantUniform;decodeGemvParamsUniform;decodeGemvScaleUniform;bgCache=G();constructor(i,e,t,n,s,r,a,o){this.device=i,this.pipelines=e,this.pool=t,this.packedWeights=n,this.weightScales=s,this.normWeight=r,this.inDim=a,this.outDim=o,this.kPacked=Math.ceil(a/16)}initDecodeUniforms(){if(this.normWeight){const i=new ArrayBuffer(12),e=new DataView(i);e.setUint32(0,1,!0),e.setUint32(4,this.inDim,!0),e.setFloat32(8,1e-5,!0),this.decodeNormUniform=this.createUniformBuffer(i)}{const i=new ArrayBuffer(8),e=new DataView(i);e.setUint32(0,1,!0),e.setUint32(4,this.inDim,!0),this.decodeQuantUniform=this.createUniformBuffer(i)}{const i=new ArrayBuffer(12),e=new DataView(i);e.setUint32(0,this.outDim,!0),e.setUint32(4,this.inDim,!0),e.setUint32(8,this.kPacked,!0),this.decodeGemvParamsUniform=this.createUniformBuffer(i)}this.decodeGemvScaleUniform=this.device.createBuffer({size:4,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST})}forward(i,e,t){let n;this.normWeight?(n=this.pool.acquire(e*this.inDim*4,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC),this.dispatchRMSNorm(t,i,n,e)):n=i;const s=this.pool.acquire(e*this.inDim*4,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC),r=this.pool.acquire(e*4,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC|GPUBufferUsage.UNIFORM);this.dispatchQuantize(t,n,s,r,e);const a=this.pool.acquire(e*this.outDim*4,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC);return e===1?this.dispatchGEMV(t,s,r,a):this.dispatchGEMM(t,s,r,a,e),this.normWeight&&this.pool.release(n),this.pool.release(s),this.pool.release(r),a}dispatchRMSNorm(i,e,t,n){const{pipeline:s,bindGroupLayout:r}=this.pipelines.getOrCreate("rmsnorm",O);let a;if(n===1&&this.decodeNormUniform)a=this.decodeNormUniform;else{const c=new ArrayBuffer(12),f=new DataView(c);f.setUint32(0,n,!0),f.setUint32(4,this.inDim,!0),f.setFloat32(8,1e-5,!0),a=this.createUniformBuffer(c)}const 
o=[{binding:0,resource:{buffer:e}},{binding:1,resource:{buffer:this.normWeight}},{binding:2,resource:{buffer:t}},{binding:3,resource:{buffer:a}}],d=n===1?v(this.bgCache,this.device,"rmsnorm",r,o):this.device.createBindGroup({layout:r,entries:o}),u=i.beginComputePass();u.setPipeline(s),u.setBindGroup(0,d),u.dispatchWorkgroups(n),u.end()}dispatchQuantize(i,e,t,n,s){const{pipeline:r,bindGroupLayout:a}=this.pipelines.getOrCreate("quantize",Be);let o;if(s===1&&this.decodeQuantUniform)o=this.decodeQuantUniform;else{const f=new ArrayBuffer(8),h=new DataView(f);h.setUint32(0,s,!0),h.setUint32(4,this.inDim,!0),o=this.createUniformBuffer(f)}const d=[{binding:0,resource:{buffer:e}},{binding:1,resource:{buffer:t}},{binding:2,resource:{buffer:n}},{binding:3,resource:{buffer:o}}],u=s===1?v(this.bgCache,this.device,"quantize",a,d):this.device.createBindGroup({layout:a,entries:d}),c=i.beginComputePass();c.setPipeline(r),c.setBindGroup(0,u),c.dispatchWorkgroups(s),c.end()}dispatchGEMV(i,e,t,n){const{pipeline:s,bindGroupLayout:r}=this.pipelines.getOrCreate("ternary_gemv",Pe);let a,o;if(this.decodeGemvParamsUniform&&this.decodeGemvScaleUniform)a=this.decodeGemvParamsUniform,o=this.decodeGemvScaleUniform;else{const f=new ArrayBuffer(12),h=new DataView(f);h.setUint32(0,this.outDim,!0),h.setUint32(4,this.inDim,!0),h.setUint32(8,this.kPacked,!0),a=this.createUniformBuffer(f),o=this.createUniformBuffer(new ArrayBuffer(4))}i.copyBufferToBuffer(t,0,o,0,4);const d=[{binding:0,resource:{buffer:this.packedWeights}},{binding:1,resource:{buffer:e}},{binding:2,resource:{buffer:this.weightScales}},{binding:3,resource:{buffer:a}},{binding:4,resource:{buffer:o}},{binding:5,resource:{buffer:n}}],u=v(this.bgCache,this.device,"gemv",r,d),c=i.beginComputePass();c.setPipeline(s),c.setBindGroup(0,u),c.dispatchWorkgroups(this.outDim),c.end()}dispatchGEMM(i,e,t,n,s){const{pipeline:r,bindGroupLayout:a}=this.pipelines.getOrCreate("ternary_gemm",Se),o=new ArrayBuffer(16),d=new 
DataView(o);d.setUint32(0,this.outDim,!0),d.setUint32(4,s,!0),d.setUint32(8,this.inDim,!0),d.setUint32(12,this.kPacked,!0);const u=this.createUniformBuffer(o),c=this.device.createBindGroup({layout:a,entries:[{binding:0,resource:{buffer:this.packedWeights}},{binding:1,resource:{buffer:e}},{binding:2,resource:{buffer:this.weightScales}},{binding:3,resource:{buffer:u}},{binding:4,resource:{buffer:t}},{binding:5,resource:{buffer:n}}]}),f=Math.ceil(this.outDim/64),h=Math.ceil(s/64),p=i.beginComputePass();p.setPipeline(r),p.setBindGroup(0,c),p.dispatchWorkgroups(f,h),p.end()}clearBGCache(){B(this.bgCache)}createUniformBuffer(i){const e=Math.max(Math.ceil(i.byteLength/4)*4,4),t=this.device.createBuffer({size:e,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,mappedAtCreation:!0});return new Uint8Array(t.getMappedRange()).set(new Uint8Array(i)),t.unmap(),t}},Ge=`// Rotary Position Embeddings (RoPE)
|
| 399 |
+
//
|
| 400 |
+
// For each pair (x[2i], x[2i+1]) at position \`pos\`:
|
| 401 |
+
// theta = pos * base^(-2i/D)
|
| 402 |
+
// out[2i] = x[2i] * cos(theta) - x[2i+1] * sin(theta)
|
| 403 |
+
// out[2i+1] = x[2i] * sin(theta) + x[2i+1] * cos(theta)
|
| 404 |
+
//
|
| 405 |
+
// Layout:
|
| 406 |
+
// input: [N, num_heads, head_dim] f32
|
| 407 |
+
// output: [N, num_heads, head_dim] f32
|
| 408 |
+
// Dispatched per (token, head, pair)
|
| 409 |
+
|
| 410 |
+
struct Params {
|
| 411 |
+
N: u32, // sequence length
|
| 412 |
+
num_heads: u32,
|
| 413 |
+
head_dim: u32,
|
| 414 |
+
pos_offset: u32, // starting position (for KV-cache continuation)
|
| 415 |
+
theta_base: f32, // default 10000.0 or 500000.0
|
| 416 |
+
}
|
| 417 |
+
|
| 418 |
+
@group(0) @binding(0) var<storage, read> input: array<f32>;
|
| 419 |
+
@group(0) @binding(1) var<storage, read_write> output: array<f32>;
|
| 420 |
+
@group(0) @binding(2) var<uniform> params: Params;
|
| 421 |
+
|
| 422 |
+
@compute @workgroup_size(256)
|
| 423 |
+
fn main(
|
| 424 |
+
@builtin(global_invocation_id) gid: vec3<u32>,
|
| 425 |
+
) {
|
| 426 |
+
let half_dim = params.head_dim / 2u;
|
| 427 |
+
let total_pairs = params.N * params.num_heads * half_dim;
|
| 428 |
+
|
| 429 |
+
let pair_idx = gid.x;
|
| 430 |
+
if (pair_idx >= total_pairs) {
|
| 431 |
+
return;
|
| 432 |
+
}
|
| 433 |
+
|
| 434 |
+
// Decompose linear index into (token, head, dim_pair)
|
| 435 |
+
let dim_pair = pair_idx % half_dim;
|
| 436 |
+
let remainder = pair_idx / half_dim;
|
| 437 |
+
let head = remainder % params.num_heads;
|
| 438 |
+
let token = remainder / params.num_heads;
|
| 439 |
+
|
| 440 |
+
let pos = f32(token + params.pos_offset);
|
| 441 |
+
let freq_exp = -2.0 * f32(dim_pair) / f32(params.head_dim);
|
| 442 |
+
let theta = pos * pow(params.theta_base, freq_exp);
|
| 443 |
+
|
| 444 |
+
let cos_theta = cos(theta);
|
| 445 |
+
let sin_theta = sin(theta);
|
| 446 |
+
|
| 447 |
+
let base_idx = (token * params.num_heads + head) * params.head_dim + dim_pair * 2u;
|
| 448 |
+
let x0 = input[base_idx];
|
| 449 |
+
let x1 = input[base_idx + 1u];
|
| 450 |
+
|
| 451 |
+
output[base_idx] = x0 * cos_theta - x1 * sin_theta;
|
| 452 |
+
output[base_idx + 1u] = x0 * sin_theta + x1 * cos_theta;
|
| 453 |
+
}
|
| 454 |
+
`,xe=`// Numerically stable softmax
|
| 455 |
+
//
|
| 456 |
+
// For each row:
|
| 457 |
+
// 1. Find max value (for numerical stability)
|
| 458 |
+
// 2. Compute sum of exp(x - max)
|
| 459 |
+
// 3. Normalize: out[i] = exp(x[i] - max) / sum
|
| 460 |
+
//
|
| 461 |
+
// Layout:
|
| 462 |
+
// input: [N, D] f32
|
| 463 |
+
// output: [N, D] f32
|
| 464 |
+
|
| 465 |
+
struct Params {
|
| 466 |
+
N: u32,
|
| 467 |
+
D: u32,
|
| 468 |
+
}
|
| 469 |
+
|
| 470 |
+
@group(0) @binding(0) var<storage, read> input: array<f32>;
|
| 471 |
+
@group(0) @binding(1) var<storage, read_write> output: array<f32>;
|
| 472 |
+
@group(0) @binding(2) var<uniform> params: Params;
|
| 473 |
+
|
| 474 |
+
const WORKGROUP_SIZE: u32 = 256u;
|
| 475 |
+
|
| 476 |
+
var<workgroup> shared_val: array<f32, 256>;
|
| 477 |
+
|
| 478 |
+
@compute @workgroup_size(256)
|
| 479 |
+
fn main(
|
| 480 |
+
@builtin(workgroup_id) wg_id: vec3<u32>,
|
| 481 |
+
@builtin(local_invocation_id) local_id: vec3<u32>,
|
| 482 |
+
) {
|
| 483 |
+
let row = wg_id.x;
|
| 484 |
+
if (row >= params.N) {
|
| 485 |
+
return;
|
| 486 |
+
}
|
| 487 |
+
|
| 488 |
+
let tid = local_id.x;
|
| 489 |
+
let row_offset = row * params.D;
|
| 490 |
+
|
| 491 |
+
// Pass 1: Find max
|
| 492 |
+
var local_max: f32 = -3.402823e+38; // -FLT_MAX
|
| 493 |
+
for (var col = tid; col < params.D; col += WORKGROUP_SIZE) {
|
| 494 |
+
local_max = max(local_max, input[row_offset + col]);
|
| 495 |
+
}
|
| 496 |
+
|
| 497 |
+
shared_val[tid] = local_max;
|
| 498 |
+
workgroupBarrier();
|
| 499 |
+
|
| 500 |
+
for (var stride = WORKGROUP_SIZE / 2u; stride > 0u; stride >>= 1u) {
|
| 501 |
+
if (tid < stride) {
|
| 502 |
+
shared_val[tid] = max(shared_val[tid], shared_val[tid + stride]);
|
| 503 |
+
}
|
| 504 |
+
workgroupBarrier();
|
| 505 |
+
}
|
| 506 |
+
|
| 507 |
+
let row_max = shared_val[0];
|
| 508 |
+
workgroupBarrier();
|
| 509 |
+
|
| 510 |
+
// Pass 2: Sum of exp(x - max)
|
| 511 |
+
var local_sum: f32 = 0.0;
|
| 512 |
+
for (var col = tid; col < params.D; col += WORKGROUP_SIZE) {
|
| 513 |
+
local_sum += exp(input[row_offset + col] - row_max);
|
| 514 |
+
}
|
| 515 |
+
|
| 516 |
+
shared_val[tid] = local_sum;
|
| 517 |
+
workgroupBarrier();
|
| 518 |
+
|
| 519 |
+
for (var stride = WORKGROUP_SIZE / 2u; stride > 0u; stride >>= 1u) {
|
| 520 |
+
if (tid < stride) {
|
| 521 |
+
shared_val[tid] += shared_val[tid + stride];
|
| 522 |
+
}
|
| 523 |
+
workgroupBarrier();
|
| 524 |
+
}
|
| 525 |
+
|
| 526 |
+
let inv_sum = 1.0 / shared_val[0];
|
| 527 |
+
workgroupBarrier();
|
| 528 |
+
|
| 529 |
+
// Pass 3: Normalize
|
| 530 |
+
for (var col = tid; col < params.D; col += WORKGROUP_SIZE) {
|
| 531 |
+
output[row_offset + col] = exp(input[row_offset + col] - row_max) * inv_sum;
|
| 532 |
+
}
|
| 533 |
+
}
|
| 534 |
+
`,$=`// Standard f32 attention matmul kernels
|
| 535 |
+
//
|
| 536 |
+
// Two operations:
|
| 537 |
+
// 1. scores = Q @ K^T * scale (score computation)
|
| 538 |
+
// 2. output = attn_weights @ V (value aggregation)
|
| 539 |
+
//
|
| 540 |
+
// These use standard f32 matmul (not ternary) because Q,K,V are
|
| 541 |
+
// already projected through BitLinear and are f32 activations.
|
| 542 |
+
|
| 543 |
+
// ─── Kernel 1: Q @ K^T (score computation) ───
|
| 544 |
+
// Q: [N, num_heads, head_dim]
|
| 545 |
+
// K: [S, num_kv_heads, head_dim] (S = total seq including cache)
|
| 546 |
+
// scores: [num_heads, N, S]
|
| 547 |
+
|
| 548 |
+
struct ScoreParams {
|
| 549 |
+
N: u32, // query seq length
|
| 550 |
+
S: u32, // key seq length (including cache)
|
| 551 |
+
num_heads: u32,
|
| 552 |
+
num_kv_heads: u32,
|
| 553 |
+
head_dim: u32,
|
| 554 |
+
scale: f32, // 1/sqrt(head_dim)
|
| 555 |
+
}
|
| 556 |
+
|
| 557 |
+
@group(0) @binding(0) var<storage, read> Q: array<f32>;
|
| 558 |
+
@group(0) @binding(1) var<storage, read> K: array<f32>;
|
| 559 |
+
@group(0) @binding(2) var<storage, read_write> scores: array<f32>;
|
| 560 |
+
@group(0) @binding(3) var<uniform> params: ScoreParams;
|
| 561 |
+
|
| 562 |
+
@compute @workgroup_size(16, 16)
|
| 563 |
+
fn compute_scores(
|
| 564 |
+
@builtin(global_invocation_id) gid: vec3<u32>,
|
| 565 |
+
) {
|
| 566 |
+
// gid.x = query position, gid.y = key position, gid.z = head
|
| 567 |
+
let q_pos = gid.x;
|
| 568 |
+
let k_pos = gid.y;
|
| 569 |
+
let head = gid.z;
|
| 570 |
+
|
| 571 |
+
if (q_pos >= params.N || k_pos >= params.S || head >= params.num_heads) {
|
| 572 |
+
return;
|
| 573 |
+
}
|
| 574 |
+
|
| 575 |
+
// GQA: map attention head to KV head
|
| 576 |
+
let kv_head = head / (params.num_heads / params.num_kv_heads);
|
| 577 |
+
|
| 578 |
+
let q_offset = (q_pos * params.num_heads + head) * params.head_dim;
|
| 579 |
+
let k_offset = (k_pos * params.num_kv_heads + kv_head) * params.head_dim;
|
| 580 |
+
|
| 581 |
+
var dot: f32 = 0.0;
|
| 582 |
+
for (var d = 0u; d < params.head_dim; d++) {
|
| 583 |
+
dot += Q[q_offset + d] * K[k_offset + d];
|
| 584 |
+
}
|
| 585 |
+
|
| 586 |
+
// Causal mask: positions after query are -inf
|
| 587 |
+
let is_causal = k_pos > q_pos + (params.S - params.N);
|
| 588 |
+
let masked_score = select(dot * params.scale, -3.402823e+38, is_causal);
|
| 589 |
+
|
| 590 |
+
let score_idx = (head * params.N + q_pos) * params.S + k_pos;
|
| 591 |
+
scores[score_idx] = masked_score;
|
| 592 |
+
}
|
| 593 |
+
|
| 594 |
+
// ─── Kernel 2: Attention weights @ V ───
|
| 595 |
+
// attn: [num_heads, N, S]
|
| 596 |
+
// V: [S, num_kv_heads, head_dim]
|
| 597 |
+
// output: [N, num_heads, head_dim]
|
| 598 |
+
|
| 599 |
+
struct AttnVParams {
|
| 600 |
+
N: u32,
|
| 601 |
+
S: u32,
|
| 602 |
+
num_heads: u32,
|
| 603 |
+
num_kv_heads: u32,
|
| 604 |
+
head_dim: u32,
|
| 605 |
+
}
|
| 606 |
+
|
| 607 |
+
@group(0) @binding(0) var<storage, read> attn: array<f32>;
|
| 608 |
+
@group(0) @binding(1) var<storage, read> V: array<f32>;
|
| 609 |
+
@group(0) @binding(2) var<storage, read_write> attn_output: array<f32>;
|
| 610 |
+
@group(0) @binding(3) var<uniform> attn_v_params: AttnVParams;
|
| 611 |
+
|
| 612 |
+
@compute @workgroup_size(256)
|
| 613 |
+
fn attn_v(
|
| 614 |
+
@builtin(global_invocation_id) gid: vec3<u32>,
|
| 615 |
+
) {
|
| 616 |
+
let total = attn_v_params.N * attn_v_params.num_heads * attn_v_params.head_dim;
|
| 617 |
+
let idx = gid.x;
|
| 618 |
+
if (idx >= total) {
|
| 619 |
+
return;
|
| 620 |
+
}
|
| 621 |
+
|
| 622 |
+
let d = idx % attn_v_params.head_dim;
|
| 623 |
+
let remainder = idx / attn_v_params.head_dim;
|
| 624 |
+
let head = remainder % attn_v_params.num_heads;
|
| 625 |
+
let q_pos = remainder / attn_v_params.num_heads;
|
| 626 |
+
|
| 627 |
+
let kv_head = head / (attn_v_params.num_heads / attn_v_params.num_kv_heads);
|
| 628 |
+
|
| 629 |
+
var sum: f32 = 0.0;
|
| 630 |
+
for (var s = 0u; s < attn_v_params.S; s++) {
|
| 631 |
+
let a = attn[(head * attn_v_params.N + q_pos) * attn_v_params.S + s];
|
| 632 |
+
let v = V[(s * attn_v_params.num_kv_heads + kv_head) * attn_v_params.head_dim + d];
|
| 633 |
+
sum += a * v;
|
| 634 |
+
}
|
| 635 |
+
|
| 636 |
+
let out_idx = (q_pos * attn_v_params.num_heads + head) * attn_v_params.head_dim + d;
|
| 637 |
+
attn_output[out_idx] = sum;
|
| 638 |
+
}
|
| 639 |
+
`,De=class{device;pipelines;pool;config;hDim;qProj;kProj;vProj;oProj;decodeRopeQUniform;decodeRopeKUniform;decodeScoresUniform;decodeSoftmaxUniform;decodeAttnVUniform;decodeScoresBuf;decodeAttnWeightsBuf;bgCache=G();constructor(i,e,t,n,s,r,a,o){this.device=i,this.pipelines=e,this.pool=t,this.config=n,this.hDim=U(n),this.qProj=s,this.kProj=r,this.vProj=a,this.oProj=o}initDecodeUniforms(i){const e=n=>this.device.createBuffer({size:n,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST});this.decodeRopeQUniform=e(20),this.decodeRopeKUniform=e(20),this.decodeScoresUniform=e(24),this.decodeSoftmaxUniform=e(8),this.decodeAttnVUniform=e(20);const t=this.config.numAttentionHeads*i*4;this.decodeScoresBuf=this.device.createBuffer({size:t,usage:GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC}),this.decodeAttnWeightsBuf=this.device.createBuffer({size:t,usage:GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC}),this.qProj.initDecodeUniforms(),this.kProj.initDecodeUniforms(),this.vProj.initDecodeUniforms(),this.oProj.initDecodeUniforms()}forward(i,e,t,n){const{numAttentionHeads:s,numKeyValueHeads:r,hiddenSize:a}=this.config,o=this.qProj.forward(i,e,n),d=this.kProj.forward(i,e,n),u=this.vProj.forward(i,e,n),c=this.applyRoPE(n,o,e,s,t.seqLen,e===1?this.decodeRopeQUniform:void 0,"ropeQ"),f=this.applyRoPE(n,d,e,r,t.seqLen,e===1?this.decodeRopeKUniform:void 0,"ropeK");this.pool.release(o),this.pool.release(d),this.appendToCache(n,f,u,t,e),this.pool.release(f),this.pool.release(u);const h=t.seqLen+e,p=this.computeScores(n,c,t.key,e,h,e===1?this.decodeScoresUniform:void 0,e===1?this.decodeScoresBuf:void 0);this.pool.release(c);const l=this.applySoftmax(n,p,s*e,h,e===1?this.decodeSoftmaxUniform:void 0,e===1?this.decodeAttnWeightsBuf:void 0);e!==1&&this.pool.release(p);const m=this.computeAttnV(n,l,t.value,e,h,e===1?this.decodeAttnVUniform:void 0);e!==1&&this.pool.release(l);const g=this.oProj.forward(m,e,n);return 
this.pool.release(m),g}applyRoPE(i,e,t,n,s,r,a){const{pipeline:o,bindGroupLayout:d}=this.pipelines.getOrCreate("rope",Ge),u=t*n*this.hDim*4,c=this.pool.acquire(u,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC),f=new ArrayBuffer(20),h=new DataView(f);h.setUint32(0,t,!0),h.setUint32(4,n,!0),h.setUint32(8,this.hDim,!0),h.setUint32(12,s,!0),h.setFloat32(16,this.config.ropeTheta,!0);let p;r?(this.device.queue.writeBuffer(r,0,new Uint8Array(f)),p=r):p=this.createUniform(f);const l=[{binding:0,resource:{buffer:e}},{binding:1,resource:{buffer:c}},{binding:2,resource:{buffer:p}}],m=t===1&&a?v(this.bgCache,this.device,a,d,l):this.device.createBindGroup({layout:d,entries:l}),g=t*n*(this.hDim/2),_=i.beginComputePass();return _.setPipeline(o),_.setBindGroup(0,m),_.dispatchWorkgroups(Math.ceil(g/256)),_.end(),c}appendToCache(i,e,t,n,s){const r=s*this.config.numKeyValueHeads*this.hDim*4,a=n.seqLen*this.config.numKeyValueHeads*this.hDim*4;i.copyBufferToBuffer(e,0,n.key,a,r),i.copyBufferToBuffer(t,0,n.value,a,r)}computeScores(i,e,t,n,s,r,a){const{pipeline:o,bindGroupLayout:d}=this.pipelines.getOrCreate("attention_scores",$,"compute_scores"),{numAttentionHeads:u,numKeyValueHeads:c}=this.config,f=a??this.pool.acquire(u*n*s*4,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC),h=new ArrayBuffer(24),p=new DataView(h);p.setUint32(0,n,!0),p.setUint32(4,s,!0),p.setUint32(8,u,!0),p.setUint32(12,c,!0),p.setUint32(16,this.hDim,!0),p.setFloat32(20,1/Math.sqrt(this.hDim),!0);let l;r?(this.device.queue.writeBuffer(r,0,new Uint8Array(h)),l=r):l=this.createUniform(h);const m=[{binding:0,resource:{buffer:e}},{binding:1,resource:{buffer:t}},{binding:2,resource:{buffer:f}},{binding:3,resource:{buffer:l}}],g=n===1?v(this.bgCache,this.device,"scores",d,m):this.device.createBindGroup({layout:d,entries:m}),_=i.beginComputePass();return 
_.setPipeline(o),_.setBindGroup(0,g),_.dispatchWorkgroups(Math.ceil(n/16),Math.ceil(s/16),u),_.end(),f}applySoftmax(i,e,t,n,s,r){const{pipeline:a,bindGroupLayout:o}=this.pipelines.getOrCreate("softmax",xe),d=r??this.pool.acquire(t*n*4,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC),u=new ArrayBuffer(8),c=new DataView(u);c.setUint32(0,t,!0),c.setUint32(4,n,!0);let f;s?(this.device.queue.writeBuffer(s,0,new Uint8Array(u)),f=s):f=this.createUniform(u);const h=[{binding:0,resource:{buffer:e}},{binding:1,resource:{buffer:d}},{binding:2,resource:{buffer:f}}],p=t===1?v(this.bgCache,this.device,"softmax",o,h):this.device.createBindGroup({layout:o,entries:h}),l=i.beginComputePass();return l.setPipeline(a),l.setBindGroup(0,p),l.dispatchWorkgroups(t),l.end(),d}computeAttnV(i,e,t,n,s,r){const{pipeline:a,bindGroupLayout:o}=this.pipelines.getOrCreate("attn_v",$,"attn_v"),{numAttentionHeads:d,numKeyValueHeads:u}=this.config,c=n*d*this.hDim*4,f=this.pool.acquire(c,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC),h=new ArrayBuffer(20),p=new DataView(h);p.setUint32(0,n,!0),p.setUint32(4,s,!0),p.setUint32(8,d,!0),p.setUint32(12,u,!0),p.setUint32(16,this.hDim,!0);let l;r?(this.device.queue.writeBuffer(r,0,new Uint8Array(h)),l=r):l=this.createUniform(h);const m=[{binding:0,resource:{buffer:e}},{binding:1,resource:{buffer:t}},{binding:2,resource:{buffer:f}},{binding:3,resource:{buffer:l}}],g=n===1?v(this.bgCache,this.device,"attnV",o,m):this.device.createBindGroup({layout:o,entries:m}),_=n*d*this.hDim,b=i.beginComputePass();return b.setPipeline(a),b.setBindGroup(0,g),b.dispatchWorkgroups(Math.ceil(_/256)),b.end(),f}clearBGCache(){B(this.bgCache),this.qProj.clearBGCache(),this.kProj.clearBGCache(),this.vProj.clearBGCache(),this.oProj.clearBGCache()}destroyPreAllocated(){this.decodeScoresBuf?.destroy(),this.decodeAttnWeightsBuf?.destroy(),this.decodeScoresBuf=void 0,this.decodeAttnWeightsBuf=void 0}createUniform(i){const 
e=Math.max(Math.ceil(i.byteLength/4)*4,4),t=this.device.createBuffer({size:e,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,mappedAtCreation:!0});return new Uint8Array(t.getMappedRange()).set(new Uint8Array(i)),t.unmap(),t}};function Te(i,e,t){const n=t*e.numKeyValueHeads*U(e)*4,s=i.createBuffer({size:n,usage:GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC}),r=i.createBuffer({size:n,usage:GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC});return{key:s,value:r,seqLen:0,maxSeqLen:t}}var Ee=`// Activation functions for BitNet FFN
|
| 640 |
+
//
|
| 641 |
+
// ReLU²: relu(x)² — used in official 2B-4T model
|
| 642 |
+
// SiLU: x * sigmoid(x) — used in community models
|
| 643 |
+
//
|
| 644 |
+
// Layout:
|
| 645 |
+
// input: [N] f32
|
| 646 |
+
// output: [N] f32
|
| 647 |
+
|
| 648 |
+
struct Params {
|
| 649 |
+
N: u32,
|
| 650 |
+
activation_type: u32, // 0 = ReLU², 1 = SiLU
|
| 651 |
+
}
|
| 652 |
+
|
| 653 |
+
@group(0) @binding(0) var<storage, read> input: array<f32>;
|
| 654 |
+
@group(0) @binding(1) var<storage, read_write> output: array<f32>;
|
| 655 |
+
@group(0) @binding(2) var<uniform> params: Params;
|
| 656 |
+
|
| 657 |
+
@compute @workgroup_size(256)
|
| 658 |
+
fn main(
|
| 659 |
+
@builtin(global_invocation_id) gid: vec3<u32>,
|
| 660 |
+
) {
|
| 661 |
+
let idx = gid.x;
|
| 662 |
+
if (idx >= params.N) {
|
| 663 |
+
return;
|
| 664 |
+
}
|
| 665 |
+
|
| 666 |
+
let x = input[idx];
|
| 667 |
+
|
| 668 |
+
if (params.activation_type == 0u) {
|
| 669 |
+
// ReLU²: max(0, x)²
|
| 670 |
+
let relu_x = max(0.0, x);
|
| 671 |
+
output[idx] = relu_x * relu_x;
|
| 672 |
+
} else {
|
| 673 |
+
// SiLU: x * sigmoid(x)
|
| 674 |
+
output[idx] = x / (1.0 + exp(-x));
|
| 675 |
+
}
|
| 676 |
+
}
|
| 677 |
+
`,Ce=class{device;pipelines;pool;config;upProj;downProj;gateProj;decodeActivationUniform;decodeElementwiseUniform;bgCache=G();constructor(i,e,t,n,s,r,a){this.device=i,this.pipelines=e,this.pool=t,this.config=n,this.upProj=s,this.downProj=r,this.gateProj=a}initDecodeUniforms(){const i=this.config.activation==="relu2"?0:1;{const e=new ArrayBuffer(8),t=new DataView(e);t.setUint32(0,this.config.intermediateSize,!0),t.setUint32(4,i,!0),this.decodeActivationUniform=this.createUniform(e)}{const e=new ArrayBuffer(8),t=new DataView(e);t.setUint32(0,this.config.intermediateSize,!0),t.setUint32(4,1,!0),this.decodeElementwiseUniform=this.createUniform(e)}this.upProj.initDecodeUniforms(),this.downProj.initDecodeUniforms(),this.gateProj?.initDecodeUniforms()}forward(i,e,t){return this.gateProj?this.forwardGated(i,e,t):this.forwardSimple(i,e,t)}forwardGated(i,e,t){const n=this.config.activation==="relu2"?0:1,s=this.gateProj.forward(i,e,t),r=this.upProj.forward(i,e,t),a=this.applyActivation(t,s,e*this.config.intermediateSize,n,e);this.pool.release(s);const o=this.applyElementwise(t,a,r,e*this.config.intermediateSize,1,e);this.pool.release(a),this.pool.release(r);const d=this.downProj.forward(o,e,t);return this.pool.release(o),d}forwardSimple(i,e,t){const n=this.config.activation==="relu2"?0:1,s=this.upProj.forward(i,e,t),r=this.applyActivation(t,s,e*this.config.intermediateSize,n,e);this.pool.release(s);const a=this.downProj.forward(r,e,t);return this.pool.release(r),a}applyActivation(i,e,t,n,s){const{pipeline:r,bindGroupLayout:a}=this.pipelines.getOrCreate(`activation_${n}`,Ee),o=this.pool.acquire(t*4,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC);let d;if(s===1&&this.decodeActivationUniform)d=this.decodeActivationUniform;else{const h=new ArrayBuffer(8),p=new DataView(h);p.setUint32(0,t,!0),p.setUint32(4,n,!0),d=this.createUniform(h)}const 
u=[{binding:0,resource:{buffer:e}},{binding:1,resource:{buffer:o}},{binding:2,resource:{buffer:d}}],c=s===1?v(this.bgCache,this.device,"activation",a,u):this.device.createBindGroup({layout:a,entries:u}),f=i.beginComputePass();return f.setPipeline(r),f.setBindGroup(0,c),f.dispatchWorkgroups(Math.ceil(t/256)),f.end(),o}applyElementwise(i,e,t,n,s,r){const{pipeline:a,bindGroupLayout:o}=this.pipelines.getOrCreate(`elementwise_${s}`,J),d=this.pool.acquire(n*4,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC);let u;if(r===1&&this.decodeElementwiseUniform)u=this.decodeElementwiseUniform;else{const p=new ArrayBuffer(8),l=new DataView(p);l.setUint32(0,n,!0),l.setUint32(4,s,!0),u=this.createUniform(p)}const c=[{binding:0,resource:{buffer:e}},{binding:1,resource:{buffer:t}},{binding:2,resource:{buffer:d}},{binding:3,resource:{buffer:u}}],f=r===1?v(this.bgCache,this.device,"elementwise",o,c):this.device.createBindGroup({layout:o,entries:c}),h=i.beginComputePass();return h.setPipeline(a),h.setBindGroup(0,f),h.dispatchWorkgroups(Math.ceil(n/256)),h.end(),d}clearBGCache(){B(this.bgCache),this.upProj.clearBGCache(),this.downProj.clearBGCache(),this.gateProj?.clearBGCache()}createUniform(i){const e=Math.max(Math.ceil(i.byteLength/4)*4,4),t=this.device.createBuffer({size:e,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,mappedAtCreation:!0});return new Uint8Array(t.getMappedRange()).set(new Uint8Array(i)),t.unmap(),t}},H=class extends Error{constructor(i){super(i),this.name="GPUDeviceError"}};async function W(i){if(i)return{device:i,adapter:null,limits:i.limits};if(typeof navigator>"u"||!navigator.gpu)throw new H("WebGPU is not supported in this environment. Please use a browser with WebGPU support (Chrome 113+, Edge 113+, Firefox Nightly).");const e=await navigator.gpu.requestAdapter({powerPreference:"high-performance"});if(!e)throw new H("Failed to obtain WebGPU adapter. 
Check that your GPU drivers are up to date.");const t={};t.maxBufferSize=e.limits.maxBufferSize,t.maxStorageBufferBindingSize=e.limits.maxStorageBufferBindingSize;const n=e.limits.maxStorageBuffersPerShaderStage;t.maxStorageBuffersPerShaderStage=n,t.maxComputeWorkgroupSizeX=e.limits.maxComputeWorkgroupSizeX,t.maxComputeWorkgroupSizeY=e.limits.maxComputeWorkgroupSizeY,t.maxComputeWorkgroupSizeZ=e.limits.maxComputeWorkgroupSizeZ,t.maxComputeInvocationsPerWorkgroup=e.limits.maxComputeInvocationsPerWorkgroup,t.maxComputeWorkgroupStorageSize=e.limits.maxComputeWorkgroupStorageSize;const s=await e.requestDevice({requiredLimits:t});return s.lost.then(r=>{console.error(`WebGPU device lost: ${r.message} (reason: ${r.reason})`)}),{device:s,adapter:e,limits:s.limits}}var X=class{buffers=new Map;device;constructor(i){this.device=i}upload(i,e){const t=this.device.createBuffer({size:Math.max(e.byteLength,4),usage:GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST,mappedAtCreation:!0});return new Uint8Array(t.getMappedRange()).set(new Uint8Array(e)),t.unmap(),this.buffers.set(i,t),t}uploadSharded(i,e,t){if(e.byteLength<=t)return[this.upload(i,e)];const n=[];let s=0,r=0;for(;s<e.byteLength;){const a=Math.min(s+t,e.byteLength),o=e.slice(s,a),d=`${i}.shard_${r}`;n.push(this.upload(d,o)),s=a,r++}return n.length>0&&!this.buffers.has(i)&&this.buffers.set(i,n[0]),n}get(i){return this.buffers.get(i)}has(i){return this.buffers.has(i)}destroy(){for(const i of this.buffers.values())i.destroy();this.buffers.clear()}};async function Ae(i,e,t,n){const s=typeof i=="string"?i:i.href,r=ze(s);t?.({phase:"download",loaded:0,total:0,fraction:0});const a=await We(s,(o,d)=>{t?.({phase:"download",loaded:o,total:d,fraction:d>0?o/d:0})},n);return t?.({phase:"parse",loaded:0,total:1,fraction:0}),r==="gguf"?Re(a,e,t):Ie(a,e,t)}function ze(i){return i.endsWith(".gguf")?"gguf":i.endsWith(".safetensors")?"safetensors":"gguf"}async function Re(i,e,t){const s=new 
_e(i).parse(),r=qe(s.metadata),a=s.tensors.some(c=>c.name==="output.weight");r.tieWordEmbeddings=!a,console.debug(`[0xBitNet] config: arch=${s.metadata["general.architecture"]}, heads=${r.numAttentionHeads}, kv_heads=${r.numKeyValueHeads}, head_dim=${r.hiddenSize/r.numAttentionHeads}, hidden=${r.hiddenSize}, intermediate=${r.intermediateSize}, layers=${r.numHiddenLayers}, tied=${r.tieWordEmbeddings}`);const o=new X(e),d=e.limits.maxStorageBufferBindingSize,u=s.tensors.length;for(let c=0;c<u;c++){const f=s.tensors[c],h=s.tensorDataOffset+Number(f.offset),p=f.shape.reduce((_,b)=>_*Number(b),1);let l;if(f.type===R)l=Math.ceil(p/4)+32;else{const _=ge(f.type);l=Math.ceil(p*_)}const m=i.slice(h,h+l),g=Le(f.name);if(console.debug(`[0xBitNet] tensor: ${f.name} → ${g} (type=${f.type}, ${l} bytes)`),f.type===R){const _=Math.ceil(p/4),b=m.slice(0,_);o.uploadSharded(g,b,d);const T=new DataView(m,_,32).getFloat32(0,!0),E=Number(f.shape[1]),C=g.replace(".weight",".weight_scale"),A=new Float32Array(E).fill(T);o.upload(C,A.buffer)}else if(f.type===Q)if(g==="model.embed_tokens.weight")o.uploadSharded(g,m,d);else{const _=Me(new Uint16Array(m),p);o.uploadSharded(g,_.buffer,d)}else o.uploadSharded(g,m,d);t?.({phase:"upload",loaded:c+1,total:u,fraction:(c+1)/u})}return console.debug(`[0xBitNet] ${u} tensors loaded, tieWordEmbeddings=${r.tieWordEmbeddings}`),Oe(o,r),{config:r,weights:o,metadata:s.metadata}}function Me(i,e){const t=new Float32Array(e);for(let n=0;n<e;n++){const s=i[n],r=s>>15&1,a=s>>10&31,o=s&1023;let d;a===0?d=o/1024*Math.pow(2,-14):a===31?d=o===0?1/0:NaN:d=(1+o/1024)*Math.pow(2,a-15),t[n]=r?-d:d}return t}function Le(i){if(i==="token_embd.weight")return"model.embed_tokens.weight";if(i==="output_norm.weight")return"model.norm.weight";if(i==="output.weight")return"lm_head.weight";const e=i.match(/^blk\.(\d+)\.(.+)$/);if(!e)return 
i;const[,t,n]=e,s=`model.layers.${t}`,a={"attn_q.weight":"self_attn.q_proj.weight","attn_k.weight":"self_attn.k_proj.weight","attn_v.weight":"self_attn.v_proj.weight","attn_output.weight":"self_attn.o_proj.weight","attn_norm.weight":"input_layernorm.weight","ffn_norm.weight":"post_attention_layernorm.weight","attn_sub_norm.weight":"self_attn.sub_norm.weight","ffn_sub_norm.weight":"mlp.sub_norm.weight","ffn_up.weight":"mlp.up_proj.weight","ffn_down.weight":"mlp.down_proj.weight","ffn_gate.weight":"mlp.gate_proj.weight"}[n];return a?`${s}.${a}`:`${s}.${n}`}function Oe(i,e,t){const n=[];for(let s=0;s<e.numHiddenLayers;s++){const r=`model.layers.${s}`,a=e.hiddenSize,o=e.numAttentionHeads,d=e.numKeyValueHeads,u=a/o;n.push({name:`${r}.self_attn.q_proj.weight_scale`,dim:o*u},{name:`${r}.self_attn.k_proj.weight_scale`,dim:d*u},{name:`${r}.self_attn.v_proj.weight_scale`,dim:d*u},{name:`${r}.self_attn.o_proj.weight_scale`,dim:a},{name:`${r}.mlp.up_proj.weight_scale`,dim:e.intermediateSize},{name:`${r}.mlp.down_proj.weight_scale`,dim:a},{name:`${r}.mlp.gate_proj.weight_scale`,dim:e.intermediateSize})}n.push({name:"lm_head.weight_scale",dim:e.vocabSize});for(const{name:s,dim:r}of n)if(!i.has(s)){const a=new Float32Array(r).fill(1);i.upload(s,a.buffer)}}async function Ie(i,e,t){const{header:n,dataOffset:s}=be(i),r=ve(n,s),a=Ne(r),o=new X(e),d=e.limits.maxStorageBufferBindingSize;for(let u=0;u<r.length;u++){const c=r[u],f=i.slice(c.offset,c.offset+c.size);o.uploadSharded(c.name,f,d),t?.({phase:"upload",loaded:u+1,total:r.length,fraction:(u+1)/r.length})}return{config:a,weights:o}}function qe(i){const e=i["general.architecture"]??"bitnet";function t(c){return i[`${e}.${c}`]??i[`llama.${c}`]??i[`bitnet.${c}`]??i[`bitnet-25.${c}`]}const 
n=t("embedding_length")??2560,s=t("block_count")??30,r=t("attention.head_count")??20,a=t("attention.head_count_kv")??r,o=t("vocab_size")??i["tokenizer.ggml.tokens"]?.length??128256,d=t("feed_forward_length")??6912,u=o>1e5||e.includes("bitnet");return{modelType:"bitnet",vocabSize:o,hiddenSize:n,intermediateSize:d,numHiddenLayers:s,numAttentionHeads:r,numKeyValueHeads:a,maxPositionEmbeddings:t("context_length")??4096,rmsNormEps:t("attention.layer_norm_rms_epsilon")??1e-5,ropeTheta:t("rope.freq_base")??(u?5e5:1e4),tieWordEmbeddings:!1,activation:u?"relu2":"silu"}}function Ne(i){const e=i.find(p=>p.name==="model.embed_tokens.weight"||p.name==="transformer.wte.weight"),t=e?.shape[0]??128256,n=e?.shape[1]??2560,s=i.map(p=>{const l=p.name.match(/layers\.(\d+)\./);return l?parseInt(l[1],10):-1}).filter(p=>p>=0),r=s.length>0?Math.max(...s)+1:30,a=i.find(p=>p.name.includes("q_proj.weight")),o=a?a.shape[0]/(n/32):32,u=i.find(p=>p.name.includes("k_proj.weight"))?.shape[0]??n,c=n/o,f=u/c,h=t>1e5;return{modelType:"bitnet",vocabSize:t,hiddenSize:n,intermediateSize:0,numHiddenLayers:r,numAttentionHeads:o,numKeyValueHeads:f,maxPositionEmbeddings:4096,rmsNormEps:1e-5,ropeTheta:h?5e5:1e4,tieWordEmbeddings:!1,activation:h?"relu2":"silu"}}var Ve="0xbitnet",S="models";function j(){return new Promise((i,e)=>{const t=indexedDB.open(Ve,1);t.onupgradeneeded=()=>t.result.createObjectStore(S),t.onsuccess=()=>i(t.result),t.onerror=()=>e(t.error)})}function $e(i,e){return new Promise((t,n)=>{const r=i.transaction(S,"readonly").objectStore(S).get(e);r.onsuccess=()=>t(r.result),r.onerror=()=>n(r.error)})}function He(i,e,t){return new Promise((n,s)=>{const r=i.transaction(S,"readwrite");r.objectStore(S).put(t,e),r.oncomplete=()=>n(),r.onerror=()=>s(r.error)})}async function We(i,e,t){if(typeof indexedDB<"u")try{const c=await j(),f=await $e(c,i);if(c.close(),f)return e(f.byteLength,f.byteLength),f}catch{}const n=await fetch(i,{signal:t});if(!n.ok)throw new Error(`Failed to fetch model: ${n.status} 
${n.statusText}`);const s=parseInt(n.headers.get("content-length")??"0",10),r=n.body?.getReader();if(!r){const c=await n.arrayBuffer();return e(c.byteLength,c.byteLength),c}const a=[];let o=0;for(;;){const{done:c,value:f}=await r.read();if(c)break;a.push(f),o+=f.byteLength,e(o,s)}const d=new Uint8Array(o);let u=0;for(const c of a)d.set(c,u),u+=c.byteLength;if(typeof indexedDB<"u")try{const c=await j();await He(c,i,d.buffer),c.close()}catch{}return d.buffer}var je=`// Token embedding lookup (F16 on GPU)
//
// For each token ID, copy the corresponding row from the embedding table.
// Embedding table is stored as packed F16 pairs (two f16 values per u32)
// to avoid exceeding maxStorageBufferBindingSize on most GPUs.
//
// Layout:
//   token_ids:   [N] u32
//   embed_table: [V * D / 2] u32 (packed f16 pairs)
//   output:      [N, D] f32

struct Params {
  N: u32, // number of tokens
  D: u32, // embedding dimension
  V: u32, // vocab size
}

@group(0) @binding(0) var<storage, read> token_ids: array<u32>;
@group(0) @binding(1) var<storage, read> embed_table: array<u32>;
@group(0) @binding(2) var<storage, read_write> output: array<f32>;
@group(0) @binding(3) var<uniform> params: Params;

@compute @workgroup_size(256)
fn main(
  @builtin(global_invocation_id) gid: vec3<u32>,
) {
  let idx = gid.x;
  let total = params.N * params.D;
  if (idx >= total) {
    return;
  }

  let token = idx / params.D;
  let dim = idx % params.D;
  let token_id = token_ids[token];

  // Bounds check: treat out-of-vocab as zero
  if (token_id < params.V) {
    let flat = token_id * params.D + dim;
    let packed = embed_table[flat / 2u];
    let pair = unpack2x16float(packed);
    output[idx] = select(pair.x, pair.y, (flat & 1u) == 1u);
  } else {
    output[idx] = 0.0;
  }
}
`,Ke=`// F32 GEMV for tied-embedding LM head (F16 embedding on GPU)
// logits[n, v] = sum_d( hidden[n, d] * embed[v, d] )
//
// hidden: [N, D] f32 — final hidden states
// embed:  [V * D / 2] u32 — embedding table stored as packed f16 pairs
// output: [N, V] f32 — logits
//
// Each workgroup computes one (n, v) element.
// 256 threads cooperatively reduce over D.
// 2D dispatch: v = wg_id.x + wg_id.y * 65535 (V can exceed 65535)

struct Params {
  N: u32,
  V: u32,
  D: u32,
}

@group(0) @binding(0) var<storage, read> hidden: array<f32>;
@group(0) @binding(1) var<storage, read> embed: array<u32>;
@group(0) @binding(2) var<storage, read_write> output: array<f32>;
@group(0) @binding(3) var<uniform> params: Params;

const WG_SIZE: u32 = 256u;

var<workgroup> shared_sums: array<f32, 256>;

@compute @workgroup_size(256)
fn main(
  @builtin(workgroup_id) wg_id: vec3<u32>,
  @builtin(local_invocation_id) local_id: vec3<u32>,
) {
  // Decode (n, v) from 2D dispatch
  let flat_id = wg_id.x + wg_id.y * 65535u;
  let n = flat_id / params.V;
  let v = flat_id % params.V;

  if (n >= params.N || v >= params.V) {
    return;
  }

  let tid = local_id.x;

  // Each thread accumulates a strided slice of D
  // Process pairs of dimensions for efficiency
  var acc: f32 = 0.0;
  let hidden_base = n * params.D;
  let embed_base = v * params.D;

  // Process two dimensions at a time using packed f16 pairs
  let D_half = params.D / 2u;
  for (var dh = tid; dh < D_half; dh += WG_SIZE) {
    let d = dh * 2u;
    let packed = embed[embed_base / 2u + dh];
    let pair = unpack2x16float(packed);
    acc += hidden[hidden_base + d] * pair.x;
    acc += hidden[hidden_base + d + 1u] * pair.y;
  }

  // Workgroup reduction
  shared_sums[tid] = acc;
  workgroupBarrier();

  for (var stride = WG_SIZE / 2u; stride > 0u; stride >>= 1u) {
    if (tid < stride) {
      shared_sums[tid] += shared_sums[tid + stride];
    }
    workgroupBarrier();
  }

  // Thread 0 writes the result
  if (tid == 0u) {
    output[n * params.V + v] = shared_sums[0];
  }
}
`,Fe=class ee{device;pipelines;pool;config;embedTokens;layers;finalNorm;lmHead;kvCaches;decodeTokenBuffer;decodeEmbeddingUniform;decodeFinalNormUniform;decodeLMHeadUniform;bgCache=G();constructor(e,t,n,s,r,a,o,d,u){this.device=e,this.pipelines=t,this.pool=n,this.config=s,this.embedTokens=r,this.layers=a,this.finalNorm=o,this.lmHead=d,this.kvCaches=u}static build(e,t,n,s=4096){const r=new ye(e),a=new Ue(e);function o(l){const m=n.get(l);if(!m)throw new Error(`Missing weight tensor: "${l}"`);return m}const d=o("model.embed_tokens.weight"),u=o("model.norm.weight"),c=[],f=[];for(let l=0;l<t.numHiddenLayers;l++){const m=`model.layers.${l}`,g=o(`${m}.input_layernorm.weight`),_=o(`${m}.post_attention_layernorm.weight`),b=n.get(`${m}.self_attn.sub_norm.weight`)??null,I=n.get(`${m}.mlp.sub_norm.weight`)??null,T=new w(e,r,a,o(`${m}.self_attn.q_proj.weight`),o(`${m}.self_attn.q_proj.weight_scale`),null,t.hiddenSize,t.numAttentionHeads*U(t)),E=new w(e,r,a,o(`${m}.self_attn.k_proj.weight`),o(`${m}.self_attn.k_proj.weight_scale`),null,t.hiddenSize,t.numKeyValueHeads*U(t)),C=new w(e,r,a,o(`${m}.self_attn.v_proj.weight`),o(`${m}.self_attn.v_proj.weight_scale`),null,t.hiddenSize,t.numKeyValueHeads*U(t)),A=new w(e,r,a,o(`${m}.self_attn.o_proj.weight`),o(`${m}.self_attn.o_proj.weight_scale`),b,t.numAttentionHeads*U(t),t.hiddenSize),re=new De(e,r,a,t,T,E,C,A),ie=new w(e,r,a,o(`${m}.mlp.up_proj.weight`),o(`${m}.mlp.up_proj.weight_scale`),null,t.hiddenSize,t.intermediateSize),se=new w(e,r,a,o(`${m}.mlp.down_proj.weight`),o(`${m}.mlp.down_proj.weight_scale`),I,t.intermediateSize,t.hiddenSize);let q=null;n.has(`${m}.mlp.gate_proj.weight`)&&(q=new w(e,r,a,o(`${m}.mlp.gate_proj.weight`),o(`${m}.mlp.gate_proj.weight_scale`),null,t.hiddenSize,t.intermediateSize));const ae=new Ce(e,r,a,t,ie,se,q);c.push(new ke(e,r,a,t,g,_,re,ae)),f.push(Te(e,t,s))}let h;t.tieWordEmbeddings||!n.has("lm_head.weight")?h=d:h=new 
w(e,r,a,o("lm_head.weight"),o("lm_head.weight_scale"),n.get("lm_head.input_norm.weight")??u,t.hiddenSize,t.vocabSize);const p=new ee(e,r,a,t,d,c,u,h,f);return p.initDecodeUniforms(s),p}initDecodeUniforms(e){this.decodeTokenBuffer=this.device.createBuffer({size:4,usage:GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST});{const t=new ArrayBuffer(12),n=new DataView(t);n.setUint32(0,1,!0),n.setUint32(4,this.config.hiddenSize,!0),n.setUint32(8,this.config.vocabSize,!0),this.decodeEmbeddingUniform=this.createUniform(t)}{const t=new ArrayBuffer(12),n=new DataView(t);n.setUint32(0,1,!0),n.setUint32(4,this.config.hiddenSize,!0),n.setFloat32(8,this.config.rmsNormEps,!0),this.decodeFinalNormUniform=this.createUniform(t)}if(!(this.lmHead instanceof w)){const t=new ArrayBuffer(12),n=new DataView(t);n.setUint32(0,1,!0),n.setUint32(4,this.config.vocabSize,!0),n.setUint32(8,this.config.hiddenSize,!0),this.decodeLMHeadUniform=this.createUniform(t)}for(const t of this.layers)t.initDecodeUniforms(e);this.lmHead instanceof w&&this.lmHead.initDecodeUniforms()}forward(e){const t=e.length,n=this.device.createCommandEncoder();let s;if(t===1&&this.decodeTokenBuffer){const u=new ArrayBuffer(4);new DataView(u).setUint32(0,e[0],!0),this.device.queue.writeBuffer(this.decodeTokenBuffer,0,new Uint8Array(u)),s=this.decodeTokenBuffer}else s=this.device.createBuffer({size:e.byteLength,usage:GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST,mappedAtCreation:!0}),new Uint32Array(s.getMappedRange()).set(e),s.unmap();let r=this.dispatchEmbedding(n,s,t);for(let u=0;u<this.layers.length;u++){const c=this.layers[u].forward(r,t,this.kvCaches[u],n);this.pool.release(r),r=c,this.kvCaches[u].seqLen+=t}const a=this.dispatchFinalNorm(n,r,t);this.pool.release(r);let o;t>1?(o=this.pool.acquire(this.config.hiddenSize*4,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC|GPUBufferUsage.COPY_DST),n.copyBufferToBuffer(a,(t-1)*this.config.hiddenSize*4,o,0,this.config.hiddenSize*4),this.pool.release(a)):o=a;let d;return 
this.lmHead instanceof w?d=this.lmHead.forward(o,1,n):d=this.dispatchLMHead(n,o,1),t>1?this.pool.release(o):this.pool.release(a),this.device.queue.submit([n.finish()]),d}releaseBuffer(e){this.pool.release(e)}resetKVCache(){for(const e of this.kvCaches)e.seqLen=0;B(this.bgCache);for(const e of this.layers)e.clearBGCache();this.lmHead instanceof w&&this.lmHead.clearBGCache()}dispose(){B(this.bgCache);for(const e of this.layers)e.clearBGCache(),e.destroyPreAllocated();this.lmHead instanceof w&&this.lmHead.clearBGCache();for(const e of this.kvCaches)e.key.destroy(),e.value.destroy();this.pool.destroy(),this.pipelines.clear()}async diagnose(e){const t=e.length,n=[];this.resetKVCache();const s=this.device.createBuffer({size:e.byteLength,usage:GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST,mappedAtCreation:!0});new Uint32Array(s.getMappedRange()).set(e),s.unmap();let r=this.device.createCommandEncoder();const a=this.dispatchEmbedding(r,s,t);this.device.queue.submit([r.finish()]),n.push(await this.readDiag("embedding",a,t*this.config.hiddenSize)),r=this.device.createCommandEncoder();const o=this.layers[0].forward(a,t,this.kvCaches[0],r);this.device.queue.submit([r.finish()]),this.kvCaches[0].seqLen+=t,n.push(await this.readDiag("layer_0",o,t*this.config.hiddenSize)),this.pool.release(a),r=this.device.createCommandEncoder();const d=this.layers[1].forward(o,t,this.kvCaches[1],r);this.device.queue.submit([r.finish()]),this.kvCaches[1].seqLen+=t,n.push(await this.readDiag("layer_1",d,t*this.config.hiddenSize)),this.pool.release(o);let u=d;for(let p=2;p<this.layers.length;p++){r=this.device.createCommandEncoder();const l=this.layers[p].forward(u,t,this.kvCaches[p],r);this.device.queue.submit([r.finish()]),this.pool.release(u),u=l,this.kvCaches[p].seqLen+=t}n.push(await this.readDiag("last_layer",u,t*this.config.hiddenSize)),r=this.device.createCommandEncoder();const c=this.dispatchFinalNorm(r,u,t);this.device.queue.submit([r.finish()]),this.pool.release(u),n.push(await 
this.readDiag("final_norm",c,t*this.config.hiddenSize));let f;t>1?(f=this.pool.acquire(this.config.hiddenSize*4,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC|GPUBufferUsage.COPY_DST),r=this.device.createCommandEncoder(),r.copyBufferToBuffer(c,(t-1)*this.config.hiddenSize*4,f,0,this.config.hiddenSize*4),this.device.queue.submit([r.finish()]),this.pool.release(c)):f=c,n.push(await this.readDiag("lm_input",f,this.config.hiddenSize)),r=this.device.createCommandEncoder();let h;return this.lmHead instanceof w?h=this.lmHead.forward(f,1,r):h=this.dispatchLMHead(r,f,1),this.device.queue.submit([r.finish()]),n.push(await this.readDiag("logits_first100",h,100)),this.pool.release(f===c?c:f),this.pool.release(h),n}async readDiag(e,t,n){const s=n*4,r=this.device.createBuffer({size:s,usage:GPUBufferUsage.MAP_READ|GPUBufferUsage.COPY_DST}),a=this.device.createCommandEncoder();a.copyBufferToBuffer(t,0,r,0,s),this.device.queue.submit([a.finish()]),await r.mapAsync(GPUMapMode.READ);const o=new Float32Array(r.getMappedRange().slice(0));r.unmap(),r.destroy();let d=1/0,u=-1/0,c=0,f=0,h=0,p=0,l=0;for(let _=0;_<o.length;_++){const b=o[_];if(isNaN(b)){h++;continue}if(!isFinite(b)){p++;continue}b===0&&l++,b<d&&(d=b),b>u&&(u=b),c+=b,f+=b*b}const m=c/o.length,g=Math.sqrt(f/o.length);return{name:e,length:o.length,min:d,max:u,mean:m,rms:g,nanCount:h,infCount:p,zeroCount:l,first8:Array.from(o.slice(0,8))}}dispatchEmbedding(e,t,n){const{pipeline:s,bindGroupLayout:r}=this.pipelines.getOrCreate("embedding",je),a=n*this.config.hiddenSize*4,o=this.pool.acquire(a,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC);let d;if(n===1&&this.decodeEmbeddingUniform)d=this.decodeEmbeddingUniform;else{const p=new ArrayBuffer(12),l=new DataView(p);l.setUint32(0,n,!0),l.setUint32(4,this.config.hiddenSize,!0),l.setUint32(8,this.config.vocabSize,!0),d=this.createUniform(p)}const 
u=[{binding:0,resource:{buffer:t}},{binding:1,resource:{buffer:this.embedTokens}},{binding:2,resource:{buffer:o}},{binding:3,resource:{buffer:d}}],c=n===1?v(this.bgCache,this.device,"embedding",r,u):this.device.createBindGroup({layout:r,entries:u}),f=n*this.config.hiddenSize,h=e.beginComputePass();return h.setPipeline(s),h.setBindGroup(0,c),h.dispatchWorkgroups(Math.ceil(f/256)),h.end(),o}dispatchFinalNorm(e,t,n){const{pipeline:s,bindGroupLayout:r}=this.pipelines.getOrCreate("rmsnorm",O),a=this.pool.acquire(n*this.config.hiddenSize*4,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC);let o;if(n===1&&this.decodeFinalNormUniform)o=this.decodeFinalNormUniform;else{const f=new ArrayBuffer(12),h=new DataView(f);h.setUint32(0,n,!0),h.setUint32(4,this.config.hiddenSize,!0),h.setFloat32(8,this.config.rmsNormEps,!0),o=this.createUniform(f)}const d=[{binding:0,resource:{buffer:t}},{binding:1,resource:{buffer:this.finalNorm}},{binding:2,resource:{buffer:a}},{binding:3,resource:{buffer:o}}],u=n===1?v(this.bgCache,this.device,"finalNorm",r,d):this.device.createBindGroup({layout:r,entries:d}),c=e.beginComputePass();return c.setPipeline(s),c.setBindGroup(0,u),c.dispatchWorkgroups(n),c.end(),a}dispatchLMHead(e,t,n){const s=this.config.vocabSize,r=this.config.hiddenSize,{pipeline:a,bindGroupLayout:o}=this.pipelines.getOrCreate("f32_matmul",Ke),d=this.pool.acquire(n*s*4,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC);let u;if(n===1&&this.decodeLMHeadUniform)u=this.decodeLMHeadUniform;else{const g=new ArrayBuffer(12),_=new DataView(g);_.setUint32(0,n,!0),_.setUint32(4,s,!0),_.setUint32(8,r,!0),u=this.createUniform(g)}const c=[{binding:0,resource:{buffer:t}},{binding:1,resource:{buffer:this.embedTokens}},{binding:2,resource:{buffer:d}},{binding:3,resource:{buffer:u}}],f=n===1?v(this.bgCache,this.device,"lmHead",o,c):this.device.createBindGroup({layout:o,entries:c}),h=n*s,p=Math.min(h,65535),l=Math.ceil(h/65535),m=e.beginComputePass();return 
m.setPipeline(a),m.setBindGroup(0,f),m.dispatchWorkgroups(p,l),m.end(),d}createUniform(e){const t=Math.max(Math.ceil(e.byteLength/4)*4,4),n=this.device.createBuffer({size:t,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,mappedAtCreation:!0});return new Uint8Array(n.getMappedRange()).set(new Uint8Array(e)),n.unmap(),n}},K=class y{config;vocab;reverseVocab;merges;mergeRanks;bosId;eosId;textEncoder=new TextEncoder;textDecoder=new TextDecoder("utf-8",{fatal:!1});constructor(e,t,n){this.config=e,this.vocab=t,this.merges=n,this.bosId=e.bosToken??1,this.eosId=e.eosToken??2,this.reverseVocab=new Map;for(const[s,r]of t)this.reverseVocab.set(r,s);this.mergeRanks=new Map;for(let s=0;s<n.length;s++)this.mergeRanks.set(`${n[s][0]} ${n[s][1]}`,s)}static fromGGUFMetadata(e){const t=e["tokenizer.ggml.tokens"],n=e["tokenizer.ggml.merges"],s=e["tokenizer.ggml.model"]??"gpt2",r=new Map;for(let c=0;c<t.length;c++)r.set(t[c],c);const a=[];if(n)for(const c of n){const f=c.split(" ");f.length===2&&a.push([f[0],f[1]])}const o=e["tokenizer.ggml.bos_token_id"]??1,d=e["tokenizer.ggml.eos_token_id"]??2,u={type:s==="gpt2"?"bpe":"sentencepiece",vocabSize:t.length,bosToken:o,eosToken:d};return new y(u,r,a)}static fromJSON(e){const t=new Map(Object.entries(e.vocab)),n=e.merges.map(r=>{const a=r.split(" ");return[a[0],a[1]]}),s={type:e.config?.type??"bpe",vocabSize:t.size,bosToken:e.config?.bosToken??1,eosToken:e.config?.eosToken??2};return new y(s,t,n)}encode(e,t=!0){const n=[];t&&n.push(this.bosId),this.config.type==="sentencepiece"&&(e=" "+e);const s=new RegExp("(?:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+","gu"),r=e.match(s)??[e];for(const a of r){const o=this.bpeEncode(a);n.push(...o)}return new Uint32Array(n)}decode(e){const t=[];for(const n of e){if(n===this.bosId||n===this.eosId)continue;const s=this.reverseVocab.get(n);s!==void 0&&t.push(this.decodeToken(s))}return 
t.join("")}decodeToken(e){if(e.startsWith("<0x")&&e.endsWith(">")){const t=parseInt(e.slice(3,-1),16);return String.fromCharCode(t)}return this.config.type==="sentencepiece"?e.replace(/▁/g," "):this.bytesToString(e)}get eosTokenId(){return this.eosId}get bosTokenId(){return this.bosId}get eotTokenId(){return this.vocab.get("<|eot_id|>")}applyChatTemplate(e){const t=this.vocab.get("<|start_header_id|>"),n=this.vocab.get("<|end_header_id|>"),s=this.vocab.get("<|eot_id|>");if(t===void 0||n===void 0||s===void 0){console.warn(`[0xBitNet] Chat template fallback: special tokens missing (start_header=${t}, end_header=${n}, eot=${s})`);const a=e.map(o=>o.content).join(`
|
| 798 |
+
`);return this.encode(a)}console.debug(`[0xBitNet] Chat template: start_header=${t}, end_header=${n}, eot=${s}`);const r=[this.bosId];for(const a of e)r.push(t),r.push(...this.encode(a.role,!1)),r.push(n),r.push(...this.encode(`
|
| 799 |
+
|
| 800 |
+
`+a.content,!1)),r.push(s);return r.push(t),r.push(...this.encode("assistant",!1)),r.push(n),r.push(...this.encode(`
|
| 801 |
+
|
| 802 |
+
`,!1)),new Uint32Array(r)}bpeEncode(e){if(e.length===0)return[];let t;for(this.config.type==="sentencepiece"?t=[...e].map(s=>s.replace(" ","▁")):t=this.stringToBytes(e);t.length>1;){let s=1/0,r=-1;for(let o=0;o<t.length-1;o++){const d=`${t[o]} ${t[o+1]}`,u=this.mergeRanks.get(d);u!==void 0&&u<s&&(s=u,r=o)}if(r===-1)break;const a=t[r]+t[r+1];t.splice(r,2,a)}const n=[];for(const s of t){const r=this.vocab.get(s);if(r!==void 0)n.push(r);else for(const a of this.textEncoder.encode(s)){const o=`<0x${a.toString(16).toUpperCase().padStart(2,"0")}>`,d=this.vocab.get(o);d!==void 0&&n.push(d)}}return n}static byteToUnicode=null;static getByteToUnicode(){if(y.byteToUnicode)return y.byteToUnicode;const e=new Map,t=[[33,126],[161,172],[174,255]],n=[];for(const[a,o]of t)for(let d=a;d<=o;d++)n.push(d);const s=[...n];let r=0;for(let a=0;a<256;a++)n.includes(a)||(n.push(a),s.push(256+r),r++);for(let a=0;a<n.length;a++)e.set(n[a],String.fromCharCode(s[a]));return y.byteToUnicode=e,e}stringToBytes(e){const t=y.getByteToUnicode(),n=this.textEncoder.encode(e),s=[];for(const r of n)s.push(t.get(r)??String.fromCharCode(r));return s}bytesToString(e){const t=y.getByteToUnicode(),n=new Map;for(const[r,a]of t)n.set(a,r);const s=[];for(const r of e){const a=n.get(r);a!==void 0?s.push(a):s.push(r.charCodeAt(0))}return this.textDecoder.decode(new Uint8Array(s))}},Ye=class te{model;tokenizer;device;readbackBuffer;logitsArray;constructor(e,t,n){this.model=e,this.tokenizer=t,this.device=n;const s=e.config.vocabSize;this.readbackBuffer=n.createBuffer({size:s*4,usage:GPUBufferUsage.MAP_READ|GPUBufferUsage.COPY_DST}),this.logitsArray=new Float32Array(s)}static async load(e,t={}){const n=t.device?await W(t.device):await W(),s=await Ae(e,n.device,t.onProgress,t.signal),r=Fe.build(n.device,s.config,s.weights);let a;const o=typeof e=="string"?e:e.href;if(o.endsWith(".gguf")&&s.metadata)a=K.fromGGUFMetadata(s.metadata);else{if(o.endsWith(".gguf"))throw new Error("Could not extract tokenizer metadata from 
GGUF file.");{const u=`${o.substring(0,o.lastIndexOf("/"))}/tokenizer.json`,c=await fetch(u,{signal:t.signal});if(c.ok){const f=await c.json();a=K.fromJSON(f)}else throw new Error("Could not find tokenizer. For safetensors models, provide a tokenizer.json in the same directory.")}}return new te(r,a,n.device)}async*generate(e,t={}){const n=t.maxTokens??256,s=t.temperature??1,r=t.topK??50,a=t.repeatPenalty??1,o=t.repeatLastN??64,d=t.signal,u=Array.isArray(e)?this.tokenizer.applyChatTemplate(e):this.tokenizer.encode(e);this.model.resetKVCache();const c=this.tokenizer.eotTokenId,f=[];console.debug(`[0xBitNet] generate: ${u.length} input tokens, eotId=${c}, temp=${s}, topK=${r}, repeatPenalty=${a}`),console.debug("[0xBitNet] first 20 token IDs:",Array.from(u.slice(0,20)));let h=this.model.forward(u);for(let p=0;p<n&&!d?.aborted;p++){const l=o>0?f.slice(-o):f,m=await this.sampleToken(h,s,r,a,l);if(this.model.releaseBuffer(h),m===this.tokenizer.eosTokenId||m===c)break;f.push(m);const g=this.tokenizer.decode([m]);t.onToken?.(g),yield g,h=this.model.forward(new Uint32Array([m]))}}dispose(){this.readbackBuffer.destroy(),this.model.dispose()}async diagnose(e="Hello"){const t=this.tokenizer.encode(e);return this.model.diagnose(t)}async sampleToken(e,t,n,s,r){const a=this.model.config.vocabSize,o=this.device.createCommandEncoder();o.copyBufferToBuffer(e,0,this.readbackBuffer,0,a*4),this.device.queue.submit([o.finish()]),await this.readbackBuffer.mapAsync(GPUMapMode.READ);const d=new Float32Array(this.readbackBuffer.getMappedRange()),u=this.logitsArray;if(u.set(d),this.readbackBuffer.unmap(),s!==1&&r.length>0)for(const l of r)u[l]>0?u[l]/=s:u[l]*=s;if(t!==1){const l=1/t;for(let m=0;m<a;m++)u[m]*=l}if(n>0&&n<a){const l=new Uint32Array(n);for(let g=0;g<n;g++)l[g]=g;for(let g=(n>>1)-1;g>=0;g--)F(l,g,n,u);for(let g=n;g<a;g++)u[g]>u[l[0]]&&(l[0]=g,F(l,0,n,u));const m=u[l[0]];for(let g=0;g<a;g++)u[g]<m&&(u[g]=-1/0)}let c=-1/0;for(let l=0;l<a;l++)u[l]>c&&(c=u[l]);let f=0;for(let 
l=0;l<a;l++)u[l]=Math.exp(u[l]-c),f+=u[l];const h=Math.random()*f;let p=0;for(let l=0;l<a;l++)if(p+=u[l],p>=h)return l;return a-1}};function F(i,e,t,n){for(;;){let s=e;const r=2*e+1,a=2*e+2;if(r<t&&n[i[r]]<n[i[s]]&&(s=r),a<t&&n[i[a]]<n[i[s]]&&(s=a),s===e)break;const o=i[e];i[e]=i[s],i[s]=o,e=s}}const Ze="https://huggingface.co/microsoft/bitnet-b1.58-2B-4T-gguf/resolve/main/ggml-model-i2_s.gguf",Qe=document.getElementById("hero"),Y=document.getElementById("status"),Z=document.getElementById("status-text"),Je=document.getElementById("progress-fill"),x=document.getElementById("load-btn"),k=document.getElementById("messages"),Xe=document.getElementById("input-area"),P=document.getElementById("user-input"),D=document.getElementById("send-btn");let M=null,z=!1;x.addEventListener("click",async()=>{x.disabled=!0,Qe.style.display="none",Y.style.display="block";try{M=await Ye.load(Ze,{onProgress(i){const e=(i.fraction*100).toFixed(1);Z.textContent=`${i.phase}: ${e}%`,Je.style.width=`${e}%`}}),Y.style.display="none",k.style.display="flex",Xe.style.display="flex",D.disabled=!1,P.focus(),L("assistant","Model loaded! Ask me anything.")}catch(i){Z.textContent=`Error: ${i.message}`}});D.addEventListener("click",ne);P.addEventListener("keydown",i=>{i.key==="Enter"&&!i.shiftKey&&(i.preventDefault(),ne())});async function ne(){if(!M||z)return;const i=P.value.trim();if(!i)return;P.value="",L("user",i),z=!0,D.disabled=!0;const e=L("assistant",""),t=[{role:"system",content:"You are a helpful assistant."},{role:"user",content:i}];try{for await(const n of M.generate(t,{maxTokens:512,temperature:.7,topK:40,repeatPenalty:1.1}))e.textContent+=n,k.scrollTop=k.scrollHeight}catch(n){e.textContent+=`
|
| 803 |
+
[Error: ${n.message}]`}z=!1,D.disabled=!1,P.focus()}function L(i,e){const t=document.createElement("div");return t.className=`msg ${i}`,t.textContent=e,k.appendChild(t),k.scrollTop=k.scrollHeight,t}navigator.gpu||(x.disabled=!0,x.textContent="WebGPU not supported — use Chrome 113+");
|
index.html
ADDED
|
@@ -0,0 +1,224 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8" />
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
  <title>0xBitNet Demo</title>
  <style>
    * { margin: 0; padding: 0; box-sizing: border-box; }

    body {
      font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
      background: #0a0a0a;
      color: #e0e0e0;
      height: 100vh;
      display: flex;
      flex-direction: column;
    }

    header {
      padding: 1rem 1.5rem;
      border-bottom: 1px solid #222;
      display: flex;
      align-items: center;
      gap: 0.75rem;
    }

    header h1 { font-size: 1.25rem; font-weight: 600; color: #fff; }

    header .badge {
      font-size: 0.7rem;
      padding: 0.15rem 0.5rem;
      border-radius: 9999px;
      background: #1a3a2a;
      color: #4ade80;
      font-weight: 500;
    }

    #status {
      padding: 0.75rem 1.5rem;
      background: #111;
      border-bottom: 1px solid #222;
      font-size: 0.85rem;
      color: #888;
      display: none;
    }

    .progress-bar {
      margin-top: 0.5rem;
      height: 4px;
      background: #222;
      border-radius: 2px;
      overflow: hidden;
    }

    .progress-fill {
      height: 100%;
      background: #4ade80;
      width: 0%;
      transition: width 0.3s;
    }

    #hero {
      flex: 1;
      display: flex;
      flex-direction: column;
      align-items: center;
      justify-content: center;
      gap: 1.5rem;
      padding: 2rem;
      text-align: center;
    }

    #hero h2 {
      font-size: 1.5rem;
      color: #fff;
    }

    #hero p {
      max-width: 480px;
      color: #888;
      line-height: 1.6;
      font-size: 0.95rem;
    }

    #hero pre {
      background: #111;
      border: 1px solid #333;
      border-radius: 8px;
      padding: 1.25rem 1.5rem;
      max-width: 560px;
      width: 100%;
      text-align: left;
      font-size: 0.85rem;
      line-height: 1.7;
      color: #c9d1d9;
      overflow-x: auto;
    }

    #hero pre .kw { color: #ff7b72; }
    #hero pre .str { color: #a5d6ff; }
    #hero pre .fn { color: #d2a8ff; }
    #hero pre .cmt { color: #555; }

    #load-btn {
      padding: 0.85rem 2.5rem;
      border-radius: 8px;
      border: none;
      background: #166534;
      color: #fff;
      font-weight: 600;
      cursor: pointer;
      font-size: 1rem;
    }

    #load-btn:hover { background: #15803d; }
    #load-btn:disabled { opacity: 0.5; cursor: not-allowed; }

    #messages {
      flex: 1;
      overflow-y: auto;
      padding: 1.5rem;
      display: none;
      flex-direction: column;
      gap: 1rem;
    }

    .msg {
      max-width: 70%;
      padding: 0.75rem 1rem;
      border-radius: 12px;
      line-height: 1.5;
      white-space: pre-wrap;
      word-break: break-word;
    }

    .msg.user {
      align-self: flex-end;
      background: #1a365d;
      color: #bee3f8;
    }

    .msg.assistant {
      align-self: flex-start;
      background: #1a1a1a;
      border: 1px solid #333;
    }

    #input-area {
      padding: 1rem 1.5rem;
      border-top: 1px solid #222;
      display: none;
      gap: 0.75rem;
    }

    #input-area input {
      flex: 1;
      padding: 0.75rem 1rem;
      border-radius: 8px;
      border: 1px solid #333;
      background: #111;
      color: #e0e0e0;
      font-size: 0.95rem;
      outline: none;
    }

    #input-area input:focus { border-color: #4ade80; }

    #input-area button {
      padding: 0.75rem 1.5rem;
      border-radius: 8px;
      border: none;
      background: #166534;
      color: #fff;
      font-weight: 600;
      cursor: pointer;
    }

    #input-area button:hover { background: #15803d; }
    #input-area button:disabled { opacity: 0.5; cursor: not-allowed; }

    .npm-link {
      font-size: 0.8rem;
      color: #555;
    }

    .npm-link a { color: #4ade80; }
|
| 187 |
+
</style>
|
| 188 |
+
<script type="module" crossorigin src="./assets/index-gTdi7eoP.js"></script>
|
| 189 |
+
</head>
|
| 190 |
+
<body>
|
| 191 |
+
<header>
|
| 192 |
+
<h1>0xBitNet</h1>
|
| 193 |
+
<span class="badge">WebGPU</span>
|
| 194 |
+
</header>
|
| 195 |
+
|
| 196 |
+
<div id="status">
|
| 197 |
+
<span id="status-text">Loading model...</span>
|
| 198 |
+
<div class="progress-bar">
|
| 199 |
+
<div class="progress-fill" id="progress-fill"></div>
|
| 200 |
+
</div>
|
| 201 |
+
</div>
|
| 202 |
+
|
| 203 |
+
<div id="hero">
|
| 204 |
+
<h2>BitNet in the Browser. That's it.</h2>
|
| 205 |
+
<pre><code><span class="kw">import</span> { BitNet } <span class="kw">from</span> <span class="str">"0xbitnet"</span>;
|
| 206 |
+
|
| 207 |
+
<span class="kw">const</span> model = <span class="kw">await</span> BitNet.<span class="fn">load</span>(url);
|
| 208 |
+
|
| 209 |
+
<span class="kw">for await</span> (<span class="kw">const</span> token <span class="kw">of</span> model.<span class="fn">generate</span>(messages)) {
|
| 210 |
+
process.stdout.<span class="fn">write</span>(token);
|
| 211 |
+
}</code></pre>
|
| 212 |
+
<button id="load-btn">Try It — Load BitNet 2B-4T (~700 MB)</button>
|
| 213 |
+
<!-- rel="noopener noreferrer": pages opened in a new tab must not receive window.opener or the referrer --><p class="npm-link"><code>npm install 0xbitnet</code> — <a href="https://github.com/m96-chan/0xBitNet" target="_blank" rel="noopener noreferrer">GitHub</a> · <a href="https://www.npmjs.com/package/0xbitnet" target="_blank" rel="noopener noreferrer">npm</a></p>
|
| 214 |
+
</div>
|
| 215 |
+
|
| 216 |
+
<div id="messages"></div>
|
| 217 |
+
|
| 218 |
+
<div id="input-area">
|
| 219 |
+
<!-- aria-label gives the field an explicit accessible name; the placeholder vanishes on input and is not a label --><input type="text" id="user-input" aria-label="Message" placeholder="Type a message..." autocomplete="off" />
|
| 220 |
+
<button id="send-btn" disabled>Send</button>
|
| 221 |
+
</div>
|
| 222 |
+
|
| 223 |
+
</body>
|
| 224 |
+
</html>
|
package.json
DELETED
|
@@ -1,39 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"name": "react-template",
|
| 3 |
-
"version": "0.1.0",
|
| 4 |
-
"private": true,
|
| 5 |
-
"dependencies": {
|
| 6 |
-
"@testing-library/dom": "^10.4.0",
|
| 7 |
-
"@testing-library/jest-dom": "^6.6.3",
|
| 8 |
-
"@testing-library/react": "^16.3.0",
|
| 9 |
-
"@testing-library/user-event": "^13.5.0",
|
| 10 |
-
"react": "^19.1.0",
|
| 11 |
-
"react-dom": "^19.1.0",
|
| 12 |
-
"react-scripts": "5.0.1",
|
| 13 |
-
"web-vitals": "^2.1.4"
|
| 14 |
-
},
|
| 15 |
-
"scripts": {
|
| 16 |
-
"start": "react-scripts start",
|
| 17 |
-
"build": "react-scripts build",
|
| 18 |
-
"test": "react-scripts test",
|
| 19 |
-
"eject": "react-scripts eject"
|
| 20 |
-
},
|
| 21 |
-
"eslintConfig": {
|
| 22 |
-
"extends": [
|
| 23 |
-
"react-app",
|
| 24 |
-
"react-app/jest"
|
| 25 |
-
]
|
| 26 |
-
},
|
| 27 |
-
"browserslist": {
|
| 28 |
-
"production": [
|
| 29 |
-
">0.2%",
|
| 30 |
-
"not dead",
|
| 31 |
-
"not op_mini all"
|
| 32 |
-
],
|
| 33 |
-
"development": [
|
| 34 |
-
"last 1 chrome version",
|
| 35 |
-
"last 1 firefox version",
|
| 36 |
-
"last 1 safari version"
|
| 37 |
-
]
|
| 38 |
-
}
|
| 39 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
public/favicon.ico
DELETED
|
Binary file (3.87 kB)
|
|
|
public/index.html
DELETED
|
@@ -1,43 +0,0 @@
|
|
| 1 |
-
<!DOCTYPE html>
|
| 2 |
-
<html lang="en">
|
| 3 |
-
<head>
|
| 4 |
-
<meta charset="utf-8" />
|
| 5 |
-
<link rel="icon" href="%PUBLIC_URL%/favicon.ico" />
|
| 6 |
-
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
| 7 |
-
<meta name="theme-color" content="#000000" />
|
| 8 |
-
<meta
|
| 9 |
-
name="description"
|
| 10 |
-
content="Web site created using create-react-app"
|
| 11 |
-
/>
|
| 12 |
-
<link rel="apple-touch-icon" href="%PUBLIC_URL%/logo192.png" />
|
| 13 |
-
<!--
|
| 14 |
-
manifest.json provides metadata used when your web app is installed on a
|
| 15 |
-
user's mobile device or desktop. See https://developers.google.com/web/fundamentals/web-app-manifest/
|
| 16 |
-
-->
|
| 17 |
-
<link rel="manifest" href="%PUBLIC_URL%/manifest.json" />
|
| 18 |
-
<!--
|
| 19 |
-
Notice the use of %PUBLIC_URL% in the tags above.
|
| 20 |
-
It will be replaced with the URL of the `public` folder during the build.
|
| 21 |
-
Only files inside the `public` folder can be referenced from the HTML.
|
| 22 |
-
|
| 23 |
-
Unlike "/favicon.ico" or "favicon.ico", "%PUBLIC_URL%/favicon.ico" will
|
| 24 |
-
work correctly both with client-side routing and a non-root public URL.
|
| 25 |
-
Learn how to configure a non-root public URL by running `npm run build`.
|
| 26 |
-
-->
|
| 27 |
-
<title>React App</title>
|
| 28 |
-
</head>
|
| 29 |
-
<body>
|
| 30 |
-
<noscript>You need to enable JavaScript to run this app.</noscript>
|
| 31 |
-
<div id="root"></div>
|
| 32 |
-
<!--
|
| 33 |
-
This HTML file is a template.
|
| 34 |
-
If you open it directly in the browser, you will see an empty page.
|
| 35 |
-
|
| 36 |
-
You can add webfonts, meta tags, or analytics to this file.
|
| 37 |
-
The build step will place the bundled scripts into the <body> tag.
|
| 38 |
-
|
| 39 |
-
To begin the development, run `npm start` or `yarn start`.
|
| 40 |
-
To create a production bundle, use `npm run build` or `yarn build`.
|
| 41 |
-
-->
|
| 42 |
-
</body>
|
| 43 |
-
</html>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
public/logo192.png
DELETED
|
Binary file (5.35 kB)
|
|
|
public/logo512.png
DELETED
|
Binary file (9.66 kB)
|
|
|
public/manifest.json
DELETED
|
@@ -1,25 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"short_name": "React App",
|
| 3 |
-
"name": "Create React App Sample",
|
| 4 |
-
"icons": [
|
| 5 |
-
{
|
| 6 |
-
"src": "favicon.ico",
|
| 7 |
-
"sizes": "64x64 32x32 24x24 16x16",
|
| 8 |
-
"type": "image/x-icon"
|
| 9 |
-
},
|
| 10 |
-
{
|
| 11 |
-
"src": "logo192.png",
|
| 12 |
-
"type": "image/png",
|
| 13 |
-
"sizes": "192x192"
|
| 14 |
-
},
|
| 15 |
-
{
|
| 16 |
-
"src": "logo512.png",
|
| 17 |
-
"type": "image/png",
|
| 18 |
-
"sizes": "512x512"
|
| 19 |
-
}
|
| 20 |
-
],
|
| 21 |
-
"start_url": ".",
|
| 22 |
-
"display": "standalone",
|
| 23 |
-
"theme_color": "#000000",
|
| 24 |
-
"background_color": "#ffffff"
|
| 25 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
public/robots.txt
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
# https://www.robotstxt.org/robotstxt.html
|
| 2 |
-
User-agent: *
|
| 3 |
-
Disallow:
|
|
|
|
|
|
|
|
|
|
|
|
src/App.css
DELETED
|
@@ -1,38 +0,0 @@
|
|
| 1 |
-
.App {
|
| 2 |
-
text-align: center;
|
| 3 |
-
}
|
| 4 |
-
|
| 5 |
-
.App-logo {
|
| 6 |
-
height: 40vmin;
|
| 7 |
-
pointer-events: none;
|
| 8 |
-
}
|
| 9 |
-
|
| 10 |
-
@media (prefers-reduced-motion: no-preference) {
|
| 11 |
-
.App-logo {
|
| 12 |
-
animation: App-logo-spin infinite 20s linear;
|
| 13 |
-
}
|
| 14 |
-
}
|
| 15 |
-
|
| 16 |
-
.App-header {
|
| 17 |
-
background-color: #282c34;
|
| 18 |
-
min-height: 100vh;
|
| 19 |
-
display: flex;
|
| 20 |
-
flex-direction: column;
|
| 21 |
-
align-items: center;
|
| 22 |
-
justify-content: center;
|
| 23 |
-
font-size: calc(10px + 2vmin);
|
| 24 |
-
color: white;
|
| 25 |
-
}
|
| 26 |
-
|
| 27 |
-
.App-link {
|
| 28 |
-
color: #61dafb;
|
| 29 |
-
}
|
| 30 |
-
|
| 31 |
-
@keyframes App-logo-spin {
|
| 32 |
-
from {
|
| 33 |
-
transform: rotate(0deg);
|
| 34 |
-
}
|
| 35 |
-
to {
|
| 36 |
-
transform: rotate(360deg);
|
| 37 |
-
}
|
| 38 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/App.js
DELETED
|
@@ -1,25 +0,0 @@
|
|
| 1 |
-
import logo from './logo.svg';
|
| 2 |
-
import './App.css';
|
| 3 |
-
|
| 4 |
-
function App() {
|
| 5 |
-
return (
|
| 6 |
-
<div className="App">
|
| 7 |
-
<header className="App-header">
|
| 8 |
-
<img src={logo} className="App-logo" alt="logo" />
|
| 9 |
-
<p>
|
| 10 |
-
Edit <code>src/App.js</code> and save to reload.
|
| 11 |
-
</p>
|
| 12 |
-
<a
|
| 13 |
-
className="App-link"
|
| 14 |
-
href="https://reactjs.org"
|
| 15 |
-
target="_blank"
|
| 16 |
-
rel="noopener noreferrer"
|
| 17 |
-
>
|
| 18 |
-
Learn React
|
| 19 |
-
</a>
|
| 20 |
-
</header>
|
| 21 |
-
</div>
|
| 22 |
-
);
|
| 23 |
-
}
|
| 24 |
-
|
| 25 |
-
export default App;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/App.test.js
DELETED
|
@@ -1,8 +0,0 @@
|
|
| 1 |
-
import { render, screen } from '@testing-library/react';
|
| 2 |
-
import App from './App';
|
| 3 |
-
|
| 4 |
-
test('renders learn react link', () => {
|
| 5 |
-
render(<App />);
|
| 6 |
-
const linkElement = screen.getByText(/learn react/i);
|
| 7 |
-
expect(linkElement).toBeInTheDocument();
|
| 8 |
-
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/index.css
DELETED
|
@@ -1,13 +0,0 @@
|
|
| 1 |
-
body {
|
| 2 |
-
margin: 0;
|
| 3 |
-
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen',
|
| 4 |
-
'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue',
|
| 5 |
-
sans-serif;
|
| 6 |
-
-webkit-font-smoothing: antialiased;
|
| 7 |
-
-moz-osx-font-smoothing: grayscale;
|
| 8 |
-
}
|
| 9 |
-
|
| 10 |
-
code {
|
| 11 |
-
font-family: source-code-pro, Menlo, Monaco, Consolas, 'Courier New',
|
| 12 |
-
monospace;
|
| 13 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/index.js
DELETED
|
@@ -1,17 +0,0 @@
|
|
| 1 |
-
import React from 'react';
|
| 2 |
-
import ReactDOM from 'react-dom/client';
|
| 3 |
-
import './index.css';
|
| 4 |
-
import App from './App';
|
| 5 |
-
import reportWebVitals from './reportWebVitals';
|
| 6 |
-
|
| 7 |
-
const root = ReactDOM.createRoot(document.getElementById('root'));
|
| 8 |
-
root.render(
|
| 9 |
-
<React.StrictMode>
|
| 10 |
-
<App />
|
| 11 |
-
</React.StrictMode>
|
| 12 |
-
);
|
| 13 |
-
|
| 14 |
-
// If you want to start measuring performance in your app, pass a function
|
| 15 |
-
// to log results (for example: reportWebVitals(console.log))
|
| 16 |
-
// or send to an analytics endpoint. Learn more: https://bit.ly/CRA-vitals
|
| 17 |
-
reportWebVitals();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/logo.svg
DELETED
src/reportWebVitals.js
DELETED
|
@@ -1,13 +0,0 @@
|
|
| 1 |
-
const reportWebVitals = onPerfEntry => {
|
| 2 |
-
if (onPerfEntry && onPerfEntry instanceof Function) {
|
| 3 |
-
import('web-vitals').then(({ getCLS, getFID, getFCP, getLCP, getTTFB }) => {
|
| 4 |
-
getCLS(onPerfEntry);
|
| 5 |
-
getFID(onPerfEntry);
|
| 6 |
-
getFCP(onPerfEntry);
|
| 7 |
-
getLCP(onPerfEntry);
|
| 8 |
-
getTTFB(onPerfEntry);
|
| 9 |
-
});
|
| 10 |
-
}
|
| 11 |
-
};
|
| 12 |
-
|
| 13 |
-
export default reportWebVitals;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/setupTests.js
DELETED
|
@@ -1,5 +0,0 @@
|
|
| 1 |
-
// jest-dom adds custom jest matchers for asserting on DOM nodes.
|
| 2 |
-
// allows you to do things like:
|
| 3 |
-
// expect(element).toHaveTextContent(/react/i)
|
| 4 |
-
// learn more: https://github.com/testing-library/jest-dom
|
| 5 |
-
import '@testing-library/jest-dom';
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|