From 7dbdb48efc9ebd93751e86256970db1bf1e27ac4 Mon Sep 17 00:00:00 2001 From: Chethan Regala Date: Sun, 8 Mar 2026 18:17:00 +0530 Subject: [PATCH 1/7] feat(indexer): initialize incremental indexing engine module structure --- .gitignore | 3 ++ apps/indexer/package-lock.json | 30 +++++++++++++++++++ apps/indexer/package.json | 16 ++++++++++ apps/indexer/src/IndexQueue.ts | 0 apps/indexer/src/IndexingEngine.ts | 0 apps/indexer/src/NoteChunker.ts | 0 apps/indexer/src/adapters/EmbeddingAdapter.ts | 0 apps/indexer/src/adapters/VaultAdapter.ts | 0 apps/indexer/src/index.ts | 0 apps/indexer/src/types.ts | 0 apps/indexer/tsconfig.json | 11 +++++++ 11 files changed, 60 insertions(+) create mode 100644 apps/indexer/package-lock.json create mode 100644 apps/indexer/package.json create mode 100644 apps/indexer/src/IndexQueue.ts create mode 100644 apps/indexer/src/IndexingEngine.ts create mode 100644 apps/indexer/src/NoteChunker.ts create mode 100644 apps/indexer/src/adapters/EmbeddingAdapter.ts create mode 100644 apps/indexer/src/adapters/VaultAdapter.ts create mode 100644 apps/indexer/src/index.ts create mode 100644 apps/indexer/src/types.ts create mode 100644 apps/indexer/tsconfig.json diff --git a/.gitignore b/.gitignore index 9308a4b..f26d29b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +# Node.js +node_modules/ + ## Core latex/pdflatex auxiliary files: *.aux *.lof diff --git a/apps/indexer/package-lock.json b/apps/indexer/package-lock.json new file mode 100644 index 0000000..1da2418 --- /dev/null +++ b/apps/indexer/package-lock.json @@ -0,0 +1,30 @@ +{ + "name": "indexer", + "version": "1.0.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "indexer", + "version": "1.0.0", + "license": "ISC", + "devDependencies": { + "typescript": "^5.9.3" + } + }, + "node_modules/typescript": { + "version": "5.9.3", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz", + "integrity": 
"sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", + "dev": true, + "license": "Apache-2.0", + "bin": { + "tsc": "bin/tsc", + "tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=14.17" + } + } + } +} diff --git a/apps/indexer/package.json b/apps/indexer/package.json new file mode 100644 index 0000000..8a2851f --- /dev/null +++ b/apps/indexer/package.json @@ -0,0 +1,16 @@ +{ + "name": "indexer", + "version": "1.0.0", + "description": "", + "main": "index.js", + "scripts": { + "test": "echo \"Error: no test specified\" && exit 1" + }, + "keywords": [], + "author": "", + "license": "ISC", + "type": "commonjs", + "devDependencies": { + "typescript": "^5.9.3" + } +} diff --git a/apps/indexer/src/IndexQueue.ts b/apps/indexer/src/IndexQueue.ts new file mode 100644 index 0000000..e69de29 diff --git a/apps/indexer/src/IndexingEngine.ts b/apps/indexer/src/IndexingEngine.ts new file mode 100644 index 0000000..e69de29 diff --git a/apps/indexer/src/NoteChunker.ts b/apps/indexer/src/NoteChunker.ts new file mode 100644 index 0000000..e69de29 diff --git a/apps/indexer/src/adapters/EmbeddingAdapter.ts b/apps/indexer/src/adapters/EmbeddingAdapter.ts new file mode 100644 index 0000000..e69de29 diff --git a/apps/indexer/src/adapters/VaultAdapter.ts b/apps/indexer/src/adapters/VaultAdapter.ts new file mode 100644 index 0000000..e69de29 diff --git a/apps/indexer/src/index.ts b/apps/indexer/src/index.ts new file mode 100644 index 0000000..e69de29 diff --git a/apps/indexer/src/types.ts b/apps/indexer/src/types.ts new file mode 100644 index 0000000..e69de29 diff --git a/apps/indexer/tsconfig.json b/apps/indexer/tsconfig.json new file mode 100644 index 0000000..86b8bda --- /dev/null +++ b/apps/indexer/tsconfig.json @@ -0,0 +1,11 @@ +{ + "compilerOptions": { + "target": "ES2020", + "module": "CommonJS", + "rootDir": "src", + "outDir": "dist", + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true + } +} \ No newline at end 
of file From 147fc279cef0549e84fc805b83e5bef5fe3c21e0 Mon Sep 17 00:00:00 2001 From: CodexSandboxOffline Date: Sun, 8 Mar 2026 18:28:17 +0530 Subject: [PATCH 2/7] feat(indexer): add core types and adapter interfaces --- apps/indexer/dist/IndexQueue.js | 2 ++ apps/indexer/dist/IndexingEngine.js | 2 ++ apps/indexer/dist/NoteChunker.js | 2 ++ .../indexer/dist/adapters/EmbeddingAdapter.js | 2 ++ apps/indexer/dist/adapters/VaultAdapter.js | 2 ++ apps/indexer/dist/index.js | 22 ++++++++++++++++ apps/indexer/dist/types.js | 2 ++ apps/indexer/src/IndexQueue.ts | 1 + apps/indexer/src/IndexingEngine.ts | 1 + apps/indexer/src/NoteChunker.ts | 1 + apps/indexer/src/adapters/EmbeddingAdapter.ts | 10 +++++++ apps/indexer/src/adapters/VaultAdapter.ts | 16 ++++++++++++ apps/indexer/src/index.ts | 7 +++++ apps/indexer/src/types.ts | 26 +++++++++++++++++++ 14 files changed, 96 insertions(+) create mode 100644 apps/indexer/dist/IndexQueue.js create mode 100644 apps/indexer/dist/IndexingEngine.js create mode 100644 apps/indexer/dist/NoteChunker.js create mode 100644 apps/indexer/dist/adapters/EmbeddingAdapter.js create mode 100644 apps/indexer/dist/adapters/VaultAdapter.js create mode 100644 apps/indexer/dist/index.js create mode 100644 apps/indexer/dist/types.js diff --git a/apps/indexer/dist/IndexQueue.js b/apps/indexer/dist/IndexQueue.js new file mode 100644 index 0000000..c8ad2e5 --- /dev/null +++ b/apps/indexer/dist/IndexQueue.js @@ -0,0 +1,2 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); diff --git a/apps/indexer/dist/IndexingEngine.js b/apps/indexer/dist/IndexingEngine.js new file mode 100644 index 0000000..c8ad2e5 --- /dev/null +++ b/apps/indexer/dist/IndexingEngine.js @@ -0,0 +1,2 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); diff --git a/apps/indexer/dist/NoteChunker.js b/apps/indexer/dist/NoteChunker.js new file mode 100644 index 0000000..c8ad2e5 --- /dev/null +++ b/apps/indexer/dist/NoteChunker.js @@ 
-0,0 +1,2 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); diff --git a/apps/indexer/dist/adapters/EmbeddingAdapter.js b/apps/indexer/dist/adapters/EmbeddingAdapter.js new file mode 100644 index 0000000..c8ad2e5 --- /dev/null +++ b/apps/indexer/dist/adapters/EmbeddingAdapter.js @@ -0,0 +1,2 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); diff --git a/apps/indexer/dist/adapters/VaultAdapter.js b/apps/indexer/dist/adapters/VaultAdapter.js new file mode 100644 index 0000000..c8ad2e5 --- /dev/null +++ b/apps/indexer/dist/adapters/VaultAdapter.js @@ -0,0 +1,2 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); diff --git a/apps/indexer/dist/index.js b/apps/indexer/dist/index.js new file mode 100644 index 0000000..e52e951 --- /dev/null +++ b/apps/indexer/dist/index.js @@ -0,0 +1,22 @@ +"use strict"; +var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { + if (k2 === undefined) k2 = k; + var desc = Object.getOwnPropertyDescriptor(m, k); + if (!desc || ("get" in desc ? 
!m.__esModule : desc.writable || desc.configurable)) { + desc = { enumerable: true, get: function() { return m[k]; } }; + } + Object.defineProperty(o, k2, desc); +}) : (function(o, m, k, k2) { + if (k2 === undefined) k2 = k; + o[k2] = m[k]; +})); +var __exportStar = (this && this.__exportStar) || function(m, exports) { + for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p); +}; +Object.defineProperty(exports, "__esModule", { value: true }); +__exportStar(require("./types"), exports); +__exportStar(require("./IndexingEngine"), exports); +__exportStar(require("./IndexQueue"), exports); +__exportStar(require("./NoteChunker"), exports); +__exportStar(require("./adapters/VaultAdapter"), exports); +__exportStar(require("./adapters/EmbeddingAdapter"), exports); diff --git a/apps/indexer/dist/types.js b/apps/indexer/dist/types.js new file mode 100644 index 0000000..c8ad2e5 --- /dev/null +++ b/apps/indexer/dist/types.js @@ -0,0 +1,2 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); diff --git a/apps/indexer/src/IndexQueue.ts b/apps/indexer/src/IndexQueue.ts index e69de29..336ce12 100644 --- a/apps/indexer/src/IndexQueue.ts +++ b/apps/indexer/src/IndexQueue.ts @@ -0,0 +1 @@ +export {} diff --git a/apps/indexer/src/IndexingEngine.ts b/apps/indexer/src/IndexingEngine.ts index e69de29..336ce12 100644 --- a/apps/indexer/src/IndexingEngine.ts +++ b/apps/indexer/src/IndexingEngine.ts @@ -0,0 +1 @@ +export {} diff --git a/apps/indexer/src/NoteChunker.ts b/apps/indexer/src/NoteChunker.ts index e69de29..336ce12 100644 --- a/apps/indexer/src/NoteChunker.ts +++ b/apps/indexer/src/NoteChunker.ts @@ -0,0 +1 @@ +export {} diff --git a/apps/indexer/src/adapters/EmbeddingAdapter.ts b/apps/indexer/src/adapters/EmbeddingAdapter.ts index e69de29..8b304db 100644 --- a/apps/indexer/src/adapters/EmbeddingAdapter.ts +++ b/apps/indexer/src/adapters/EmbeddingAdapter.ts @@ -0,0 +1,10 @@ +/** + * 
Adapter interface for embedding generation. + * Allows plugging different embedding models. + */ +export interface EmbeddingAdapter { + /** + * Generate embeddings for chunks. + */ + embed(chunks: string[]): Promise +} diff --git a/apps/indexer/src/adapters/VaultAdapter.ts b/apps/indexer/src/adapters/VaultAdapter.ts index e69de29..5664a24 100644 --- a/apps/indexer/src/adapters/VaultAdapter.ts +++ b/apps/indexer/src/adapters/VaultAdapter.ts @@ -0,0 +1,16 @@ +/** + * Adapter interface for reading notes from the vault. + * This keeps the indexing engine independent of the + * underlying filesystem implementation. + */ +export interface VaultAdapter { + /** + * Read the contents of a note. + */ + readNote(notePath: string): Promise + + /** + * List all notes in the vault. + */ + listNotes(): Promise +} diff --git a/apps/indexer/src/index.ts b/apps/indexer/src/index.ts index e69de29..9b54436 100644 --- a/apps/indexer/src/index.ts +++ b/apps/indexer/src/index.ts @@ -0,0 +1,7 @@ +export * from "./types" +export * from "./IndexingEngine" +export * from "./IndexQueue" +export * from "./NoteChunker" + +export * from "./adapters/VaultAdapter" +export * from "./adapters/EmbeddingAdapter" diff --git a/apps/indexer/src/types.ts b/apps/indexer/src/types.ts index e69de29..fc4d293 100644 --- a/apps/indexer/src/types.ts +++ b/apps/indexer/src/types.ts @@ -0,0 +1,26 @@ +/** + * Represents a semantic chunk extracted from a note. + */ +export type NoteChunk = { + id: string + notePath: string + text: string + position: number +} + +/** + * Job sent to the indexing queue. + */ +export type IndexJob = { + type: "update" | "delete" + notePath: string +} + +/** + * Result produced by the indexing pipeline. 
+ */ +export type IndexResult = { + notePath: string + chunks: NoteChunk[] + embeddings: number[][] +} From e4e80f490d30a768b92b02945dfbe6ee1d11b86d Mon Sep 17 00:00:00 2001 From: CodexSandboxOffline Date: Sun, 8 Mar 2026 18:33:48 +0530 Subject: [PATCH 3/7] feat(indexer): implement note chunking and indexing queue --- apps/indexer/dist/IndexQueue.js | 32 +++++++++++++++++++++++++++++ apps/indexer/dist/NoteChunker.js | 31 ++++++++++++++++++++++++++++ apps/indexer/package-lock.json | 18 ++++++++++++++++ apps/indexer/package.json | 1 + apps/indexer/src/IndexQueue.ts | 35 +++++++++++++++++++++++++++++++- apps/indexer/src/NoteChunker.ts | 32 ++++++++++++++++++++++++++++- 6 files changed, 147 insertions(+), 2 deletions(-) diff --git a/apps/indexer/dist/IndexQueue.js b/apps/indexer/dist/IndexQueue.js index c8ad2e5..c1a76f3 100644 --- a/apps/indexer/dist/IndexQueue.js +++ b/apps/indexer/dist/IndexQueue.js @@ -1,2 +1,34 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); +exports.IndexQueue = void 0; +/** + * Simple sequential job queue for indexing tasks. + * Ensures indexing operations run in order. + */ +class IndexQueue { + constructor() { + this.queue = []; + this.running = false; + } + enqueue(job) { + this.queue.push(job); + } + async process(handler) { + if (this.running) + return; + this.running = true; + while (this.queue.length > 0) { + const job = this.queue.shift(); + if (!job) + continue; + try { + await handler(job); + } + catch (err) { + console.error("Index job failed:", err); + } + } + this.running = false; + } +} +exports.IndexQueue = IndexQueue; diff --git a/apps/indexer/dist/NoteChunker.js b/apps/indexer/dist/NoteChunker.js index c8ad2e5..81d7f36 100644 --- a/apps/indexer/dist/NoteChunker.js +++ b/apps/indexer/dist/NoteChunker.js @@ -1,2 +1,33 @@ "use strict"; +var __importDefault = (this && this.__importDefault) || function (mod) { + return (mod && mod.__esModule) ? 
mod : { "default": mod }; +}; Object.defineProperty(exports, "__esModule", { value: true }); +exports.NoteChunker = void 0; +const crypto_1 = __importDefault(require("crypto")); +/** + * Splits markdown notes into chunks. + * Current implementation is simple paragraph-based splitting. + */ +class NoteChunker { + split(notePath, markdown) { + const paragraphs = markdown + .split(/\n\s*\n/) + .map((p) => p.trim()) + .filter((p) => p.length > 0); + const chunks = paragraphs.map((text, index) => { + const id = crypto_1.default + .createHash("sha1") + .update(notePath + index + text) + .digest("hex"); + return { + id, + notePath, + text, + position: index, + }; + }); + return chunks; + } +} +exports.NoteChunker = NoteChunker; diff --git a/apps/indexer/package-lock.json b/apps/indexer/package-lock.json index 1da2418..57e0595 100644 --- a/apps/indexer/package-lock.json +++ b/apps/indexer/package-lock.json @@ -9,9 +9,20 @@ "version": "1.0.0", "license": "ISC", "devDependencies": { + "@types/node": "^25.3.5", "typescript": "^5.9.3" } }, + "node_modules/@types/node": { + "version": "25.3.5", + "resolved": "https://registry.npmjs.org/@types/node/-/node-25.3.5.tgz", + "integrity": "sha512-oX8xrhvpiyRCQkG1MFchB09f+cXftgIXb3a7UUa4Y3wpmZPw5tyZGTLWhlESOLq1Rq6oDlc8npVU2/9xiCuXMA==", + "dev": true, + "license": "MIT", + "dependencies": { + "undici-types": "~7.18.0" + } + }, "node_modules/typescript": { "version": "5.9.3", "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz", @@ -25,6 +36,13 @@ "engines": { "node": ">=14.17" } + }, + "node_modules/undici-types": { + "version": "7.18.2", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.18.2.tgz", + "integrity": "sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w==", + "dev": true, + "license": "MIT" } } } diff --git a/apps/indexer/package.json b/apps/indexer/package.json index 8a2851f..a401b78 100644 --- a/apps/indexer/package.json +++ 
b/apps/indexer/package.json @@ -11,6 +11,7 @@ "license": "ISC", "type": "commonjs", "devDependencies": { + "@types/node": "^25.3.5", "typescript": "^5.9.3" } } diff --git a/apps/indexer/src/IndexQueue.ts b/apps/indexer/src/IndexQueue.ts index 336ce12..9deaec9 100644 --- a/apps/indexer/src/IndexQueue.ts +++ b/apps/indexer/src/IndexQueue.ts @@ -1 +1,34 @@ -export {} +import { IndexJob } from "./types" + +/** + * Simple sequential job queue for indexing tasks. + * Ensures indexing operations run in order. + */ +export class IndexQueue { + private queue: IndexJob[] = [] + private running = false + + enqueue(job: IndexJob) { + this.queue.push(job) + } + + async process(handler: (job: IndexJob) => Promise) { + if (this.running) return + + this.running = true + + while (this.queue.length > 0) { + const job = this.queue.shift() + + if (!job) continue + + try { + await handler(job) + } catch (err) { + console.error("Index job failed:", err) + } + } + + this.running = false + } +} diff --git a/apps/indexer/src/NoteChunker.ts b/apps/indexer/src/NoteChunker.ts index 336ce12..f29ed14 100644 --- a/apps/indexer/src/NoteChunker.ts +++ b/apps/indexer/src/NoteChunker.ts @@ -1 +1,31 @@ -export {} +import { NoteChunk } from "./types" +import crypto from "crypto" + +/** + * Splits markdown notes into chunks. + * Current implementation is simple paragraph-based splitting. 
+ */ +export class NoteChunker { + split(notePath: string, markdown: string): NoteChunk[] { + const paragraphs = markdown + .split(/\n\s*\n/) + .map((p) => p.trim()) + .filter((p) => p.length > 0) + + const chunks: NoteChunk[] = paragraphs.map((text, index) => { + const id = crypto + .createHash("sha1") + .update(notePath + index + text) + .digest("hex") + + return { + id, + notePath, + text, + position: index, + } + }) + + return chunks + } +} From eb7514d64bc43f095352bd87d84427c53b27f15c Mon Sep 17 00:00:00 2001 From: CodexSandboxOffline Date: Sun, 8 Mar 2026 20:26:32 +0530 Subject: [PATCH 4/7] feat(indexer): implement core indexing engine pipeline --- apps/indexer/dist/IndexingEngine.js | 72 ++++++++++++++++++++++ apps/indexer/src/IndexingEngine.ts | 95 ++++++++++++++++++++++++++++- 2 files changed, 166 insertions(+), 1 deletion(-) diff --git a/apps/indexer/dist/IndexingEngine.js b/apps/indexer/dist/IndexingEngine.js index c8ad2e5..d67dd4b 100644 --- a/apps/indexer/dist/IndexingEngine.js +++ b/apps/indexer/dist/IndexingEngine.js @@ -1,2 +1,74 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); +exports.IndexingEngine = void 0; +const NoteChunker_1 = require("./NoteChunker"); +const IndexQueue_1 = require("./IndexQueue"); +/** + * Coordinates the incremental indexing pipeline. + */ +class IndexingEngine { + constructor(vault, embedder) { + this.vault = vault; + this.embedder = embedder; + this.chunker = new NoteChunker_1.NoteChunker(); + this.queue = new IndexQueue_1.IndexQueue(); + } + /** + * Schedule indexing for a note. + */ + scheduleUpdate(notePath) { + const job = { + type: "update", + notePath, + }; + this.queue.enqueue(job); + this.queue.process(this.processJob.bind(this)); + } + /** + * Schedule deletion of a note from the index. 
+ */ + scheduleDelete(notePath) { + const job = { + type: "delete", + notePath, + }; + this.queue.enqueue(job); + this.queue.process(this.processJob.bind(this)); + } + /** + * Process jobs coming from the queue. + */ + async processJob(job) { + if (job.type === "update") { + await this.indexNote(job.notePath); + } + if (job.type === "delete") { + await this.removeNote(job.notePath); + } + } + /** + * Full indexing pipeline for a note. + */ + async indexNote(notePath) { + const markdown = await this.vault.readNote(notePath); + const chunks = this.chunker.split(notePath, markdown); + const chunkTexts = chunks.map((c) => c.text); + const embeddings = await this.embedder.embed(chunkTexts); + return { + notePath, + chunks, + embeddings, + }; + } + /** + * Remove a note from the index. + * (Implementation placeholder for future index storage layer) + */ + async removeNote(notePath) { + // Future implementation: + // remove embeddings from vector store + // remove chunks from registry table + console.log(`Remove note from index: ${notePath}`); + } +} +exports.IndexingEngine = IndexingEngine; diff --git a/apps/indexer/src/IndexingEngine.ts b/apps/indexer/src/IndexingEngine.ts index 336ce12..609e794 100644 --- a/apps/indexer/src/IndexingEngine.ts +++ b/apps/indexer/src/IndexingEngine.ts @@ -1 +1,94 @@ -export {} +import { VaultAdapter } from "./adapters/VaultAdapter" +import { EmbeddingAdapter } from "./adapters/EmbeddingAdapter" +import { NoteChunker } from "./NoteChunker" +import { IndexQueue } from "./IndexQueue" +import { IndexResult, IndexJob } from "./types" + +/** + * Coordinates the incremental indexing pipeline. 
+ */ +export class IndexingEngine { + private vault: VaultAdapter + private embedder: EmbeddingAdapter + private chunker: NoteChunker + private queue: IndexQueue + + constructor(vault: VaultAdapter, embedder: EmbeddingAdapter) { + this.vault = vault + this.embedder = embedder + this.chunker = new NoteChunker() + this.queue = new IndexQueue() + } + + /** + * Schedule indexing for a note. + */ + scheduleUpdate(notePath: string) { + const job: IndexJob = { + type: "update", + notePath, + } + + this.queue.enqueue(job) + + this.queue.process(this.processJob.bind(this)) + } + + /** + * Schedule deletion of a note from the index. + */ + scheduleDelete(notePath: string) { + const job: IndexJob = { + type: "delete", + notePath, + } + + this.queue.enqueue(job) + + this.queue.process(this.processJob.bind(this)) + } + + /** + * Process jobs coming from the queue. + */ + private async processJob(job: IndexJob) { + if (job.type === "update") { + await this.indexNote(job.notePath) + } + + if (job.type === "delete") { + await this.removeNote(job.notePath) + } + } + + /** + * Full indexing pipeline for a note. + */ + private async indexNote(notePath: string): Promise { + const markdown = await this.vault.readNote(notePath) + + const chunks = this.chunker.split(notePath, markdown) + + const chunkTexts = chunks.map((c) => c.text) + + const embeddings = await this.embedder.embed(chunkTexts) + + return { + notePath, + chunks, + embeddings, + } + } + + /** + * Remove a note from the index. 
+ * (Implementation placeholder for future index storage layer) + */ + private async removeNote(notePath: string) { + // Future implementation: + // remove embeddings from vector store + // remove chunks from registry table + + console.log(`Remove note from index: ${notePath}`) + } +} From 4ad0274090c6964f0fc812a960dab7cdf2ee5454 Mon Sep 17 00:00:00 2001 From: CodexSandboxOffline Date: Sun, 8 Mar 2026 20:30:21 +0530 Subject: [PATCH 5/7] feat(indexer): add storage adapter and demo indexing runner --- apps/indexer/dist/IndexingEngine.js | 13 +++--- apps/indexer/dist/adapters/IndexStore.js | 2 + apps/indexer/dist/demo/DemoRunner.js | 40 ++++++++++++++++ apps/indexer/dist/index.js | 1 + apps/indexer/src/IndexingEngine.ts | 22 +++++---- apps/indexer/src/adapters/IndexStore.ts | 21 +++++++++ apps/indexer/src/demo/DemoRunner.ts | 59 ++++++++++++++++++++++++ apps/indexer/src/index.ts | 1 + 8 files changed, 144 insertions(+), 15 deletions(-) create mode 100644 apps/indexer/dist/adapters/IndexStore.js create mode 100644 apps/indexer/dist/demo/DemoRunner.js create mode 100644 apps/indexer/src/adapters/IndexStore.ts create mode 100644 apps/indexer/src/demo/DemoRunner.ts diff --git a/apps/indexer/dist/IndexingEngine.js b/apps/indexer/dist/IndexingEngine.js index d67dd4b..a8e9c70 100644 --- a/apps/indexer/dist/IndexingEngine.js +++ b/apps/indexer/dist/IndexingEngine.js @@ -7,9 +7,10 @@ const IndexQueue_1 = require("./IndexQueue"); * Coordinates the incremental indexing pipeline. 
*/ class IndexingEngine { - constructor(vault, embedder) { + constructor(vault, embedder, store) { this.vault = vault; this.embedder = embedder; + this.store = store; this.chunker = new NoteChunker_1.NoteChunker(); this.queue = new IndexQueue_1.IndexQueue(); } @@ -54,21 +55,19 @@ class IndexingEngine { const chunks = this.chunker.split(notePath, markdown); const chunkTexts = chunks.map((c) => c.text); const embeddings = await this.embedder.embed(chunkTexts); - return { + const result = { notePath, chunks, embeddings, }; + await this.store.saveChunks(notePath, chunks, embeddings); + return result; } /** * Remove a note from the index. - * (Implementation placeholder for future index storage layer) */ async removeNote(notePath) { - // Future implementation: - // remove embeddings from vector store - // remove chunks from registry table - console.log(`Remove note from index: ${notePath}`); + await this.store.deleteNote(notePath); } } exports.IndexingEngine = IndexingEngine; diff --git a/apps/indexer/dist/adapters/IndexStore.js b/apps/indexer/dist/adapters/IndexStore.js new file mode 100644 index 0000000..c8ad2e5 --- /dev/null +++ b/apps/indexer/dist/adapters/IndexStore.js @@ -0,0 +1,2 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); diff --git a/apps/indexer/dist/demo/DemoRunner.js b/apps/indexer/dist/demo/DemoRunner.js new file mode 100644 index 0000000..1f0e18d --- /dev/null +++ b/apps/indexer/dist/demo/DemoRunner.js @@ -0,0 +1,40 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +const IndexingEngine_1 = require("../IndexingEngine"); +/** + * Simple in-memory demo implementations + */ +class DemoVault { + async readNote(notePath) { + return ` +# Example Note + +This is the first paragraph. + +This is another paragraph about Smart Notes. 
+`; + } + async listNotes() { + return ["demo.md"]; + } +} +class DemoEmbedder { + async embed(chunks) { + return chunks.map(() => [Math.random(), Math.random(), Math.random()]); + } +} +class DemoStore { + async saveChunks(notePath, chunks, embeddings) { + console.log("Indexed note:", notePath); + console.log("Chunks:", chunks.length); + console.log("Embeddings:", embeddings.length); + } + async deleteNote(notePath) { + console.log("Deleted note:", notePath); + } +} +async function runDemo() { + const engine = new IndexingEngine_1.IndexingEngine(new DemoVault(), new DemoEmbedder(), new DemoStore()); + engine.scheduleUpdate("demo.md"); +} +runDemo(); diff --git a/apps/indexer/dist/index.js b/apps/indexer/dist/index.js index e52e951..5ad6917 100644 --- a/apps/indexer/dist/index.js +++ b/apps/indexer/dist/index.js @@ -20,3 +20,4 @@ __exportStar(require("./IndexQueue"), exports); __exportStar(require("./NoteChunker"), exports); __exportStar(require("./adapters/VaultAdapter"), exports); __exportStar(require("./adapters/EmbeddingAdapter"), exports); +__exportStar(require("./adapters/IndexStore"), exports); diff --git a/apps/indexer/src/IndexingEngine.ts b/apps/indexer/src/IndexingEngine.ts index 609e794..166ff97 100644 --- a/apps/indexer/src/IndexingEngine.ts +++ b/apps/indexer/src/IndexingEngine.ts @@ -1,5 +1,6 @@ import { VaultAdapter } from "./adapters/VaultAdapter" import { EmbeddingAdapter } from "./adapters/EmbeddingAdapter" +import { IndexStore } from "./adapters/IndexStore" import { NoteChunker } from "./NoteChunker" import { IndexQueue } from "./IndexQueue" import { IndexResult, IndexJob } from "./types" @@ -10,12 +11,18 @@ import { IndexResult, IndexJob } from "./types" export class IndexingEngine { private vault: VaultAdapter private embedder: EmbeddingAdapter + private store: IndexStore private chunker: NoteChunker private queue: IndexQueue - constructor(vault: VaultAdapter, embedder: EmbeddingAdapter) { + constructor( + vault: VaultAdapter, + embedder: 
EmbeddingAdapter, + store: IndexStore + ) { this.vault = vault this.embedder = embedder + this.store = store this.chunker = new NoteChunker() this.queue = new IndexQueue() } @@ -73,22 +80,21 @@ export class IndexingEngine { const embeddings = await this.embedder.embed(chunkTexts) - return { + const result: IndexResult = { notePath, chunks, embeddings, } + + await this.store.saveChunks(notePath, chunks, embeddings) + + return result } /** * Remove a note from the index. - * (Implementation placeholder for future index storage layer) */ private async removeNote(notePath: string) { - // Future implementation: - // remove embeddings from vector store - // remove chunks from registry table - - console.log(`Remove note from index: ${notePath}`) + await this.store.deleteNote(notePath) } } diff --git a/apps/indexer/src/adapters/IndexStore.ts b/apps/indexer/src/adapters/IndexStore.ts new file mode 100644 index 0000000..b2c9f8a --- /dev/null +++ b/apps/indexer/src/adapters/IndexStore.ts @@ -0,0 +1,21 @@ +import { NoteChunk } from "../types" + +/** + * Storage abstraction for indexed notes. + * Allows plugging SQLite / vector DB / other stores. + */ +export interface IndexStore { + /** + * Store indexed chunks and embeddings. + */ + saveChunks( + notePath: string, + chunks: NoteChunk[], + embeddings: number[][] + ): Promise + + /** + * Remove all chunks belonging to a note. 
+ */ + deleteNote(notePath: string): Promise +} diff --git a/apps/indexer/src/demo/DemoRunner.ts b/apps/indexer/src/demo/DemoRunner.ts new file mode 100644 index 0000000..73d13e7 --- /dev/null +++ b/apps/indexer/src/demo/DemoRunner.ts @@ -0,0 +1,59 @@ +import { IndexingEngine } from "../IndexingEngine" +import { VaultAdapter } from "../adapters/VaultAdapter" +import { EmbeddingAdapter } from "../adapters/EmbeddingAdapter" +import { IndexStore } from "../adapters/IndexStore" +import { NoteChunk } from "../types" + +/** + * Simple in-memory demo implementations + */ + +class DemoVault implements VaultAdapter { + async readNote(notePath: string): Promise { + return ` +# Example Note + +This is the first paragraph. + +This is another paragraph about Smart Notes. +` + } + + async listNotes(): Promise { + return ["demo.md"] + } +} + +class DemoEmbedder implements EmbeddingAdapter { + async embed(chunks: string[]): Promise { + return chunks.map(() => [Math.random(), Math.random(), Math.random()]) + } +} + +class DemoStore implements IndexStore { + async saveChunks( + notePath: string, + chunks: NoteChunk[], + embeddings: number[][] + ): Promise { + console.log("Indexed note:", notePath) + console.log("Chunks:", chunks.length) + console.log("Embeddings:", embeddings.length) + } + + async deleteNote(notePath: string): Promise { + console.log("Deleted note:", notePath) + } +} + +async function runDemo() { + const engine = new IndexingEngine( + new DemoVault(), + new DemoEmbedder(), + new DemoStore() + ) + + engine.scheduleUpdate("demo.md") +} + +runDemo() diff --git a/apps/indexer/src/index.ts b/apps/indexer/src/index.ts index 9b54436..fca5706 100644 --- a/apps/indexer/src/index.ts +++ b/apps/indexer/src/index.ts @@ -5,3 +5,4 @@ export * from "./NoteChunker" export * from "./adapters/VaultAdapter" export * from "./adapters/EmbeddingAdapter" +export * from "./adapters/IndexStore" From fc753214ce0f9ae8e86a72a3d03f0de08c569228 Mon Sep 17 00:00:00 2001 From: CodexSandboxOffline 
Date: Sun, 8 Mar 2026 20:45:47 +0530 Subject: [PATCH 6/7] chore(indexer): ignore build artifacts and untrack dist output --- apps/indexer/.gitignore | 2 + apps/indexer/dist/IndexQueue.js | 34 --------- apps/indexer/dist/IndexingEngine.js | 73 ------------------- apps/indexer/dist/NoteChunker.js | 33 --------- .../indexer/dist/adapters/EmbeddingAdapter.js | 2 - apps/indexer/dist/adapters/IndexStore.js | 2 - apps/indexer/dist/adapters/VaultAdapter.js | 2 - apps/indexer/dist/demo/DemoRunner.js | 40 ---------- apps/indexer/dist/index.js | 23 ------ apps/indexer/dist/types.js | 2 - 10 files changed, 2 insertions(+), 211 deletions(-) create mode 100644 apps/indexer/.gitignore delete mode 100644 apps/indexer/dist/IndexQueue.js delete mode 100644 apps/indexer/dist/IndexingEngine.js delete mode 100644 apps/indexer/dist/NoteChunker.js delete mode 100644 apps/indexer/dist/adapters/EmbeddingAdapter.js delete mode 100644 apps/indexer/dist/adapters/IndexStore.js delete mode 100644 apps/indexer/dist/adapters/VaultAdapter.js delete mode 100644 apps/indexer/dist/demo/DemoRunner.js delete mode 100644 apps/indexer/dist/index.js delete mode 100644 apps/indexer/dist/types.js diff --git a/apps/indexer/.gitignore b/apps/indexer/.gitignore new file mode 100644 index 0000000..de4d1f0 --- /dev/null +++ b/apps/indexer/.gitignore @@ -0,0 +1,2 @@ +dist +node_modules diff --git a/apps/indexer/dist/IndexQueue.js b/apps/indexer/dist/IndexQueue.js deleted file mode 100644 index c1a76f3..0000000 --- a/apps/indexer/dist/IndexQueue.js +++ /dev/null @@ -1,34 +0,0 @@ -"use strict"; -Object.defineProperty(exports, "__esModule", { value: true }); -exports.IndexQueue = void 0; -/** - * Simple sequential job queue for indexing tasks. - * Ensures indexing operations run in order. 
- */ -class IndexQueue { - constructor() { - this.queue = []; - this.running = false; - } - enqueue(job) { - this.queue.push(job); - } - async process(handler) { - if (this.running) - return; - this.running = true; - while (this.queue.length > 0) { - const job = this.queue.shift(); - if (!job) - continue; - try { - await handler(job); - } - catch (err) { - console.error("Index job failed:", err); - } - } - this.running = false; - } -} -exports.IndexQueue = IndexQueue; diff --git a/apps/indexer/dist/IndexingEngine.js b/apps/indexer/dist/IndexingEngine.js deleted file mode 100644 index a8e9c70..0000000 --- a/apps/indexer/dist/IndexingEngine.js +++ /dev/null @@ -1,73 +0,0 @@ -"use strict"; -Object.defineProperty(exports, "__esModule", { value: true }); -exports.IndexingEngine = void 0; -const NoteChunker_1 = require("./NoteChunker"); -const IndexQueue_1 = require("./IndexQueue"); -/** - * Coordinates the incremental indexing pipeline. - */ -class IndexingEngine { - constructor(vault, embedder, store) { - this.vault = vault; - this.embedder = embedder; - this.store = store; - this.chunker = new NoteChunker_1.NoteChunker(); - this.queue = new IndexQueue_1.IndexQueue(); - } - /** - * Schedule indexing for a note. - */ - scheduleUpdate(notePath) { - const job = { - type: "update", - notePath, - }; - this.queue.enqueue(job); - this.queue.process(this.processJob.bind(this)); - } - /** - * Schedule deletion of a note from the index. - */ - scheduleDelete(notePath) { - const job = { - type: "delete", - notePath, - }; - this.queue.enqueue(job); - this.queue.process(this.processJob.bind(this)); - } - /** - * Process jobs coming from the queue. - */ - async processJob(job) { - if (job.type === "update") { - await this.indexNote(job.notePath); - } - if (job.type === "delete") { - await this.removeNote(job.notePath); - } - } - /** - * Full indexing pipeline for a note. 
- */ - async indexNote(notePath) { - const markdown = await this.vault.readNote(notePath); - const chunks = this.chunker.split(notePath, markdown); - const chunkTexts = chunks.map((c) => c.text); - const embeddings = await this.embedder.embed(chunkTexts); - const result = { - notePath, - chunks, - embeddings, - }; - await this.store.saveChunks(notePath, chunks, embeddings); - return result; - } - /** - * Remove a note from the index. - */ - async removeNote(notePath) { - await this.store.deleteNote(notePath); - } -} -exports.IndexingEngine = IndexingEngine; diff --git a/apps/indexer/dist/NoteChunker.js b/apps/indexer/dist/NoteChunker.js deleted file mode 100644 index 81d7f36..0000000 --- a/apps/indexer/dist/NoteChunker.js +++ /dev/null @@ -1,33 +0,0 @@ -"use strict"; -var __importDefault = (this && this.__importDefault) || function (mod) { - return (mod && mod.__esModule) ? mod : { "default": mod }; -}; -Object.defineProperty(exports, "__esModule", { value: true }); -exports.NoteChunker = void 0; -const crypto_1 = __importDefault(require("crypto")); -/** - * Splits markdown notes into chunks. - * Current implementation is simple paragraph-based splitting. 
- */ -class NoteChunker { - split(notePath, markdown) { - const paragraphs = markdown - .split(/\n\s*\n/) - .map((p) => p.trim()) - .filter((p) => p.length > 0); - const chunks = paragraphs.map((text, index) => { - const id = crypto_1.default - .createHash("sha1") - .update(notePath + index + text) - .digest("hex"); - return { - id, - notePath, - text, - position: index, - }; - }); - return chunks; - } -} -exports.NoteChunker = NoteChunker; diff --git a/apps/indexer/dist/adapters/EmbeddingAdapter.js b/apps/indexer/dist/adapters/EmbeddingAdapter.js deleted file mode 100644 index c8ad2e5..0000000 --- a/apps/indexer/dist/adapters/EmbeddingAdapter.js +++ /dev/null @@ -1,2 +0,0 @@ -"use strict"; -Object.defineProperty(exports, "__esModule", { value: true }); diff --git a/apps/indexer/dist/adapters/IndexStore.js b/apps/indexer/dist/adapters/IndexStore.js deleted file mode 100644 index c8ad2e5..0000000 --- a/apps/indexer/dist/adapters/IndexStore.js +++ /dev/null @@ -1,2 +0,0 @@ -"use strict"; -Object.defineProperty(exports, "__esModule", { value: true }); diff --git a/apps/indexer/dist/adapters/VaultAdapter.js b/apps/indexer/dist/adapters/VaultAdapter.js deleted file mode 100644 index c8ad2e5..0000000 --- a/apps/indexer/dist/adapters/VaultAdapter.js +++ /dev/null @@ -1,2 +0,0 @@ -"use strict"; -Object.defineProperty(exports, "__esModule", { value: true }); diff --git a/apps/indexer/dist/demo/DemoRunner.js b/apps/indexer/dist/demo/DemoRunner.js deleted file mode 100644 index 1f0e18d..0000000 --- a/apps/indexer/dist/demo/DemoRunner.js +++ /dev/null @@ -1,40 +0,0 @@ -"use strict"; -Object.defineProperty(exports, "__esModule", { value: true }); -const IndexingEngine_1 = require("../IndexingEngine"); -/** - * Simple in-memory demo implementations - */ -class DemoVault { - async readNote(notePath) { - return ` -# Example Note - -This is the first paragraph. - -This is another paragraph about Smart Notes. 
-`; - } - async listNotes() { - return ["demo.md"]; - } -} -class DemoEmbedder { - async embed(chunks) { - return chunks.map(() => [Math.random(), Math.random(), Math.random()]); - } -} -class DemoStore { - async saveChunks(notePath, chunks, embeddings) { - console.log("Indexed note:", notePath); - console.log("Chunks:", chunks.length); - console.log("Embeddings:", embeddings.length); - } - async deleteNote(notePath) { - console.log("Deleted note:", notePath); - } -} -async function runDemo() { - const engine = new IndexingEngine_1.IndexingEngine(new DemoVault(), new DemoEmbedder(), new DemoStore()); - engine.scheduleUpdate("demo.md"); -} -runDemo(); diff --git a/apps/indexer/dist/index.js b/apps/indexer/dist/index.js deleted file mode 100644 index 5ad6917..0000000 --- a/apps/indexer/dist/index.js +++ /dev/null @@ -1,23 +0,0 @@ -"use strict"; -var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { - if (k2 === undefined) k2 = k; - var desc = Object.getOwnPropertyDescriptor(m, k); - if (!desc || ("get" in desc ? 
!m.__esModule : desc.writable || desc.configurable)) { - desc = { enumerable: true, get: function() { return m[k]; } }; - } - Object.defineProperty(o, k2, desc); -}) : (function(o, m, k, k2) { - if (k2 === undefined) k2 = k; - o[k2] = m[k]; -})); -var __exportStar = (this && this.__exportStar) || function(m, exports) { - for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p); -}; -Object.defineProperty(exports, "__esModule", { value: true }); -__exportStar(require("./types"), exports); -__exportStar(require("./IndexingEngine"), exports); -__exportStar(require("./IndexQueue"), exports); -__exportStar(require("./NoteChunker"), exports); -__exportStar(require("./adapters/VaultAdapter"), exports); -__exportStar(require("./adapters/EmbeddingAdapter"), exports); -__exportStar(require("./adapters/IndexStore"), exports); diff --git a/apps/indexer/dist/types.js b/apps/indexer/dist/types.js deleted file mode 100644 index c8ad2e5..0000000 --- a/apps/indexer/dist/types.js +++ /dev/null @@ -1,2 +0,0 @@ -"use strict"; -Object.defineProperty(exports, "__esModule", { value: true }); From 487fbfa04f05935feb8b60a5408be7341de1015c Mon Sep 17 00:00:00 2001 From: CodexSandboxOffline Date: Mon, 9 Mar 2026 10:43:35 +0530 Subject: [PATCH 7/7] refactor(indexer): address automated review feedback --- apps/indexer/package.json | 3 ++- apps/indexer/src/IndexQueue.ts | 25 +++++++++++-------------- apps/indexer/src/IndexingEngine.ts | 14 ++++++++++---- apps/indexer/src/NoteChunker.ts | 4 ++-- apps/indexer/src/adapters/IndexStore.ts | 5 ++++- apps/indexer/tsconfig.json | 5 +++-- 6 files changed, 32 insertions(+), 24 deletions(-) diff --git a/apps/indexer/package.json b/apps/indexer/package.json index a401b78..1020d10 100644 --- a/apps/indexer/package.json +++ b/apps/indexer/package.json @@ -2,8 +2,9 @@ "name": "indexer", "version": "1.0.0", "description": "", - "main": "index.js", + "main": "dist/index.js", "scripts": { + 
"build": "tsc", "test": "echo \"Error: no test specified\" && exit 1" }, "keywords": [], diff --git a/apps/indexer/src/IndexQueue.ts b/apps/indexer/src/IndexQueue.ts index 9deaec9..8de3026 100644 --- a/apps/indexer/src/IndexQueue.ts +++ b/apps/indexer/src/IndexQueue.ts @@ -6,29 +6,26 @@ import { IndexJob } from "./types" */ export class IndexQueue { private queue: IndexJob[] = [] - private running = false + private processing: Promise<void> | null = null enqueue(job: IndexJob) { this.queue.push(job) } - async process(handler: (job: IndexJob) => Promise<void>) { - if (this.running) return + process(handler: (job: IndexJob) => Promise<void>): Promise<void> { + if (this.processing) return this.processing - this.running = true + this.processing = (async () => { + while (this.queue.length > 0) { + const job = this.queue.shift() + if (!job) continue - while (this.queue.length > 0) { - const job = this.queue.shift() - - if (!job) continue - - try { await handler(job) - } catch (err) { - console.error("Index job failed:", err) } - } + })().finally(() => { + this.processing = null + }) - this.running = false + return this.processing } } diff --git a/apps/indexer/src/IndexingEngine.ts b/apps/indexer/src/IndexingEngine.ts index 166ff97..65ef271 100644 --- a/apps/indexer/src/IndexingEngine.ts +++ b/apps/indexer/src/IndexingEngine.ts @@ -30,7 +30,7 @@ export class IndexingEngine { /** * Schedule indexing for a note. */ - scheduleUpdate(notePath: string) { + scheduleUpdate(notePath: string): Promise<void> { const job: IndexJob = { type: "update", notePath, @@ -38,13 +38,13 @@ export class IndexingEngine { this.queue.enqueue(job) - this.queue.process(this.processJob.bind(this)) + return this.queue.process(this.processJob.bind(this)) } /** * Schedule deletion of a note from the index. 
*/ - scheduleDelete(notePath: string) { + scheduleDelete(notePath: string): Promise<void> { const job: IndexJob = { type: "delete", notePath, @@ -52,7 +52,7 @@ export class IndexingEngine { this.queue.enqueue(job) - this.queue.process(this.processJob.bind(this)) + return this.queue.process(this.processJob.bind(this)) } /** @@ -80,6 +80,12 @@ export class IndexingEngine { const embeddings = await this.embedder.embed(chunkTexts) + if (embeddings.length !== chunks.length) { + throw new Error( + `Embedding adapter returned ${embeddings.length} embeddings for ${chunks.length} chunks` + ) + } + const result: IndexResult = { notePath, chunks, diff --git a/apps/indexer/src/NoteChunker.ts b/apps/indexer/src/NoteChunker.ts index f29ed14..5da928f 100644 --- a/apps/indexer/src/NoteChunker.ts +++ b/apps/indexer/src/NoteChunker.ts @@ -1,4 +1,4 @@ -import { NoteChunk } from "./types" +import type { NoteChunk } from "./types" import crypto from "crypto" /** @@ -15,7 +15,7 @@ export class NoteChunker { const chunks: NoteChunk[] = paragraphs.map((text, index) => { const id = crypto .createHash("sha1") - .update(notePath + index + text) + .update(`${notePath}\0${index}\0${text}`) .digest("hex") return { diff --git a/apps/indexer/src/adapters/IndexStore.ts b/apps/indexer/src/adapters/IndexStore.ts index b2c9f8a..4c08063 100644 --- a/apps/indexer/src/adapters/IndexStore.ts +++ b/apps/indexer/src/adapters/IndexStore.ts @@ -6,7 +6,10 @@ import { NoteChunk } from "../types" */ export interface IndexStore { /** - * Store indexed chunks and embeddings. + * Atomically replace all indexed chunks and embeddings for the given notePath. + * + * Implementations must remove any previously stored chunks that no longer + * exist after a note edit. 
*/ saveChunks( notePath: string, diff --git a/apps/indexer/tsconfig.json b/apps/indexer/tsconfig.json index 86b8bda..39c5bd9 100644 --- a/apps/indexer/tsconfig.json +++ b/apps/indexer/tsconfig.json @@ -7,5 +7,6 @@ "strict": true, "esModuleInterop": true, "skipLibCheck": true - } -} \ No newline at end of file + }, + "include": ["src"] +}