feat: replace Anthropic gateway with Llama 3.3 on OVHcloud AI endpoints

2026-02-01 13:03:51 +01:00
parent e51942a9b2
commit 7d1ef8a7e5
10 changed files with 13205 additions and 1058 deletions
--- a/.env.example
+++ b/.env.example
@@ -1,2 +1,3 @@
 DATABASE_URL=""
 USER_KEY=""
+OVHCLOUD_API_KEY=""
--- a/package-lock.json
+++ b/package-lock.json
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
  "name": "siri-shortcuts",
  "version": "1.0.0",
-  "description": "Siri-enabled, Anthropic-powered shortcuts",
+  "description": "Siri-enabled, Llama-powered shortcuts",
  "author": "riccardo@frompixels.com",
  "scripts": {
    "dev": "next dev",
@@ -25,12 +25,12 @@
  },
  "dependencies": {
    "@prisma/client": "^5.22.0",
-    "ai": "^5.0.68",
+    "openai": "^4.77.0",
    "axios": "^1.12.0",
    "next": "^15.5.9",
    "react": "^19.0.0",
    "react-dom": "^19.0.0",
-    "zod": "^4.1.12"
+    "zod": "^3.23.8"
  },
  "devDependencies": {
    "@commitlint/cli": "^19.6.1",
--- a/prisma/schema.prisma
+++ b/prisma/schema.prisma
@@ -7,7 +7,7 @@ datasource db {
  url      = env("DATABASE_URL")
 }

-model AnthropicQuery {
+model AiQuery {
  id          String   @id @default(cuid())
  question    String   @db.Text
  response    String   @db.Text
@@ -17,7 +17,7 @@ model AnthropicQuery {
  createdAt   DateTime @default(now())
  updatedAt   DateTime @updatedAt
  
-  @@map("anthropic_queries")
+  @@map("ai_queries")
  @@index([createdAt])
  @@index([success])
 }
--- a/test-request.js
+++ b/test-request.js
@@ -4,7 +4,7 @@ const fetch = require('axios');
 async function testAPI() {
  try {
    const response = await fetch.post('http://localhost:3000/api/shortcut', {
-      command: 'anthropic',
+      command: 'llama',
      parameters: {
        question: 'What is 42?'
      },
--- a/utils/aiGatewayClient.ts
+++ b/utils/aiGatewayClient.ts
@@ -1,54 +1,90 @@
-import { generateText } from 'ai';
+import OpenAI from 'openai';

-interface AiGatewayResponse {
+let ovhAI: OpenAI | null = null;
+
+function getClient(): OpenAI {
+  if (!ovhAI) {
+    ovhAI = new OpenAI({
+      apiKey: process.env.OVHCLOUD_API_KEY,
+      baseURL: 'https://oai.endpoints.kepler.ai.cloud.ovh.net/v1'
+    });
+  }
+  return ovhAI;
+}
+
+interface AiResponse {
  text: string;
  tokensUsed: number;
 }

-export async function queryAiGateway(
-  text: string,
-  model: string
-): Promise<AiGatewayResponse> {
+const MAX_RETRIES = 3;
+
+export async function queryAi(text: string): Promise<AiResponse> {
  const requestId = Math.random().toString(36).substring(7);
  const startTime = Date.now();

-  console.info(`[AI-${requestId}] Starting Vercel Gateway AI request`, {
+  console.info(`[AI-${requestId}] Starting OVH AI request`, {
    promptLength: text.length,
-    model,
    timestamp: new Date().toISOString()
  });

-  try {
-    const response = await generateText({
-      model,
-      prompt: text
-    });
+  let lastError: Error | null = null;

-    const duration = Date.now() - startTime;
-    const tokensUsed = response.usage?.totalTokens || 0;
+  for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
+    try {
+      console.info(
+        `[AI-${requestId}] OVH AI request attempt ${attempt}/${MAX_RETRIES}`
+      );

-    console.info(
-      `[AI-${requestId}] Vercel Gateway AI response received in ${duration}ms`,
-      {
-        responseLength: response.text.length,
-        tokensUsed,
-        usage: response.usage
+      const completion = await getClient().chat.completions.create({
+        model: 'Meta-Llama-3_3-70B-Instruct',
+        temperature: 0.7,
+        max_tokens: 4096,
+        messages: [
+          {
+            role: 'user',
+            content: text
+          }
+        ]
+      });
+
+      const responseText = completion.choices[0]?.message?.content || '';
+      const tokensUsed =
+        (completion.usage?.prompt_tokens || 0) +
+        (completion.usage?.completion_tokens || 0);
+
+      const duration = Date.now() - startTime;
+      console.info(
+        `[AI-${requestId}] OVH AI response received in ${duration}ms`,
+        {
+          responseLength: responseText.length,
+          tokensUsed,
+          usage: completion.usage
+        }
+      );
+
+      return {
+        text: responseText,
+        tokensUsed
+      };
+    } catch (error) {
+      const duration = Date.now() - startTime;
+      console.error(
+        `[AI-${requestId}] OVH AI attempt ${attempt} failed after ${duration}ms:`,
+        {
+          error: error instanceof Error ? error.message : String(error),
+          promptLength: text.length
+        }
+      );
+      lastError = error as Error;
+
+      if (attempt < MAX_RETRIES) {
+        const delay = 1000 * attempt;
+        console.info(`[AI-${requestId}] Retrying in ${delay}ms...`);
+        await new Promise(resolve => setTimeout(resolve, delay));
      }
-    );
-
-    return {
-      text: response.text,
-      tokensUsed
-    };
-  } catch (error) {
-    const duration = Date.now() - startTime;
-    console.error(
-      `[AI-${requestId}] Vercel Gateway AI error after ${duration}ms:`,
-      {
-        error: error instanceof Error ? error.message : String(error),
-        promptLength: text.length
-      }
-    );
-    throw new Error(`Vercel Gateway AI error: ${JSON.stringify(error)}.`);
+    }
  }
+
+  throw lastError || new Error('OVH AI error: all retry attempts failed.');
 }
--- a/utils/commands/anthropic.ts
+++ b/utils/commands/anthropic.ts
@@ -1,8 +1,8 @@
-import { queryAiGateway } from '@utils/aiGatewayClient';
+import { queryAi } from '@utils/aiGatewayClient';
 import { ShortcutsResponse } from '../types';
 import { dbOperations } from '@utils/db';

-export async function anthropicCommand(
+export async function llamaCommand(
  parameters: Record<string, string> | undefined
 ): Promise<ShortcutsResponse> {
  const commandId = Math.random().toString(36).substring(7);
@@ -14,7 +14,7 @@ export async function anthropicCommand(
  let errorMessage: string | undefined;
  let tokensUsed: number | undefined;

-  console.info(`[CMD-${commandId}] Anthropic command started`, {
+  console.info(`[CMD-${commandId}] Llama command started`, {
    hasParameters: !!parameters,
    timestamp: new Date().toISOString()
  });
@@ -41,17 +41,14 @@ export async function anthropicCommand(
      question +
      '. Structure the response in a manner suitable for spoken communication.';

-    const anthropicResponse = await queryAiGateway(
-      prompt,
-      'anthropic/claude-sonnet-4.5'
-    );
-    response = anthropicResponse.text;
-    tokensUsed = anthropicResponse.tokensUsed;
+    const aiResponse = await queryAi(prompt);
+    response = aiResponse.text;
+    tokensUsed = aiResponse.tokensUsed;
    success = true;

    const duration = Date.now() - startTime;
    console.info(
-      `[CMD-${commandId}] Anthropic command completed in ${duration}ms`,
+      `[CMD-${commandId}] Llama command completed in ${duration}ms`,
      {
        responseLength: response.length,
        tokensUsed,
@@ -69,12 +66,12 @@ export async function anthropicCommand(
  } catch (error) {
    const duration = Date.now() - startTime;
    console.error(
-      `[CMD-${commandId}] Anthropic command failed after ${duration}ms:`,
+      `[CMD-${commandId}] Llama command failed after ${duration}ms:`,
      error
    );
    success = false;
    errorMessage = error instanceof Error ? error.message : 'Unknown error';
-    response = 'Sorry. There was a problem with Anthropic.';
+    response = 'Sorry. There was a problem with the AI service.';

    return {
      success: false,
@@ -84,7 +81,7 @@ export async function anthropicCommand(
    if (question) {
      try {
        console.info(`[CMD-${commandId}] Saving query to database`);
-        await dbOperations.saveAnthropicQuery({
+        await dbOperations.saveQuery({
          question,
          response,
          success,
--- a/utils/db.ts
+++ b/utils/db.ts
@@ -11,7 +11,7 @@ if (process.env.NODE_ENV === 'development') {
 }

 export const dbOperations = {
-  async saveAnthropicQuery({
+  async saveQuery({
    question,
    response,
    success,
@@ -25,7 +25,7 @@ export const dbOperations = {
    tokensUsed?: number;
  }) {
    try {
-      return await db.anthropicQuery.create({
+      return await db.aiQuery.create({
        data: {
          question,
          response,
--- a/utils/registry.ts
+++ b/utils/registry.ts
@@ -1,7 +1,7 @@
 import { ShortcutsResponse } from './types';
 import { pingCommand } from './commands/ping';
 import { timeCommand } from './commands/time';
-import { anthropicCommand } from './commands/anthropic';
+import { llamaCommand } from './commands/llama';

 type CommandHandler = (
  parameters?: Record<string, string>
@@ -18,7 +18,7 @@ export class CommandRegistry {
  private registerDefaultCommands() {
    this.register('ping', pingCommand);
    this.register('time', timeCommand);
-    this.register('anthropic', anthropicCommand);
+    this.register('llama', llamaCommand);
  }

  register(command: string, handler: CommandHandler) {
--- a/yarn.lock
+++ b/yarn.lock