feat: use llama

2026-02-01 13:03:51 +01:00
parent e51942a9b2
commit 7d1ef8a7e5
10 changed files with 13205 additions and 1058 deletions
--- a/.env.example
+++ b/.env.example
@@ -1,2 +1,3 @@
 DATABASE_URL=""
 USER_KEY=""
 OVHCLOUD_API_KEY=""
--- a/package-lock.json
+++ b/package-lock.json
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
  "name": "siri-shortcuts",
  "version": "1.0.0",
-  "description": "Siri-enabled, Anthropic-powered shortcuts",
+  "description": "Siri-enabled, Llama-powered shortcuts",
  "author": "riccardo@frompixels.com",
  "scripts": {
    "dev": "next dev",
@@ -25,12 +25,12 @@
  },
  "dependencies": {
    "@prisma/client": "^5.22.0",
-    "ai": "^5.0.68",
+    "openai": "^4.77.0",
    "axios": "^1.12.0",
    "next": "^15.5.9",
    "react": "^19.0.0",
    "react-dom": "^19.0.0",
-    "zod": "^4.1.12"
+    "zod": "^3.23.8"
  },
  "devDependencies": {
    "@commitlint/cli": "^19.6.1",
--- a/prisma/schema.prisma
+++ b/prisma/schema.prisma
@@ -7,7 +7,7 @@ datasource db {
  url      = env("DATABASE_URL")
 }
-model AnthropicQuery {
+model AiQuery {
  id          String   @id @default(cuid())
  question    String   @db.Text
  response    String   @db.Text
@@ -17,7 +17,7 @@ model AnthropicQuery {
  createdAt   DateTime @default(now())
  updatedAt   DateTime @updatedAt
-  @@map("anthropic_queries")
+  @@map("ai_queries")
  @@index([createdAt])
  @@index([success])
 }
--- a/test-request.js
+++ b/test-request.js
@@ -4,7 +4,7 @@ const fetch = require('axios');
 async function testAPI() {
  try {
    const response = await fetch.post('http://localhost:3000/api/shortcut', {
-      command: 'anthropic',
+      command: 'llama',
      parameters: {
        question: 'What is 42?'
      },
--- a/utils/aiGatewayClient.ts
+++ b/utils/aiGatewayClient.ts
@@ -1,54 +1,90 @@
-import { generateText } from 'ai';
+import OpenAI from 'openai';
-interface AiGatewayResponse {
+let ovhAI: OpenAI | null = null;
 /**
  * Returns the module-wide OpenAI-compatible client pointed at the OVHcloud
  * endpoint, creating it on first use and reusing it afterwards.
  *
  * The API key is validated here instead of being handed to the SDK as-is:
  * passing `undefined` makes the SDK fall back to its own `OPENAI_API_KEY`
  * lookup, which either reports the wrong variable name or sends an unrelated
  * key to the OVH endpoint, and an empty string is accepted by the
  * constructor and only fails later as an authentication error.
  *
  * @returns The cached client instance.
  * @throws Error when `OVHCLOUD_API_KEY` is unset or blank. Nothing is cached
  *   in that case, so a later call re-checks the environment.
  */
 function getClient(): OpenAI {
  if (!ovhAI) {
    const apiKey = process.env.OVHCLOUD_API_KEY?.trim();
    if (!apiKey) {
      throw new Error(
        'OVH AI error: OVHCLOUD_API_KEY environment variable is missing or empty.'
      );
    }
    ovhAI = new OpenAI({
      apiKey,
      baseURL: 'https://oai.endpoints.kepler.ai.cloud.ovh.net/v1'
    });
  }
  return ovhAI;
 }
 /** Result handed back to callers of `queryAi`. */
 interface AiResponse {
  /** Message content of the first completion choice; an empty string when the API returned none. */
  text: string;
  /** Prompt tokens plus completion tokens as reported in the API usage data; 0 for any part that is missing. */
  tokensUsed: number;
 }
-export async function queryAiGateway(
+const MAX_RETRIES = 3;
-  text: string,
+
-  model: string
+export async function queryAi(text: string): Promise<AiResponse> {
 ): Promise<AiGatewayResponse> {
  const requestId = Math.random().toString(36).substring(7);
  const startTime = Date.now();
-  console.info(`[AI-${requestId}] Starting Vercel Gateway AI request`, {
+  console.info(`[AI-${requestId}] Starting OVH AI request`, {
    promptLength: text.length,
    model,
    timestamp: new Date().toISOString()
  });
  let lastError: Error | null = null;
  for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
    try {
-    const response = await generateText({
+      console.info(
-      model,
+        `[AI-${requestId}] OVH AI request attempt ${attempt}/${MAX_RETRIES}`
-      prompt: text
+      );
      const completion = await getClient().chat.completions.create({
        model: 'Meta-Llama-3_3-70B-Instruct',
        temperature: 0.7,
        max_tokens: 4096,
        messages: [
          {
            role: 'user',
            content: text
          }
        ]
      });
-    const duration = Date.now() - startTime;
+      const responseText = completion.choices[0]?.message?.content || '';
-    const tokensUsed = response.usage?.totalTokens || 0;
+      const tokensUsed =
        (completion.usage?.prompt_tokens || 0) +
        (completion.usage?.completion_tokens || 0);
      const duration = Date.now() - startTime;
      console.info(
-      `[AI-${requestId}] Vercel Gateway AI response received in ${duration}ms`,
+        `[AI-${requestId}] OVH AI response received in ${duration}ms`,
        {
-        responseLength: response.text.length,
+          responseLength: responseText.length,
          tokensUsed,
-        usage: response.usage
+          usage: completion.usage
        }
      );
      return {
-      text: response.text,
+        text: responseText,
        tokensUsed
      };
    } catch (error) {
      const duration = Date.now() - startTime;
      console.error(
-      `[AI-${requestId}] Vercel Gateway AI error after ${duration}ms:`,
+        `[AI-${requestId}] OVH AI attempt ${attempt} failed after ${duration}ms:`,
        {
          error: error instanceof Error ? error.message : String(error),
          promptLength: text.length
        }
      );
-    throw new Error(`Vercel Gateway AI error: ${JSON.stringify(error)}.`);
+      lastError = error as Error;
      if (attempt < MAX_RETRIES) {
        const delay = 1000 * attempt;
        console.info(`[AI-${requestId}] Retrying in ${delay}ms...`);
        await new Promise(resolve => setTimeout(resolve, delay));
      }
    }
  }
  throw lastError || new Error('OVH AI error: all retry attempts failed.');
 }
--- a/utils/commands/anthropic.ts
+++ b/utils/commands/anthropic.ts
@@ -1,8 +1,8 @@
-import { queryAiGateway } from '@utils/aiGatewayClient';
+import { queryAi } from '@utils/aiGatewayClient';
 import { ShortcutsResponse } from '../types';
 import { dbOperations } from '@utils/db';
-export async function anthropicCommand(
+export async function llamaCommand(
  parameters: Record<string, string> | undefined
 ): Promise<ShortcutsResponse> {
  const commandId = Math.random().toString(36).substring(7);
@@ -14,7 +14,7 @@ export async function anthropicCommand(
  let errorMessage: string | undefined;
  let tokensUsed: number | undefined;
-  console.info(`[CMD-${commandId}] Anthropic command started`, {
+  console.info(`[CMD-${commandId}] Llama command started`, {
    hasParameters: !!parameters,
    timestamp: new Date().toISOString()
  });
@@ -41,17 +41,14 @@ export async function anthropicCommand(
      question +
      '. Structure the response in a manner suitable for spoken communication.';
-    const anthropicResponse = await queryAiGateway(
+    const aiResponse = await queryAi(prompt);
-      prompt,
+    response = aiResponse.text;
-      'anthropic/claude-sonnet-4.5'
+    tokensUsed = aiResponse.tokensUsed;
    );
    response = anthropicResponse.text;
    tokensUsed = anthropicResponse.tokensUsed;
    success = true;
    const duration = Date.now() - startTime;
    console.info(
-      `[CMD-${commandId}] Anthropic command completed in ${duration}ms`,
+      `[CMD-${commandId}] Llama command completed in ${duration}ms`,
      {
        responseLength: response.length,
        tokensUsed,
@@ -69,12 +66,12 @@ export async function anthropicCommand(
  } catch (error) {
    const duration = Date.now() - startTime;
    console.error(
-      `[CMD-${commandId}] Anthropic command failed after ${duration}ms:`,
+      `[CMD-${commandId}] Llama command failed after ${duration}ms:`,
      error
    );
    success = false;
    errorMessage = error instanceof Error ? error.message : 'Unknown error';
-    response = 'Sorry. There was a problem with Anthropic.';
+    response = 'Sorry. There was a problem with the AI service.';
    return {
      success: false,
@@ -84,7 +81,7 @@ export async function anthropicCommand(
    if (question) {
      try {
        console.info(`[CMD-${commandId}] Saving query to database`);
-        await dbOperations.saveAnthropicQuery({
+        await dbOperations.saveQuery({
          question,
          response,
          success,
--- a/utils/db.ts
+++ b/utils/db.ts
@@ -11,7 +11,7 @@ if (process.env.NODE_ENV === 'development') {
 }
 export const dbOperations = {
-  async saveAnthropicQuery({
+  async saveQuery({
    question,
    response,
    success,
@@ -25,7 +25,7 @@ export const dbOperations = {
    tokensUsed?: number;
  }) {
    try {
-      return await db.anthropicQuery.create({
+      return await db.aiQuery.create({
        data: {
          question,
          response,
--- a/utils/registry.ts
+++ b/utils/registry.ts
@@ -1,7 +1,7 @@
 import { ShortcutsResponse } from './types';
 import { pingCommand } from './commands/ping';
 import { timeCommand } from './commands/time';
-import { anthropicCommand } from './commands/anthropic';
+import { llamaCommand } from './commands/llama';
 type CommandHandler = (
  parameters?: Record<string, string>
@@ -18,7 +18,7 @@ export class CommandRegistry {
  private registerDefaultCommands() {
    this.register('ping', pingCommand);
    this.register('time', timeCommand);
-    this.register('anthropic', anthropicCommand);
+    this.register('llama', llamaCommand);
  }
  register(command: string, handler: CommandHandler) {
--- a/yarn.lock
+++ b/yarn.lock