// fileService.js
import * as pdfjsLib from "pdfjs-dist";
import mammoth from "mammoth";
import axios from "axios";

// Point pdf.js at a CDN-hosted worker build whose version matches the installed library.
// The scheme is spelled out as https:// instead of using a protocol-relative URL so the
// worker still resolves when the page itself is not served over http(s) (e.g. file://).
pdfjsLib.GlobalWorkerOptions.workerSrc = `https://cdnjs.cloudflare.com/ajax/libs/pdf.js/${pdfjsLib.version}/pdf.worker.min.mjs`;

// Google Cloud Vision `images:annotate` endpoint. The API key is read from the
// REACT_APP_GOOGLE_VISION_API_KEY environment variable and sent as the `key` query parameter.
// NOTE(review): the REACT_APP_ prefix suggests this value is inlined into a browser bundle,
// which would make the key visible to end users — confirm the key is restricted accordingly.
const GOOGLE_VISION_ANNOTATE_ENDPOINT = "https://vision.googleapis.com/v1/images:annotate";
const GOOGLE_VISION_API_URL = `${GOOGLE_VISION_ANNOTATE_ENDPOINT}?key=${process.env.REACT_APP_GOOGLE_VISION_API_KEY}`;

/**
 * Extract the text content of a PDF file using pdfjsLib.
 *
 * The text items of each page are joined with single spaces, pages are
 * separated by a blank line, and the overall result is trimmed.
 *
 * @param {File} file - The PDF file to extract text from.
 * @returns {Promise<string>} - The extracted text from the PDF.
 * @throws {Error} "Failed to extract text from PDF." when the file cannot be
 *   read or parsed; the underlying error is attached as `cause`.
 */
export const extractTextFromPDF = async (file) => {
  try {
    const pdfData = await file.arrayBuffer(); // Convert the file to an ArrayBuffer
    const loadingTask = pdfjsLib.getDocument({ data: pdfData });

    try {
      const pdf = await loadingTask.promise;
      const pageTexts = [];

      // pdf.js page numbers start at 1
      for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
        const page = await pdf.getPage(pageNumber);
        const content = await page.getTextContent();
        pageTexts.push(content.items.map((item) => item.str).join(" "));
      }

      return pageTexts.join("\n\n").trim();
    } finally {
      // Release the document whether extraction succeeded or failed, so
      // repeated calls do not accumulate loaded documents.
      await loadingTask.destroy();
    }
  } catch (error) {
    console.error("Error extracting text from PDF:", error);
    // Keep the original failure reachable for callers and debuggers.
    throw new Error("Failed to extract text from PDF.", { cause: error });
  }
};

/**
 * Serialize a canvas as a PNG data URL and return only its base64 payload.
 * @param {HTMLCanvasElement} canvas - The canvas to serialize.
 * @returns {string} - The segment of the data URL that follows the first comma.
 */
const getBase64FromCanvas = (canvas) => {
  const dataUrl = canvas.toDataURL("image/png");
  // A data URL has the form "<header>,<payload>"; drop the header.
  const [, base64Payload] = dataUrl.split(",");
  return base64Payload;
};
/**
 * Extract text from a PDF by rendering every page to a canvas and running OCR
 * on the rendered image with the Google Cloud Vision API
 * (DOCUMENT_TEXT_DETECTION).
 *
 * Pages are processed one at a time, in order, and their texts are joined with
 * single spaces. A page on which Vision detects no text contributes nothing
 * instead of aborting the whole document. Requires `document` and `fetch`, so
 * it must run in a browser-like environment.
 *
 * @param {File} pdfFile - The PDF file to OCR.
 * @returns {Promise<string>} - The recognized text of all pages, trimmed.
 * @throws {Error} "Error extracting text from PDF." when the file cannot be
 *   read, rendered, or recognized; the underlying error is attached as `cause`.
 */
export const extractTextFromPDF2 = async (pdfFile) => {
  try {
    // Await the file contents directly. Doing the work inside a FileReader
    // `onload` callback would let this function return before any text
    // exists and would hide failures from the surrounding try/catch.
    const pdfData = await pdfFile.arrayBuffer();
    const loadingTask = pdfjsLib.getDocument({ data: pdfData });

    try {
      const pdf = await loadingTask.promise;
      const pageTexts = [];

      // Loop through each page of the PDF (pdf.js page numbers start at 1)
      for (let i = 1; i <= pdf.numPages; i++) {
        const page = await pdf.getPage(i);

        // Render the page into an off-screen canvas
        const viewport = page.getViewport({ scale: 1.5 });
        const canvas = document.createElement("canvas");
        const context = canvas.getContext("2d");
        canvas.height = viewport.height;
        canvas.width = viewport.width;

        await page.render({ canvasContext: context, viewport }).promise;

        // Vision takes the image inline as base64 in the request body
        const requestBody = {
          requests: [
            {
              image: {
                content: getBase64FromCanvas(canvas),
              },
              features: [
                {
                  type: "DOCUMENT_TEXT_DETECTION",
                },
              ],
            },
          ],
        };

        console.log(`Processing page ${i} of ${pdf.numPages}`);

        const response = await fetch(GOOGLE_VISION_API_URL, {
          method: "POST",
          headers: {
            "Content-Type": "application/json",
          },
          body: JSON.stringify(requestBody),
        });

        const data = await response.json();
        const result = data.responses?.[0];

        // Treat HTTP failures, a missing result, and per-image API errors as
        // failures; a result that simply has no text is a blank page.
        if (!response.ok || !result || result.error) {
          throw new Error("Failed to extract text from the PDF. Please try again.");
        }

        const pageText = result.fullTextAnnotation?.text;
        if (pageText) {
          pageTexts.push(pageText);
        }
      }

      return pageTexts.join(" ").trim();
    } finally {
      // Release the document whether OCR succeeded or failed.
      await loadingTask.destroy();
    }
  } catch (error) {
    console.error("Error extracting text from PDF:", error);
    throw new Error("Error extracting text from PDF.", { cause: error });
  }
};

/**
 * Extract raw text from a Word document using mammoth.
 *
 * NOTE(review): mammoth presumably only understands the .docx format, so a
 * legacy binary .doc file would end up in the error path — confirm against
 * the callers that route files here.
 *
 * @param {File} file - The DOC or DOCX file to extract text from.
 * @returns {Promise<string>} - The extracted text, trimmed.
 * @throws {Error} "Failed to extract text from DOC/DOCX." when the file cannot
 *   be read or converted; the underlying error is attached as `cause`.
 */
export const extractTextFromDocOrDocx = async (file) => {
  try {
    const arrayBuffer = await file.arrayBuffer();
    const { value } = await mammoth.extractRawText({ arrayBuffer });
    return value.trim();
  } catch (error) {
    console.error("Error extracting text from DOC/DOCX:", error);
    // Keep the original failure reachable for callers and debuggers.
    throw new Error("Failed to extract text from DOC/DOCX.", { cause: error });
  }
};

/**
 * Extract text from an image using Google Vision API for OCR (TEXT_DETECTION).
 *
 * @param {string} imageBase64 - The base64-encoded image. A data URL
 *   ("data:<mime>;base64,<payload>") is also accepted; its header is removed
 *   before the request is sent.
 * @returns {Promise<string>} - The text of the first annotation, trimmed, or
 *   an empty string when the response carries no text annotations.
 * @throws {Error} "Failed to extract text from image." when the request fails
 *   or the API reports an error for the image; the underlying error is
 *   attached as `cause`.
 */
export const extractTextFromImage = async (imageBase64) => {
  try {
    // The request carries only the base64 payload. Raw base64 cannot start
    // with "data:", so stripping a data-URL header never alters plain input.
    const content = imageBase64.replace(/^data:[^,]*,/, "");

    const requestPayload = {
      requests: [
        {
          image: {
            content,
          },
          features: [
            {
              type: "TEXT_DETECTION",
            },
          ],
        },
      ],
    };

    const response = await axios.post(GOOGLE_VISION_API_URL, requestPayload);
    const [result] = response.data.responses;

    // A per-image failure arrives inside the response body; surface it
    // instead of reporting it as "no text found".
    if (result?.error) {
      throw new Error(result.error.message ?? "Google Vision API returned an error.");
    }

    const extractedText = result?.textAnnotations?.[0]?.description ?? "";

    return extractedText.trim();
  } catch (error) {
    console.error("Error extracting text using Google Vision API:", error);
    throw new Error("Failed to extract text from image.", { cause: error });
  }
};
