import * as pdfjs from 'pdfjs-dist';

// Load the pdf.js worker script from cdnjs, using the version reported by the
// imported pdfjs-dist so the worker and the API stay in sync. The scheme is
// explicit because a protocol-relative URL ("//host/...") inherits the page's
// scheme and cannot be fetched under file:// or custom application schemes.
pdfjs.GlobalWorkerOptions.workerSrc = `https://cdnjs.cloudflare.com/ajax/libs/pdf.js/${pdfjs.version}/pdf.worker.min.js`;

/**
 * Fields pulled out of the text of a receipt PDF by `extractPDFData`.
 *
 * Every field is optional: a field is only present when its pattern was found
 * in the page text. All values are kept as the raw strings found in the
 * document; nothing is parsed into numbers or dates.
 */
export interface ExtractedData {
  /** Digits following the "NUMERO DE REFERENCIA:" label. */
  referenceNumber?: string;
  /** Date and time as printed, in the form "NN/NN/NN NN:NN:NN". */
  date?: string;
  /** Digits and dots following "TOTAL $-" (the "$-" prefix is not included). */
  amount?: string;
  /** Text following the "BENEFICIARIO:" label. */
  clientName?: string;
  /** Human-readable document kind assigned by the extractor. */
  documentType?: string;
  /** Digits following the "NO. AUTH" label. */
  authNumber?: string;
  /** Text following the "REMITENTE:" label. */
  remitent?: string;
  /** Text following the "PAIS DE ORIGEN:" label. */
  originCountry?: string;
  /** Text following the "TIPO IDENTIFICACION" label. */
  clientIdType?: string;
  /** Allows access by arbitrary string key; values are strings or absent. */
  [key: string]: string | undefined;
}

/**
 * Parses the flattened text of a receipt page into structured fields.
 *
 * Each field is looked up with its own regular expression and is only set on
 * the result when that expression matches; `documentType` is always set.
 * Keys are assigned in a fixed order so that iterating the result is stable.
 *
 * @param text - Page text with the individual text items joined by spaces.
 * @returns The fields that could be recognised in `text`.
 */
const parseReceiptText = (text: string): ExtractedData => {
  const data: ExtractedData = {};

  // Stores the trimmed first capture group under `field` when `pattern`
  // matches. Missing fields are left unset rather than assigned `undefined`.
  const captureInto = (field: string, pattern: RegExp): void => {
    const match = text.match(pattern);
    if (match) {
      data[field] = match[1].trim();
    }
  };

  // Reference number (Western Union reference).
  captureInto('referenceNumber', /NUMERO DE\s+REFERENCIA:\s*(\d+)/i);

  // Date and time are captured separately and re-joined with a single space.
  const dateMatch = text.match(/(\d{2}\/\d{2}\/\d{2})\s+(\d{2}:\d{2}:\d{2})/);
  if (dateMatch) {
    data.date = `${dateMatch[1]} ${dateMatch[2]}`;
  }

  // Amount: the digits/dots that follow "TOTAL $-".
  captureInto('amount', /TOTAL\s+\$-([\d.]+)/);

  // Beneficiary name: runs up to the following "NO." label or end of text.
  captureInto('clientName', /BENEFICIARIO:\s+([^:\n]+?)(?:\s+NO\.|$)/);

  data.documentType = 'Western Union Transfer Receipt';

  // Additional Western Union specific fields.
  captureInto('authNumber', /NO\.\s*AUTH\s*(\d+)/);
  captureInto('remitent', /REMITENTE:\s+([^:\n]+?)(?:\s+PAIS|$)/);
  captureInto('originCountry', /PAIS DE ORIGEN:\s+([^\n]+?)(?:\s+BENEFICIARIO|$)/);
  captureInto('clientIdType', /TIPO IDENTIFICACION\s+([^\n]+?)(?:\s+IDENTIFICACION|$)/);

  return data;
};

/**
 * Extracts receipt fields from the first page of a PDF.
 *
 * The document is opened with pdf.js, the text items of page 1 are joined
 * with single spaces, and the resulting string is matched against the
 * receipt patterns. The pdf.js loading task is always destroyed before this
 * function settles so repeated calls do not accumulate open documents.
 *
 * @param pdfUrl - Location of the PDF, passed straight to `pdfjs.getDocument`.
 * @returns The recognised fields. If loading or text extraction fails, the
 *   error is logged with `console.error` and an empty object is returned;
 *   the returned promise does not reject for those failures.
 */
export const extractPDFData = async (pdfUrl: string): Promise<ExtractedData> => {
  let loadingTask: ReturnType<typeof pdfjs.getDocument> | undefined;
  try {
    loadingTask = pdfjs.getDocument(pdfUrl);
    const pdf = await loadingTask.promise;
    const firstPage = await pdf.getPage(1);
    const textContent = await firstPage.getTextContent();

    // Not every content item carries text (e.g. marked-content markers have
    // no `str`); those contribute an empty string to the joined output.
    const text = textContent.items
      .map((item: unknown) => {
        const str = (item as { str?: unknown }).str;
        return typeof str === 'string' ? str : '';
      })
      .join(' ');

    return parseReceiptText(text);
  } catch (error) {
    console.error('Error extracting PDF data:', error);
    return {};
  } finally {
    if (loadingTask) {
      try {
        // Releases the document and the worker owned by this loading task.
        await loadingTask.destroy();
      } catch {
        // Cleanup is best-effort; a failure here must not replace the result.
      }
    }
  }
};
