import { useEffect, useState } from 'react';
import { pdfjs } from 'react-pdf';

/**
 * Pulls table-of-contents entries out of the flattened text of a page.
 *
 * An entry is matched as: some title text, whitespace, a `start:end` page
 * range, whitespace, and one more integer. That trailing integer is required
 * for a match but its value is not used.
 *
 * @param {string} tocText - Page text with its items joined by single spaces.
 * @returns {Array<{title: string, startPage: number, endPage: number}>}
 *   Entries in the order they appear in the text; empty when nothing matches.
 */
function extractTocEntries(tocText) {
  // A fresh /g regex per call, so `lastIndex` state never leaks between calls.
  const entryPattern = /(.+?)\s+(\d+:\d+)\s+(\d+)/g;
  const entries = [];

  let match = entryPattern.exec(tocText);
  while (match !== null) {
    // The first title can carry the 'CONTENT  1' text that precedes it on the
    // page; strip it. `replace` is a no-op when the marker is absent.
    const title = match[1].replace('CONTENT  1', '').trim();
    const [startText, endText] = match[2].split(':');

    entries.push({
      title,
      startPage: Number.parseInt(startText, 10),
      endPage: Number.parseInt(endText, 10),
    });

    match = entryPattern.exec(tocText);
  }

  return entries;
}

/**
 * Loads a PDF and parses the table of contents printed on one of its pages.
 *
 * @param {*} pdf - Document source, passed straight to `pdfjs.getDocument`.
 * @param {number} [pageNumber=2] - 1-based number of the page to read the
 *   table of contents from.
 * @returns {Promise<Array<{title: string, startPage: number, endPage: number}>>}
 *   Parsed entries; an empty array when the document has fewer pages than
 *   `pageNumber` or when no entry matches.
 * @throws Rejects with the underlying pdf.js error if the document or page
 *   cannot be loaded.
 */
async function parseTableOfContents(pdf, pageNumber = 2) {
  const loadingTask = pdfjs.getDocument(pdf);

  try {
    const pdfInstance = await loadingTask.promise;

    // A document shorter than `pageNumber` has no page to read; report "no
    // entries" instead of letting `getPage` reject.
    if (pageNumber > pdfInstance.numPages) {
      return [];
    }

    const page = await pdfInstance.getPage(pageNumber);
    const content = await page.getTextContent();
    const tocText = content.items.map((item) => item.str).join(' ');

    return extractTocEntries(tocText);
  } finally {
    // Always release the loaded document, on success and on failure, so
    // repeated calls do not accumulate loaded PDFs.
    await loadingTask.destroy();
  }
}

/**
 * React hook that exposes the table of contents parsed from a PDF.
 *
 * Parsing is delegated to `parseTableOfContents` and restarts whenever
 * `pdfUrl` changes.
 *
 * @param {*} pdfUrl - Document source handed to `parseTableOfContents`; a
 *   falsy value means "no document".
 * @returns {Array} The entries resolved by `parseTableOfContents` for the
 *   current `pdfUrl`; an empty array before the first result, when there is
 *   no document, or when parsing failed.
 */
const useExtractTOC = (pdfUrl) => {
  const [toc, setToc] = useState([]);

  useEffect(() => {
    if (!pdfUrl) {
      // No document: drop entries left over from a previous one. Returning
      // the same array when it is already empty avoids a needless re-render.
      setToc((current) => (current.length === 0 ? current : []));
      return undefined;
    }

    // Set by the cleanup below so a parse that finishes after `pdfUrl`
    // changed (or after unmount) cannot overwrite newer state.
    let isStale = false;

    parseTableOfContents(pdfUrl)
      .then((entries) => {
        if (!isStale) {
          setToc(entries);
        }
      })
      .catch((error) => {
        if (isStale) {
          return;
        }
        // Report the failure rather than leaving an unhandled rejection, and
        // clear entries that may belong to a previous document.
        console.error('Failed to extract table of contents', error);
        setToc([]);
      });

    return () => {
      isStale = true;
    };
  }, [pdfUrl]);

  return toc;
};

export default useExtractTOC;
