import { pipe } from '@fxts/core'
import { escapeRegExp } from 'lodash'

import { wordsNotCapitalizedInTitles } from '../constants/normalize'
import { organisationNames } from '../constants/organisationNames'
import { properNouns, properNounSet } from '../constants/properNouns'
import { capitalize, decapitalize, recapitalizeProperNouns } from './format'

// One find-and-replace rule: what to search for, and either the literal
// replacement text or a function that produces it (both are handed to
// `String.prototype.replace`).
type ReplacementPattern = [
  pattern: RegExp | string,
  replacement: string | ((x: string) => string),
]

// Long-form organisation names: the first entry of each tuple's name list.
// Tuples whose list is empty produce `undefined` and are dropped.
const organisationLongNames = organisationNames
  .map((tuple: [string, string[]]) => tuple[1][0])
  .filter((name): name is string => typeof name === 'string')

// Case-insensitive, global matcher for any long organisation name.
// - Each name is escaped so regex metacharacters inside a name (parentheses,
//   dots, ...) are matched literally instead of altering the pattern.
// - Longer names are listed first so that, when one name is a prefix of
//   another, the longer one wins the alternation.
// - With no names at all, a never-matching pattern is used; an empty
//   alternation would otherwise match the empty string at every position.
const organisationLongNamesRegExp = new RegExp(
  organisationLongNames.length > 0
    ? [...organisationLongNames]
        .sort((a, b) => b.length - a.length)
        .map((name) => name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
        .join('|')
    : '(?!)',
  'gi',
)
// Symbol-level clean-ups. Both rules are global so that every occurrence in
// the text is rewritten, not only the first one.
const symbolReplacements: ReplacementPattern[] = [
  // A double hyphen becomes a spaced en dash.
  [/--/g, ' – '],
  // Re-join a closing-tag opener that has been split apart by spaces.
  [/< \/ /g, '</'],
]

// Brand names with internal capitals. Matching is case-insensitive and
// global, so every occurrence in the text is restored to the canonical
// spelling (not just the first one).
const brandNameReplacements: ReplacementPattern[] = [
  [/yeastone/gi, 'YeastOne'],
  [/pubmed/gi, 'PubMed'],
  [/plasmalyte/gi, 'PlasmaLyte'],
  [/glidescope/gi, 'GlideScope'],
  [/hemocue/gi, 'HemoCue'],
  [/uptodate/gi, 'UpToDate'],
  [/airduo/gi, 'AirDuo'],
  [/quillichew/gi, 'QuilliChew'],
]

// Miscellaneous spelling/capitalization fixes, applied in order.
// Every rule is a global regex so that all occurrences in the text are
// rewritten; rules without the `i` flag are intentionally case-sensitive.
const otherCustomReplacements: ReplacementPattern[] = [
  [/yes!/g, 'Yes!'],
  [/no!/g, 'No!'],
  [/trial study/g, 'trial'],
  [/ e \/ a /g, ' E/A '],
  [/e \/ e'/g, "E/e'"],
  [/HFmrEF/gi, 'HFmrEF'],
  [/HFrEF/gi, 'HFrEF'],
  [/HFpEF/gi, 'HFpEF'],
  [/HFimpEF/gi, 'HFimpEF'],
  [/HbA1C/gi, 'HbA1c'],
  [/A1C/gi, 'A1c'],
  [/-A /g, ' – A '],
  [/Bohr/g, 'Böhr'],
  [/HighDose/g, 'High-dose'],
  [/IGA/g, 'IgA'],
  [/IGG/g, 'IgG'],
  [/IGM/g, 'IgM'],
  // `\b` keeps these from firing on words that merely start with the letter
  // (e.g. "Option and" must not become "Option And").
  [/Option a\b/g, 'Option A'],
  [/Option b\b/g, 'Option B'],
  [/Option c\b/g, 'Option C'],
  [/Option d\b/g, 'Option D'],
  [/Option e\b/g, 'Option E'],
  [/Sjogren/g, 'Sjögren'],
  [/I Do/g, 'I do'],
  [/immune fab/g, 'immune fAb'],
  [/CHA²DS²-VASc/g, 'CHA₂DS₂-VASc'],
  [/CHA2DS2-VASc/g, 'CHA₂DS₂-VASc'],
]

// Named entities whose "name" contains a colon; the colon is swapped for a
// dash. Global, so every occurrence in a longer text is rewritten.
const namedEntitiesWithColonsReplacements: ReplacementPattern[] = [
  [/ILUMIEN IV: OPTICAL/gi, 'ILUMIEN IV - OPTICAL'],
  [/kidney disease: improving/gi, 'Kidney Disease - Improving'],
]

// Reports whether `word`, exactly as given, is a member of `properNounSet`.
function isProperNoun(word: string): boolean {
  const isKnownProperNoun = properNounSet.has(word)
  return isKnownProperNoun
}

/**
 * Applies each `[pattern, replacement]` rule to `text` in order, feeding the
 * output of one rule into the next.
 *
 * Matching follows `String.prototype.replace`: a string pattern or a
 * non-global RegExp replaces only the first match.
 *
 * @param text - the string to transform
 * @param patterns - ordered replacement rules
 * @returns the transformed string
 */
function applyCustomReplacements(text: string, patterns: ReplacementPattern[]) {
  let result = text
  for (const [pattern, replacement] of patterns) {
    // Both branches make the same call; the `typeof` check only narrows the
    // union so that a `replace` overload can be selected.
    result =
      typeof replacement === 'string'
        ? result.replace(pattern, replacement)
        : result.replace(pattern, replacement)
  }
  return result
}

// Runs the `symbolReplacements` rules over `text`.
function applySymbolReplacements(text: string): string {
  const withSymbolsFixed = applyCustomReplacements(text, symbolReplacements)
  return withSymbolsFixed
}

/** Runs the `brandNameReplacements` rules over `text`. */
export function applyBrandNameReplacements(text: string) {
  const withBrandNamesFixed = applyCustomReplacements(
    text,
    brandNameReplacements,
  )
  return withBrandNamesFixed
}

// Runs the `otherCustomReplacements` rules over `text`.
function applyOtherCustomReplacements(text: string): string {
  const withCustomFixes = applyCustomReplacements(text, otherCustomReplacements)
  return withCustomFixes
}

// Strips any run of `.`, `†`, `,`, `;`, `:` at the very end of the text.
// Trailing whitespace is removed along with it, so punctuation followed by a
// space or newline ("Title.\n") is still recognised as terminal.
function removeStrayTerminalPunctuation(text: string): string {
  return text.replace(/[\s.†,;:]+$/, '')
}

// Surrounds every character that is not a word character, whitespace, `"` or
// `*` with single spaces, then collapses runs of spaces into one.
// Note: `\w` only covers ASCII letters, digits and `_`, so accented letters
// are padded as well.
function padSymbolsWithWhiteSpace(text: string): string {
  const symbol = /([^\w\s"*])/g
  const padded = text.replace(symbol, ' $1 ')
  const collapsed = padded.replace(/ +/g, ' ')
  return collapsed
}

// Decides whether a word keeps its capitals when the title is decapitalized.
// Words listed in `wordsNotCapitalizedInTitles` never do, except the exact
// word "A"; otherwise the word must look like a short label or a
// formula-style token.
function shouldRemainCapitalized(word: string): boolean {
  const isMinorTitleWord = wordsNotCapitalizedInTitles.includes(
    word.toLowerCase(),
  )
  if (isMinorTitleWord && word !== 'A') return false

  // One capital optionally followed by one letter: hepatitis A, factor Xa, etc.
  if (/^[A-Z][a-zA-Z]?$/.test(word)) return true

  // Capitals each optionally followed by one lowercase letter, then digits: MgSO4, etc.
  return /^([A-Z][a-z]?)+[0-9]*$/.test(word)
}

// Passes the word to `capitalize` unless its lowercase form is listed in
// `wordsNotCapitalizedInTitles`, in which case it is returned untouched.
function maybeCapitalize(word: string): string {
  const isMinorTitleWord = wordsNotCapitalizedInTitles.includes(
    word.toLowerCase(),
  )
  return isMinorTitleWord ? word : capitalize(word)
}

// Passes the word to `decapitalize` unless `shouldRemainCapitalized` says its
// capitals must be kept.
function maybeDecapitalize(word: string): string {
  return shouldRemainCapitalized(word) ? word : decapitalize(word)
}

/**
 * Splits the text on single spaces and decapitalizes each word via
 * `maybeDecapitalize`, with two exceptions that are returned untouched:
 *
 * - a word that follows "<proper noun> -" (Duke-International, etc.);
 * - a word whose preceding token is exactly "." or "?" (start of a sentence).
 *
 * The input is expected to already have its symbols separated by spaces, so
 * that "-", "." and "?" appear as tokens of their own.
 *
 * @param text - space-separated text
 * @returns the words re-joined with single spaces
 */
function splitOnSpacesAndDecapitalize(text: string): string {
  const words = text.split(' ')
  const result = words.map((word, index) => {
    const previousWord = words[index - 1] ?? ''

    // The earliest position this can happen is index 2 (proper noun at 0,
    // dash at 1), so index 2 itself must be included.
    const isWordAfterProperNounFollowedByDash =
      index >= 2 &&
      isProperNoun(words[index - 2] ?? '') &&
      previousWord.trim() === '-'
    if (isWordAfterProperNounFollowedByDash) return word

    const isStartOfSentence = previousWord === '.' || previousWord === '?'
    if (isStartOfSentence) return word

    return maybeDecapitalize(word)
  })
  return result.join(' ')
}

// Removes the spaces around selected punctuation (parentheses, hyphens,
// commas, apostrophes, colons, `!`, `?`, `.`), re-attaches a detached
// possessive "s", and trims the result.
function unPadSymbolsWithWhiteSpace(text: string): string {
  // Order matters: each rule sees the output of the previous one.
  const rules: [RegExp, string][] = [
    [/([(] )/g, '('],
    [/( [)])/g, ')'],
    [/ *- */g, '-'],
    [/ ,/g, ','],
    [/ '/g, "'"],
    [/ :/g, ':'],
    [/ !/g, '!'],
    [/ \?/g, '?'],
    [/ \./g, '.'],
    [/(' s |' s$)/g, "'s "],
  ]

  let unpadded = text
  for (const [pattern, replacement] of rules) {
    unpadded = unpadded.replace(pattern, replacement)
  }
  return unpadded.trim()
}

// Finds every organisation long name in the text and passes each of its
// space-separated words through `maybeCapitalize`.
function recapitalizeOrganisationNames(text: string) {
  return text.replace(organisationLongNamesRegExp, (organisationName) => {
    const recasedWords = organisationName.split(' ').map(maybeCapitalize)
    return recasedWords.join(' ')
  })
}

// Collapses every run of whitespace into a single space and removes leading
// and trailing whitespace.
function normalizeWhiteSpace(text: string): string {
  const chunks = text.trim().split(/\s+/)
  return chunks.join(' ')
}

/**
 * Removes anything that looks like an HTML tag: a `<`, an optional `/`, one or
 * more non-`>` characters, and then either `>` or the end of the string.
 *
 * Because the end of the string also terminates a match, an unclosed `<`
 * removes everything after it (e.g. "a < b" becomes "a ").
 */
export function stripHtmlTags(text: string): string {
  const tagLike = /<\/?[^>]+(>|$)/g
  return text.replace(tagLike, '')
}

// Lowercases a standalone "A" when it directly follows ": ", " is " or
// " has " and is itself followed by a space.
function decapitalizeAWhenIsADeterminant(text: string): string {
  const rules: [RegExp, string][] = [
    [/: A /g, ': a '],
    [/ is A /g, ' is a '],
    [/ has A /g, ' has a '],
  ]
  return rules.reduce(
    (current, [pattern, replacement]) => current.replace(pattern, replacement),
    text,
  )
}

// Proper nouns that end in a possessive "'s", e.g. Parkinson's disease
const properNounsWithPossessiveForms = properNouns.filter((noun) =>
  noun.endsWith("'s"),
)

/**
 * Restores the apostrophe in possessive proper nouns that were written
 * without it: "Parkinsons disease" -> "Parkinson's disease".
 *
 * The apostrophe-less spelling is matched literally (no regex, so characters
 * such as "." in a noun are not treated as wildcards) and is also fixed when
 * it is the last thing in the title. An occurrence that is immediately
 * followed by an apostrophe is left as it is.
 *
 * @param title - the title to fix
 * @returns the title with possessive forms restored
 */
function fixPossessiveFormsInProperNouns(title: string): string {
  let fixed = title
  for (const properNoun of properNounsWithPossessiveForms) {
    const properNounWithoutApostrophe = properNoun.replace(/'s$/, 's')
    if (!fixed.includes(properNounWithoutApostrophe)) continue

    // Split on the misspelling, then glue the pieces back together, choosing
    // for each gap between the corrected noun and the original spelling.
    fixed = fixed
      .split(properNounWithoutApostrophe)
      .reduce(
        (joined, segment) =>
          joined +
          (segment.startsWith("'") ? properNounWithoutApostrophe : properNoun) +
          segment,
      )
  }
  return fixed
}

// Proper nouns containing an abbreviated part followed by a period and a
// space, e.g. H. pylori, H. influenzae, etc.
const properNounsWithShortenedForms = properNouns.filter((noun) =>
  noun.includes('. '),
)

/**
 * Rewrites every spelling of an abbreviated proper noun to its canonical
 * form: H pylori -> H. pylori, H. PYLORI -> H. pylori.
 *
 * Both the spelling with the period and the one without it are handled in a
 * single case-insensitive pass, so a title that mixes the two forms has all of
 * them normalized.
 *
 * @param title - the title to fix
 * @returns the title with canonical proper-noun spellings
 */
function fixShortenedCompositeProperNouns(title: string): string {
  const lowerCaseTitle = title.toLowerCase()
  let fixed = title
  for (const properNoun of properNounsWithShortenedForms) {
    const properNounWithoutPeriod = properNoun.replace(/\. /, ' ')

    // Cheap pre-check so a regex is only built for nouns that actually occur.
    const occursInTitle =
      lowerCaseTitle.includes(properNounWithoutPeriod.toLowerCase()) ||
      lowerCaseTitle.includes(properNoun.toLowerCase())
    if (!occursInTitle) continue

    const eitherSpelling = `${escapeRegExp(properNoun)}|${escapeRegExp(
      properNounWithoutPeriod,
    )}`
    // The surrounding groups require the noun to sit at a word boundary (or
    // next to a non-word character) and are written back unchanged.
    const matchPattern = new RegExp(
      `((?:\\b|\\W|\\)))(${eitherSpelling})((?:\\b|\\W|\\())`,
      'gi',
    )
    // A replacer function avoids any special meaning of "$" in the noun.
    fixed = fixed.replace(
      matchPattern,
      (_match: string, before: string, _noun: string, after: string) =>
        `${before}${properNoun}${after}`,
    )
  }
  return fixed
}

/**
 * Normalizes a title by running it through the steps listed in the `pipe`
 * call, in order. The result is then passed through `capitalize`, has its
 * whitespace collapsed, and has the spaced-out sequences "& lt ;" / "& gt ;"
 * rejoined into `&lt;` / `&gt;`.
 *
 * @param title - the raw title
 * @returns the normalized title
 */
export const normalizeTitle = (title: string): string => {
  const recased = pipe(
    title,
    stripHtmlTags,
    applySymbolReplacements,
    removeStrayTerminalPunctuation,
    padSymbolsWithWhiteSpace,
    splitOnSpacesAndDecapitalize,
    unPadSymbolsWithWhiteSpace,
    recapitalizeOrganisationNames,
    recapitalizeProperNouns,
    applyBrandNameReplacements,
    applyOtherCustomReplacements,
    decapitalizeAWhenIsADeterminant,
    fixPossessiveFormsInProperNouns,
    fixShortenedCompositeProperNouns,
  )
  const tidied = normalizeWhiteSpace(capitalize(recased))
  // Symbol padding splits HTML entities apart; put the two handled ones back.
  return tidied.replace(/& lt ;/g, '&lt;').replace(/& gt ;/g, '&gt;')
}

/**
 * Rewrites the known named entities that contain a colon inside their "name"
 * (these screw up Markdown) using `namedEntitiesWithColonsReplacements`.
 */
export function reformatNamedEntitiesWithColons(text: string) {
  const reformatted = applyCustomReplacements(
    text,
    namedEntitiesWithColonsReplacements,
  )
  return reformatted
}

/**
 * Reports whether the text matches the pattern of at least one rule in
 * `namedEntitiesWithColonsReplacements`.
 */
export function hasNamedEntityWithColon(text: string) {
  for (const [pattern] of namedEntitiesWithColonsReplacements) {
    if (text.match(pattern)) return true
  }
  return false
}

/**
 * Re-attaches accented characters (and a few apostrophe constructs) that are
 * separated from their word by spaces, e.g. "caf é s" -> "cafés".
 *
 * The rules run in the order listed. Most drop the spaces on both sides; a
 * few only drop the trailing space, and " â " keeps its leading space.
 */
export function removeSpacesAroundAccentedCharacters(text: string) {
  const rules: [RegExp, string][] = [
    [/ á /g, 'á'],
    [/ â /g, ' â'],
    [/ é /g, 'é'],
    // [/ à /g, 'à'] is deliberately left disabled
    [/ ä /g, 'ä'],
    [/ ç ã /g, 'çã'],
    [/ ç õ /g, 'çõ'],
    [/É /g, 'É'],
    [/Ê /g, 'Ê'],
    [/ ë /g, 'ë'],
    [/ ê /g, 'ê'],
    [/ è /g, 'è'],
    [/ ï /g, 'ï'],
    [/ í /g, 'í'],
    [/ ó /g, 'ó'],
    [/ ö /g, 'ö'],
    [/ ô /g, 'ô'],
    [/ ü /g, 'ü'],
    [/ ú /g, 'ú'],
    [/ ñ /g, 'ñ'],
    [/d' a/g, "d'a"],
    [/d' i/g, "d'i"],
    [/œ /g, 'œ'],
    [/ ’ /g, "'"],
    [/l' /g, "l'"],
  ]

  let joined = text
  for (const [pattern, replacement] of rules) {
    joined = joined.replace(pattern, replacement)
  }
  return joined
}

/**
 * Strips Markdown formatting from a heading and normalizes its title case.
 *
 * Steps: remove `*` and `#`, drop one trailing colon, escape `<` / `>` as
 * `&lt;` / `&gt;`, run `normalizeTitle`, re-attach accented characters, and
 * finally close the gap in ". <digit>" sequences (". 5" -> ".5").
 */
export function normalizeMarkdownTitle(text: string) {
  const withoutMarkup = text.trim().replace(/[*#]/g, '').trim()
  const withoutTrailingColon = withoutMarkup.replace(/:$/, '').trim()
  const escaped = withoutTrailingColon
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
  const normalized = removeSpacesAroundAccentedCharacters(
    normalizeTitle(escaped),
  )
  return normalized.replace(/\. ([0-9])/g, '.$1')
}

/**
 * Reports whether a phrase is written in title case, or is short enough
 * (three words or fewer) for that not to matter.
 *
 * - Text containing a known named entity with a colon is always rejected.
 * - The last word is allowed not to be title case:
 *   "Fasting Plasma Glucose (FPG) test:"
 * - Short words (three characters or fewer) and words listed in
 *   `wordsNotCapitalizedInTitles` are allowed not to be title case:
 *   "Choice of Test"
 *
 * Surrounding whitespace is ignored, so a trailing space does not change
 * which word counts as the last one.
 */
export function isTitleCaseOrShortPhrase(str: string) {
  if (hasNamedEntityWithColon(str)) {
    return false
  }
  const words = str.trim().split(' ')
  if (words.length <= 3) return true

  return words
    .slice(0, -1)
    .every(
      (word) =>
        word.length <= 3 ||
        wordsNotCapitalizedInTitles.includes(word) ||
        word.charAt(0) === word.charAt(0).toUpperCase(),
    )
}
