import removePunctuation from "./remove-punctuation.js";

/**
 * Normalizes a piece of Urdu text so that visually equivalent spellings
 * compare equal.
 *
 * The input is first passed through the imported `removePunctuation` helper
 * (its exact behavior is defined in ./remove-punctuation.js), and the result
 * is then:
 *   1. whitespace-collapsed (every run of 2+ whitespace chars -> one space),
 *   2. stripped of the diacritics U+064E-U+0652, U+0654 and U+065F,
 *   3. converted from Arabic code points to their Farsi/Urdu counterparts
 *      (U+0649 -> U+06CC, U+0643 -> U+06A9, non-final U+064A -> U+06CC),
 *   4. given a decomposed alef-madda (U+0622 -> U+0627 U+0653).
 *
 * @param {string} s - Text to clean (assumed to be a string; it is handed
 *     straight to `removePunctuation`).
 * @returns {string} The normalized text.
 */
function cleanUrdu(s) {
    return removePunctuation(s)
        // Collapse EVERY run of two or more whitespace characters into a
        // single space. The `g` flag matters: without it only the first run
        // in the string would be collapsed.
        .replace(/\s\s+/g, " ")
        // Remove diacritics. U+0653 (madda above) is intentionally not in
        // this class because the final step emits it.
        .replace(/[\u064e-\u0652\u0654\u065f]/g, "")
        // Arabic alef maksura (U+0649) -> Farsi yeh (U+06CC)
        .replace(/\u0649/g, "\u06cc")
        // Arabic kaf (U+0643) -> Farsi keheh (U+06A9)
        .replace(/\u0643/g, "\u06a9")
        // Arabic yeh (U+064A) -> Farsi yeh (U+06CC), but only when it is
        // followed by another character of the Arabic block (the Arabic
        // comma U+060C excluded), i.e. when it is not word-final.
        .replace(/\u064a(?=[\u0600-\u060b\u060d-\u06ff])/g, "\u06cc")
        // Precomposed alef-madda (U+0622) -> alef (U+0627) + madda above
        // (U+0653). Written with escapes so the two look-alike forms cannot
        // be merged by an editor or by Unicode normalization of this file.
        .replace(/\u0622/g, "\u0627\u0653");
}

export default cleanUrdu;