import re


# Delimiters that may follow a question/choice label ("1)", "a.", "2-", ...).
# The lists below are position-aligned: entry i of each one describes the same
# delimiter.

# Canonical spelling of each delimiter; the en dash is folded into a hyphen.
normalized_symbols = [r")", r"(", r"[", r"]", r"-", r"_", r".", r"-"]
# The same delimiters, escaped where needed for use inside a regex.
regex_symbols = [r"\)", r"\(", r"\[", r"\]", r"-", r"_", r"\.", r"–"]
# One label pattern per delimiter: 1-3 label characters (Arabic-block
# characters, Latin letters, dots, digits), up to two spaces, the delimiter.
regex_data = [
    rf"[\u0600-\u06FFa-zA-Z.0-9\u0660-\u0669]{{1,3}} {{0,2}}{delimiter}"
    for delimiter in regex_symbols
]
# (normalized symbol, escaped symbol, label pattern) triples.
regexes = [*zip(normalized_symbols, regex_symbols, regex_data)]


# Digit translation table: (Persian digit, ASCII digit) pairs for 0 through 9,
# in ascending order. Read by to_en and to_fa.
locales = list(zip("۰۱۲۳۴۵۶۷۸۹", "0123456789"))


def to_en(input):
    """Return ``input`` formatted as text with Persian digits turned into ASCII.

    The value is first rendered with an f-string, so non-string arguments are
    accepted. Each first member of a ``locales`` pair is replaced by its
    second member; every other character is left alone.
    """
    persian_to_ascii = str.maketrans(dict(locales))
    return f"{input}".translate(persian_to_ascii)


def to_fa(input):
    """Return ``input`` formatted as text with ASCII digits turned into Persian.

    The value is first rendered with an f-string, so non-string arguments are
    accepted. Each second member of a ``locales`` pair is replaced by its
    first member; every other character is left alone.
    """
    ascii_to_persian = str.maketrans({en: fa for fa, en in locales})
    return f"{input}".translate(ascii_to_persian)


def find_matched_regex(input, start=False, end=False, log=False):
    """Find the first label pattern in ``regexes`` that matches ``input``.

    Patterns are tried in table order with ``re.match``, which already
    anchors at the beginning of the string; ``start`` merely prepends an
    explicit ``^`` and ``end`` appends ``$`` to the pattern being tried.

    Args:
        input: text to test.
        start: prepend ``^`` to each pattern.
        end: append ``$`` to each pattern.
        log: accepted but not used.

    Returns:
        ``(normalized symbol, unanchored label pattern)`` for the first
        pattern that matches, or ``None`` when none does.
    """
    head = r"^" if start else ""
    tail = r"$" if end else ""

    for marker, _escaped, pattern in regexes:
        if re.match(head + pattern + tail, input):
            return marker, pattern
    return None


def break_condition_checker(list, break_sensitivity):
    """Tell whether ``list`` ends with ``break_sensitivity`` "break" elements.

    Args:
        list: sequence of element dicts, each with a "type" key.
        break_sensitivity: number of trailing elements that must all be of
            type "break".

    Returns:
        ``False`` when ``list`` does not hold more than ``break_sensitivity``
        elements; otherwise ``True`` exactly when each of the last
        ``break_sensitivity`` elements is a "break". A sensitivity of 0
        requires no breaks, so any non-empty list yields ``True``.
    """
    if len(list) <= break_sensitivity:
        return False

    # Slice from an explicit start offset: with a sensitivity of 0 the
    # negative form ``list[-0:]`` is the whole list, not an empty tail.
    tail = list[len(list) - break_sensitivity:]
    return all(item["type"] == "break" for item in tail)


def element_list_trimmer(elements):
    """Strip text values, drop empty texts and trim break/tab elements at the edges.

    Args:
        elements: iterable of element dicts with a "type" key; "text"
            elements also carry a "value" string. The "value" of every text
            element is stripped in place, including those that end up
            being dropped.

    Returns:
        A new list holding the same dict objects, without text elements whose
        stripped value is empty and without the leading and trailing run of
        "break"/"tab" elements.
    """
    trimmable = ("break", "tab")

    kept = []
    for element in elements:
        if element["type"] == "text":
            element["value"] = element["value"].strip()
            if not element["value"]:
                continue
        kept.append(element)

    # Move two cursors inwards instead of popping from the front of the list,
    # which would shift every remaining item on each removal.
    first, last = 0, len(kept)
    while first < last and kept[first]["type"] in trimmable:
        first += 1
    while last > first and kept[last - 1]["type"] in trimmable:
        last -= 1
    return kept[first:last]


def try_to_int(str):
    """Report whether ``int()`` accepts ``str``.

    Returns:
        ``True`` when ``int(str)`` succeeds, ``False`` when it rejects the
        value with ``TypeError``, ``ValueError`` or ``OverflowError``.
    """
    try:
        int(str)
    # Catch only conversion failures so that KeyboardInterrupt and SystemExit
    # are no longer swallowed.
    except (TypeError, ValueError, OverflowError):
        return False
    return True


def elements_list_to_question_blocks(input_elements, break_sensitivity=2, log=False) -> list:
    """Split a flat element list into blocks that each start at a labelled text.

    The elements are first passed through ``element_list_trimmer``. A text
    element then opens a new block when its value starts with a label
    recognised by ``find_matched_regex`` and, if a block already exists, all
    of the following hold for the most recent block:

    * it was opened with the same delimiter symbol;
    * its element list is empty or passes ``break_condition_checker``;
    * its index is not longer than the new index, and ``try_to_int`` gives
      the same answer for both indexes.

    Any other element is appended to the most recent block; elements that
    come before the first block are dropped.

    Args:
        input_elements: element dicts with a "type" key; "text" elements also
            carry a "value" string. Text values are modified in place: they
            are stripped, and the label is removed from an element that opens
            a block.
        break_sensitivity: forwarded to ``break_condition_checker``.
        log: not used; kept so existing callers keep working.

    Returns:
        A list of ``{"elements": [...], "reg": symbol, "index": label}``
        dicts in document order.
    """
    elements = element_list_trimmer(input_elements)

    blocks = []
    for element in elements:
        matched_regex = (
            find_matched_regex(element["value"])
            if element["type"] == "text" else None)

        if not matched_regex:
            # Continuation content belongs to the block currently open.
            if blocks:
                blocks[-1]["elements"].append(element)
            continue

        matched_regex_symbol, matched_regex_data = matched_regex
        regex = re.compile(r"^" + matched_regex_data, re.MULTILINE)

        # find_matched_regex matched at position 0, so the first hit is the
        # label at the very start of the value.
        index = regex.findall(element["value"])[0]
        for symbol in [*normalized_symbols, *regex_symbols]:
            index = index.replace(symbol, "")
        index = index.strip()

        starts_new_block = True
        if blocks:
            previous = blocks[-1]
            same_symbol = previous["reg"] == matched_regex_symbol
            enough_breaks = not previous["elements"] or break_condition_checker(
                previous["elements"], break_sensitivity)
            compatible_index = (
                len(previous["index"]) <= len(index)
                and try_to_int(previous["index"]) == try_to_int(index))
            starts_new_block = same_symbol and enough_breaks and compatible_index

        if starts_new_block:
            # NOTE(review): the pattern is compiled with re.MULTILINE and no
            # count is given, so a label-like prefix is removed from every
            # line of the value, not just the first - confirm this is wanted.
            element["value"] = regex.sub('', element["value"]).strip()
            blocks.append(
                {"elements": [element], "reg": matched_regex_symbol, "index": index})
        else:
            # A labelled text that does not qualify stays in the open block
            # with its label intact.
            blocks[-1]["elements"].append(element)

    return blocks


def question_block_to_structured_question(block, break_sensitivity=1, log=False):
    """Separate one block into its question elements and its labelled choices.

    The block's elements are passed through ``element_list_trimmer``. A text
    element whose value starts with a label recognised by
    ``find_matched_regex`` becomes a choice when no choice exists yet or when
    it uses the same delimiter symbol as the latest choice; its label is then
    removed from the value in place. Every other element goes to the question
    part.

    Args:
        block: dict with "elements", "reg" and "index" keys.
        break_sensitivity: accepted but not used.
        log: accepted but not used.

    Returns:
        ``{"reg", "index", "question": {"elements": [...]}, "choices": [...]}``
        where each choice is ``{"elements": [element], "reg", "index"}``.
    """
    trimmed = element_list_trimmer(block["elements"])

    structured = {
        "reg": block["reg"],
        "index": block["index"],
        "question": {"elements": []},
        "choices": [],
    }
    stem = structured["question"]["elements"]
    choices = structured["choices"]

    for item in trimmed:
        hit = find_matched_regex(item["value"]) if item["type"] == "text" else None
        if not hit:
            stem.append(item)
            continue

        marker, pattern = hit
        leading_label = re.compile(r"^"+pattern, re.MULTILINE)

        label = re.findall(leading_label, item["value"])[0]
        for token in [*normalized_symbols, *regex_symbols]:
            label = label.replace(token, "")
        label = label.strip()

        # A label with a different delimiter than the latest choice is
        # treated as ordinary question content.
        if choices and choices[-1]["reg"] != marker:
            stem.append(item)
            continue

        item["value"] = re.sub(leading_label, '', item["value"]).strip()
        choices.append({"elements": [item], "reg": marker, "index": label})

    return structured


def section_trimmer(data):
    """Drop empty texts and edge break/tab elements from ``data["elements"]``.

    Unlike ``element_list_trimmer`` the text values are not stripped, so a
    whitespace-only value counts as non-empty.

    Args:
        data: dict with an "elements" list of element dicts; each has a
            "type" key and "text" elements also carry a "value".

    Returns:
        The same ``data`` dict, whose "elements" entry has been replaced by
        the trimmed list.
    """
    trimmable = ("break", "tab")

    kept = [element for element in data["elements"]
            if element["type"] != "text" or element["value"]]

    # Narrow a window with two cursors instead of popping from the front of
    # the list, which would shift every remaining item on each removal.
    first, last = 0, len(kept)
    while first < last and kept[first]["type"] in trimmable:
        first += 1
    while last > first and kept[last - 1]["type"] in trimmable:
        last -= 1

    data["elements"] = kept[first:last]
    return data


def split_xmls_element_to_questions(elements) -> list:
    """Turn a flat element list into structured questions.

    The elements are grouped with ``elements_list_to_question_blocks``
    (break sensitivity 1) and every resulting block is converted with
    ``question_block_to_structured_question``.

    Returns:
        One structured question dict per block, in block order.
    """
    blocks = elements_list_to_question_blocks(elements, break_sensitivity=1)
    return [
        question_block_to_structured_question(
            block, break_sensitivity=0, log=False)
        for block in blocks
    ]


def split_xmls_element_to_answers(elements) -> list:
    """Group answer elements into indexed blocks.

    Returns:
        The result of ``elements_list_to_question_blocks`` called with a
        break sensitivity of 1.
    """
    return elements_list_to_question_blocks(elements, break_sensitivity=1)


def add_correctness_to_choices_answer(choices, answer):
    """Set ``is_correct`` on every choice from the answer's "گزینه <label>" prefix.

    When the first element of ``answer`` is a text whose value has a line
    starting with "گزینه" followed by a label, that label becomes the correct
    index. The matched prefix, and a colon directly after it, are removed
    from that element's value in place. A choice is correct when its "index"
    equals the label in either its ``to_en`` or its ``to_fa`` form.

    Args:
        choices: list of choice dicts with an "index" key; each receives an
            "is_correct" boolean. Nothing happens when the list is empty.
        answer: dict with an "elements" list, or a falsy value when there is
            no answer.

    Returns:
        None. ``choices`` and ``answer`` are modified in place.
    """
    if not choices:
        return
    correct_index = None

    if answer and answer["elements"] and answer["elements"][0]["type"] == "text":
        first_element = answer["elements"][0]
        option_regex = re.compile(
            r"^ {0,4}گزینه {0,4}[\u0600-\u06FFa-zA-Z.0-9\u0660-\u0669]{1,30} {0,4}", re.MULTILINE)

        match = option_regex.search(first_element["value"])
        if match:
            matched_part = match.group(0)
            value = first_element["value"]

            # Cut out exactly the span that matched. Removing digit-converted
            # copies of it instead can miss the prefix (mixed digits) or
            # delete a second occurrence (label without digits).
            remainder = (value[:match.start()] + value[match.end():]).strip()

            # Remove only the first line-leading colon; colons further on in
            # the explanation must be kept.
            remainder = re.sub(
                r"^ *: *", "", remainder, count=1, flags=re.MULTILINE)

            first_element["value"] = remainder
            correct_index = matched_part.replace("گزینه", "").strip()

    # Both digit spellings are computed once; nothing is computed when no
    # usable label was found.
    accepted = (to_en(correct_index), to_fa(correct_index)) if correct_index else ()

    for choice in choices:
        choice["is_correct"] = bool(accepted) and choice["index"] in accepted


def split_xmls_element_to_questions_with_answers(question_elements, answer_elements) -> list:
    """Build structured questions and attach the answer with the same index.

    Questions come from ``split_xmls_element_to_questions``; answers from
    ``split_xmls_element_to_answers`` when ``answer_elements`` is truthy. For
    each question the first answer whose "index" equals the question index in
    its ``to_en`` or ``to_fa`` form is stored under "answer" (``None`` when
    there is none), the choices are flagged through
    ``add_correctness_to_choices_answer``, and the question elements are
    passed through ``element_list_trimmer``.

    Returns:
        The list of structured question dicts.
    """
    questions = split_xmls_element_to_questions(question_elements)
    answers = []
    if answer_elements:
        answers = split_xmls_element_to_answers(answer_elements)

    for question in questions:
        wanted = (to_en(question["index"]), to_fa(question["index"]))
        answer = next(
            (candidate for candidate in answers if candidate["index"] in wanted),
            None)

        add_correctness_to_choices_answer(question["choices"], answer)
        question["answer"] = answer

        # Trim the question part once more after the choices were split off.
        stem = question["question"]
        stem["elements"] = element_list_trimmer(stem["elements"])

    return questions