U
    Vc*                  
   @   s   d dl Z ddddddddgZd	d
ddddddgZdd eD ZeeeeeZddddddddddg
Zdd Zdd Z	d:d d!Z
d"d# Zd$d% Zd&d' Zd;ed)d*d+Zd<ed)d,d+Zd=d.d/Zd0d1 Zed)d2d3Zed)d4d5Zd6d7 Zed)d8d9ZdS )>    N)([]-_.z\)z\(z\[z\]z\.u   –c                 C   s   g | ]}d | qS )z1[\u0600-\u06FFa-zA-Z.0-9\u0660-\u0669]{1,3} {0,2} ).0symbolr	   r	   D/var/www/wikiazma_server/doc_parser/utils/section_separator_utils.py
<listcomp>   s    r   )u   ۰0)u   ۱1)u   ۲2)u   ۳3)u   ۴4)u   ۵5)u   ۶6)u   ۷7)u   ۸8)u   ۹9c                 C   s$   |  }t D ]\}}|||}q
|S Nlocalesreplaceinputoutputfaenr	   r	   r   to_en   s    r!   c                 C   s$   |  }t D ]\}}|||}q
|S r   r   r   r	   r	   r   to_fa    s    r"   Fc           	      C   sL   t D ]B\}}}|}|rd| }|r*|d7 }t|| }|r||f  S qd S )N^$)regexesrematch)	r   startendlogr   Zregex_symbol
regex_dataZabsolute_regextestr	   r	   r   find_matched_regex'   s    r-   c                 C   s>   t | |krdS | | d  }|D ]}|d dkr" dS q"dS )NFtypebreakT)len)listbreak_sensitivityZsunlistitemr	   r	   r   break_condition_checker6   s    r4   c                 C   s   g }| D ]*}|d dkr(|d   |d< || qdd |D }dddg}|rl|d d |krl|d qL|r|d	 d |kr|d	 ql|S )
Nr.   textvaluec                 S   s$   g | ]}|d  dks|d r|qS r.   r5   r6   r	   r
   elementr	   r	   r   r   G   s     z(element_list_trimmer.<locals>.<listcomp>r/   tabr   )stripappendpop)elementsoutputsr9   must_trimmed_typer	   r	   r   element_list_trimmerA   s    
rB   c                 C   s"   zt |  W dS    Y dS X d S )NTF)int)strr	   r	   r   
try_to_intR   s
    rE      )returnc                 C   s  t | }g }|D ]x}|d dkr:t|d |d |kdnd }|d dkrt|rt|\}}td| tj}	t|	|d d }
ttD ]}|
|d}
q|
	 }
| pt
|d d	 t
|
kot|d d	 t|
k}| p|d d
 |k}| p|d d  pt|d d |}|rZ|rZ|rZt|	d|d 	 |d< ||g||
d n|r|d d | q|r|d d | q|S Nr.   r5   r6   r   )r*   r#    r;   indexregr?   r?   rK   rJ   rB   r-   r&   compile	MULTILINEfindallnormalized_symbolsregex_symbolsr   r<   r0   rE   r4   subr=   Zinput_elementsr2   r*   r?   blocksr9   matched_regexmatched_regex_symbolmatched_regex_dataregexrJ   r   Zindex_condictionregex_condictionZbreak_conditionr	   r	   r    elements_list_to_question_blocksZ   sJ    

 

 r[   c                 C   s  t | }g }|D ]x}|d dkr:t|d |d |kdnd }|d dkrt|rt|\}}td| tj}	t|	|d d }
ttD ]}|
|d}
q|
	 }
| pt
|d d	 t
|
kot|d d	 t|
k}| p|d d
 |k}| p|d d  pt|d d |}|rZ|rZ|rZt|	d|d 	 |d< ||g||
d n|r|d d | q|r|d d | q|S rH   rM   rT   r	   r	   r   r[      sJ    

 

    c                 C   s4  t | d }| d | d dg ig d}|D ]}|d dkrHt|d nd }|d dkr|r|\}}td| tj}	t|	|d d	 }
ttD ]}|
|d
}
q|
	 }
|d  p|d d d |k}|rt
|	d
|d 	 |d< |d |g||
d n|d d | q*|d d | q*|S )Nr?   rK   rJ   )rK   rJ   questionchoicesr.   r5   r6   r#   r   rI   r^   r;   rL   r]   )rB   r-   r&   rN   rO   rP   rQ   rR   r   r<   rS   r=   )blockr2   r*   r?   qr9   rV   rW   rX   rY   rJ   r   rZ   r	   r	   r   %question_block_to_structured_question   s:    

ra   c                 C   s|   dd | d D | d< dddg}| d rL| d d d |krL| d  d q | d rx| d d d |krx| d  d qL| S )	Nc                 S   s$   g | ]}|d  dks|d r|qS r7   r	   r8   r	   r	   r   r      s     z#section_trimmer.<locals>.<listcomp>r?   r/   r:   r   r.   r;   )r>   )datarA   r	   r	   r   section_trimmer   s    
rc   c                 C   sF   t | dd}g }t|D ](\}}|d } t|ddd}|| q|S )Nr\   r2   r?   r   F)r2   r*   )r[   	enumeratera   r=   )r?   Zquestion_sectionsr   rJ   Zquestion_sectionZsub_question_sectionsr	   r	   r   split_xmls_element_to_questions   s       rf   c                 C   s   t | dd}|S )Nr\   rd   )r[   )r?   Zanswer_sectionsr	   r	   r   split_xmls_element_to_answers   s
     rg   c           
      C   s2  | sd S d }|r|d r|d d d dkrt dt j}t ||d d d }|r^|d nd }|r|d d d t|ddt|dd}| }t d	t j}t ||}|r||d d}||d d d< |d
d }| D ]@}	|r$|	d t|ks|	d t|kr$d|	d< qd|	d< qd S )Nr?   r   r.   r5   uI   ^ {0,4}گزینه {0,4}[\u0600-\u06FFa-zA-Z.0-9\u0660-\u0669]{1,30} {0,4}r6   rI   r\   z^ *: *u
   گزینهrJ   T
is_correctF)r&   rN   rO   rP   r   r!   r"   r<   )
r^   answerZcorrect_indexrY   Zmatched_partsZmatched_partZnormalized_valueZcolon_regexZcolon_matched_partschoicer	   r	   r   !add_correctness_to_choices_answer  sD           *
rk   c                    s~   t | }|rt|ng }|D ]\}|d   fdd|D }|rF|d nd }t|d | ||d< t|d d |d d< q|S )	NrJ   c                    s0   g | ](}|d  t  ks(|d  t kr|qS rJ   )r!   r"   )r
   ri   rl   r	   r   r   +  s     z@split_xmls_element_to_questions_with_answers.<locals>.<listcomp>r   r^   ri   r]   r?   )rf   rg   rk   rB   )question_elementsanswer_elements	questionsanswersr]   Ztarget_answersri   r	   rl   r   ,split_xmls_element_to_questions_with_answers%  s"    
rq   )FFF)rF   F)rF   F)r\   F)r&   rQ   rR   r+   r1   zipr%   r   r!   r"   r-   r4   rB   rE   r[   ra   rc   rf   rg   rk   rq   r	   r	   r	   r   <module>   s>   
22
'!