U
    ¿d@                     @   s  d Z ddlZddlmZ ddlmZ ddlmZ ddlmZ ddl	m
Z
 ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ dZeedZG dd deZ ej!G dd deZ"G dd deZ#dS )z>
Appraise evaluation framework

See LICENSE for usage details
    N)defaultdict)loads)
is_zipfile)ZipFile)User)models)format_lazy)ugettext_lazy)_get_loggerLANGUAGE_CODES_AND_NAMES)AnnotationTaskRegistry)BaseMetadata)MAX_REQUIREDANNOTATIONS_VALUE)seconds_to_timedelta)TextSegmentWithTwoTargetsd   )namec                       sV   e Zd ZdZejeedeededdZ	ej
dddedd	Z fd
dZ  ZS )$TextSegmentWithTwoTargetsWithContextzG
    Models a pair of two text segments within a document context.
    zDocument IDz(max. {value} characters)value)
max_lengthverbose_name	help_textTFzComplete document?)blankdb_indexdefaultr   c                    s   t t|  S )zV
        Validates the current TextSegmentWithTwoTargetsWithContext instance.
        )superr   is_validself	__class__ [/var/www/rival/public_html/translation-eval/EvalData/models/pairwise_assessment_document.pyr   5   s    z-TextSegmentWithTwoTargetsWithContext.is_valid)__name__
__module____qualname____doc__r   	CharFieldMAX_DOCUMENTID_LENGTH_f
documentIDBooleanFieldisCompleteDocumentr   __classcell__r#   r#   r!   r$   r   "   s   r   c                
   @   sT  e Zd ZdZejddejddeddZej	e
dded	d
ZejedeededdZej	eddddededdZejededdZejdejdddddeddZdd Zdd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( Zd=d*d+Zd>d,d-Zd.d/ Ze d0d1 Z!e d?d3d4Z"e d5d6 Z#e d7d8 Z$d9d: Z%d;d< Z&d2S )@PairwiseAssessmentDocumentTaska  
    Models a pairwise assessment document evaluation task.

    Note: this task is, similarily to other models, a shameless copy of
    DirectAssessmentContextTask, with one additional method for retrieving all
    items belonging to the same document in the task called
    `next_document_for_user`, and a helper method `get_results_for_each_item`.
    The underlying model is the same as for
    DirectAssessmentContextTask.
    zCampaign.CampaignTz %(app_label)s_%(class)s_campaign%(app_label)s_%(class)ssCampaignr   	on_deleterelated_namerelated_query_namer   z%(app_label)s_%(class)s_itemsItems)r6   r7   r   zRequired annotationsz(value in range=[1,{value}])r   r   r   z"%(app_label)s_%(class)s_assignedTozAssigned toz(users working on this task))r   r   r6   r7   r   r   zBatch numberz	(1-based)zCampaign.CampaignDataz!%(app_label)s_%(class)s_batchDataz
Batch data)r5   r   r   nullr6   r7   r   c                 C   s
   t | jS N)str	batchDatar   r#   r#   r$   dataName|   s    z'PairwiseAssessmentDocumentTask.dataNamec                 C   s   t | j jjS r;   )r<   itemsfirstmetadatamarketr   r#   r#   r$   
marketName   s    z)PairwiseAssessmentDocumentTask.marketNamec                 C   sD   t | j jjd}t|dkr@|d t kr@t|d  S d S Nr+      r   	r<   r?   r@   rA   rB   splitlenr   keysr    tokensr#   r#   r$   marketSourceLanguage   s    z3PairwiseAssessmentDocumentTask.marketSourceLanguagec                 C   s@   t | j jjd}t|dkr<|d t kr<|d S d S rD   rF   rJ   r#   r#   r$   marketSourceLanguageCode   s    z7PairwiseAssessmentDocumentTask.marketSourceLanguageCodec                 C   sD   t | j jjd}t|dkr@|d t kr@t|d  S d S Nr+   rE      rF   rJ   r#   r#   r$   marketTargetLanguage   s    z3PairwiseAssessmentDocumentTask.marketTargetLanguagec                 C   s@   t | j jjd}t|dkr<|d t kr<|d S d S rN   rF   rJ   r#   r#   r$   marketTargetLanguageCode   s    z7PairwiseAssessmentDocumentTask.marketTargetLanguageCodec                 C   s*   t jj| dd|djddd}tt|S )NFT)task	activated	completed	createdByitem_idflat) PairwiseAssessmentDocumentResultobjectsfiltervalues_listrH   set)r    userresultsr#   r#   r$   completed_items_for_user   s        z7PairwiseAssessmentDocumentTask.completed_items_for_userc                 C   s&   ddl m} |jj|| jd}| S )Nr   )TrustedUser)r^   campaign)Campaign.modelsra   rZ   r[   rb   exists)r    r^   ra   trusted_userr#   r#   r$   is_trusted_user   s    z.PairwiseAssessmentDocumentTask.is_trusted_userFc                 C   s"  |  |}d }d}| j dD ]X}tjj|dd|d}| srtd	|j
|j|j| |rj|jdkrr|} q||d7 }q"|std		| j
 tjj| ddd
jddd}tt|}	d}
|rd}
| j|
 }td	|	| |	|krtd	| j
 |   |   |r||fS |S )Nr   idFT)itemrS   rT   rU   z6Identified next item: {}/{} (itemID={}) for trusted={}TGTrO   zNo next item found for task {0})rR   rS   rT   rV   rW   r   F   zUnique annotations={0}/{1}zCompleting task {0})rf   r?   allorder_byrY   rZ   r[   rd   printformatrg   itemTypeitemIDLOGGERinfor\   rH   r]   requiredAnnotationscompletesave)r    r^   return_completed_itemsre   	next_itemcompleted_itemsrh   resultannotationsuniqueAnnotationsrequired_user_results_total_requiredr#   r#   r$   next_item_for_user   s`    
      
   


z1PairwiseAssessmentDocumentTask.next_item_for_userc                 C   s  | j |dd\}}|s6|s$|g g fS ||ddg g dfS | jj|jdd}g }d}|D ]4}|| |j|jkrvd}|jrX|r q|  qX| 	||}	|s|||	fS t
dd |	D }
tjj| dd|d	 }| jjdd
 }td|||
t
|| ||||
||	|fS )z6Returns the next item and all items from its document.T)rv   r   )r-   rg   Fc                 S   s   g | ]}|d k	r|qS r;   r#   ).0resr#   r#   r$   
<listcomp>   s      zIPairwiseAssessmentDocumentTask.next_document_for_user.<locals>.<listcomp>)rR   item__isCompleteDocumentrT   rU   )r/   z[Completed {}/{} documents, {}/{} items in the current document, completed {} items in total)r~   r?   r[   r-   rl   appendrg   r/   clearget_results_for_each_itemrH   rY   rZ   countrm   rn   )r    r^   return_statisticsrw   rx   _itemsblock_itemscurrent_blockrh   block_resultscompleted_items_in_blockcompleted_blockstotal_blocksr#   r#   r$   next_document_for_user   sj    




z5PairwiseAssessmentDocumentTask.next_document_for_userc                 C   s   g }|D ]0}t jj|jd|| ddd }|| qt|t|krRtd t	||D ]"\}}|r\|j|j
jkr\td q\|S )z7Returns the latest result object for each item or none.T)item__idrT   rU   rR   r   dateModifiedz/Warning: incorrect number of retrieved results!z.Warning: incorrect order of items and results!)rY   rZ   r[   rg   rl   r@   r   rH   rm   ziprh   )r    r   r^   r   rh   ry   r#   r#   r$   r     s&     

z8PairwiseAssessmentDocumentTask.get_results_for_each_itemc                 C   s<   | j j|ddddD ]}||}|d k	r|  S qd S )NTF)
assignedTorS   rT   z-id)rZ   r[   rl   r~   )clsr^   active_taskrw   r#   r#   r$   get_task_for_user9  s      

z0PairwiseAssessmentDocumentTask.get_task_for_userNc                 C   s   | j jdd|d}|r"|j|d}|dD ]2}|j }||jk r,|r,||j kr,|  S q,d S ]:}t| |d pzd	}||d
 k rd| j j	|d	 d  S qdd S ]:}|j
 jj}|j|ksq|j }||jk r|  S qd S )NTF)rS   rT   +items__metadata__market__targetLanguageCode)rb   rg   rs   r      r   rO   pk)rZ   r[   rl   r   r   rs   rk   r\   rm   getr?   r@   rA   rB   targetLanguageCode)r   coderb   r^   active_tasksr   active_usersrB   r#   r#   r$   get_next_free_task_for_languageD  s@    


       



z>PairwiseAssessmentDocumentTask.get_next_free_task_for_languagec                 C   s   |  ||S r;   )r   )r   r   rb   r#   r#   r$   ,get_next_free_task_for_language_and_campaignp  s    zKPairwiseAssessmentDocumentTask.get_next_free_task_for_language_and_campaignc           $      C   s`  |j }|jj}|j}d}|drt|sBd|}	t|	 dS t|}
dd |
	 D }|D ]4}|

|d}tjdkrt|}q`t|dd}q`ntt|
 dd}d	d
lm} | }d	}d	}d	}|D ]^}|d	kr||krd|}	t|	 | }t||   dS td||d d  d	}g }d	}|d D ]:}|d7 }t|d }t|d }||krtd||d  |}||krtd||d d |}|d }|d	 d }|d	 d }|d	 dd}d}d} d}!t|dkr|d d }|d d } |d dd}!t|d |d |dd||||| |!||d |d |d |d d}"||" |d r@|d7 }q@t|| d krd!t|| }	t|	 q|d7 }|D ]}"||"_ |"  qt||d d" |d d ||d#}#|#  |#jj|  |#  d$t||d d }	t|	 qd%||}	t|	 | }t||  dS )&z[
        Creates new PairwiseAssessmentDocumentTask instances based on JSON input.
        Nz.zipz!Batch {0} not a valid ZIP archivec                 S   s   g | ]}| d r|qS )z.json)endswithr   xr#   r#   r$   r     s     
 zCPairwiseAssessmentDocumentTask.import_from_json.<locals>.<listcomp>zutf-8)rE   	   r   )encodingr   )datetimez'Stopping after max_count={0} iterationszLoading batch:rR   batchNor?   rO   	segmentIDsegmentTextzNew max length ID:zNew max length text:targetstargetID
targetTexttargetContextLeft ZsegmentContextLeftrp   ro   r-   r/   )r   r   contextLeft	target1IDtarget1Texttarget1ContextLeft	target2IDtarget2Texttarget2ContextLeftrU   rp   ro   r-   r/   r   z)Expected 100 items for task but found {0}rs   )rb   rs   r   r=   rU   z&Success processing batch {0}, task {1}zMax length ID={0}, text={1})rA   dataFiler   r   r   rn   rq   warnr   namelistreaddecodesysversion_infor   r<   r   nowrr   rm   rH   encoder   r   r   ru   r1   r?   add)$r   rb   
batch_user
batch_data	max_count
batch_meta
batch_name
batch_file
batch_json_msg	batch_zipbatch_json_filesbatch_json_filebatch_contentr   t1current_countmax_length_idmax_length_text
batch_taskt2	doc_items	new_itemscount_itemsrh   current_length_idcurrent_length_textitem_targetsitem_tgt1_idxitem_tgt1_txtZitem_tgt1_ctxitem_tgt2_idxitem_tgt2_txtZitem_tgt2_ctxnew_itemnew_taskr#   r#   r$   import_from_jsont  s    
















 

z/PairwiseAssessmentDocumentTask.import_from_jsonc                 C   sD   t | dr| j sdS t | ds&dS | jD ]}| s, dS q,dS )zS
        Validates the current DA task, checking campaign and items exist.
        rb   Fr?   T)hasattrrb   r   r?   )r    rh   r#   r#   r$   r     s    

z'PairwiseAssessmentDocumentTask.is_validc                 C   s   d | jj| j| jS )Nz{0}.{1}[{2}])rn   r"   r%   rb   rg   r   r#   r#   r$   _generate_str_name  s    z1PairwiseAssessmentDocumentTask._generate_str_name)F)T)NN)'r%   r&   r'   r(   r   
ForeignKeyPROTECTr+   rb   ManyToManyFieldr   r?   PositiveSmallIntegerFieldr,   r   rs   r   r   PositiveIntegerFieldr   r=   r>   rC   rL   rM   rP   rQ   r`   rf   r~   r   r   classmethodr   r   r   r   r   r   r#   r#   r#   r$   r1   <   s   	

 
3
C

+

 r1   c                
   @   sX  e Zd ZdZejededdZejddededdZej	eded	dZ
ej	ed
ed	dZejedejddeddZejedddejddeddZdd Zdd Zdd Zed3ddZedd Zedd Zedd Zed d! Zed"d# Zed$d% Zed4d'd(Zed)d* Zed5d+d,Zed6d/d0Z ed1d2 Z!d-S )7rY   z@
    Models a direct assessment document evaluation result.
    z	Score (1)z(value in range=[1,100])r9   Tz	Score (2))r   r:   r   r   z
Start timez(in seconds)zEnd timez%(app_label)s_%(class)s_itemr2   Itemr4   z%(app_label)s_%(class)s_taskTask)r   r   r:   r5   r6   r7   r   c                 C   s   d | jj| j| j| jS )Nz{0}.{1}={2}+{3})rn   r"   r%   rh   score1score2r   r#   r#   r$   r   E  s    z3PairwiseAssessmentDocumentResult._generate_str_namec                 C   s   | j | j }t|dS )NrO   )end_time
start_timeround)r    dr#   r#   r$   durationM  s    z)PairwiseAssessmentDocumentResult.durationc                 C   s   | j jS r;   )rh   ro   r   r#   r#   r$   	item_typeQ  s    z*PairwiseAssessmentDocumentResult.item_typec                 C   s0   | j j|ddd}|r(|d  S | S )NFTrU   rS   rT   r   )rZ   r[   r\   distinctr   )r   r^   unique_only_queryr#   r#   r$   get_completed_for_userT  s    z7PairwiseAssessmentDocumentResult.get_completed_for_userc                 C   sx   t t}| jj|dddddD ]*}|d  dkr8q"||d   d7  < q"t| }td	d
 | D }||fS )NFTr   task__iditem__itemTyperO   tgtr   c                 S   s   g | ]}|d kr|qS )rj   r#   r   r#   r#   r$   r   h  s      zLPairwiseAssessmentDocumentResult.get_hit_status_for_user.<locals>.<listcomp>)	r   intrZ   r[   r\   lowerrH   rI   values)r   r^   	user_data	user_item
total_hitscompleted_hitsr#   r#   r$   get_hit_status_for_user[  s       z8PairwiseAssessmentDocumentResult.get_hit_status_for_userc                 C   sB   | j j|ddd}g }|D ]}|j|j }|| qtt|S )NFTr   )rZ   r[   r   r   r   r   sum)r   r^   r_   	durationsry   r   r#   r#   r$   get_time_for_userl  s    z2PairwiseAssessmentDocumentResult.get_time_for_userc                 C   s   t t}d}| jjd|d}d}|j| D ]X}|d }|d }|d }|d }	|d	 }
d
|d |d }|| ||	|
||f q*|S )Nri   CHKTrT   item__itemType__in)item__target1IDr   r   r   rU   item__itemID*item__metadata__market__sourceLanguageCode*item__metadata__market__targetLanguageCoder   rO   r   rE      {0}-{1}      )r   listrZ   r[   r\   rn   r   )r   system_scoresvalue_typesqsvalue_namesry   systemIDr   r   annotatorIDr   marketIDr#   r#   r$   get_system_annotationsw  s    
z7PairwiseAssessmentDocumentResult.get_system_annotationsc                    s  ddl m  tt}| jjdd}d}|j| D ]4}|d  dkrFq0|d }|d }|| | q0tt}|D ]J}t	jj
|d	}d
 fdd|j D }	|	sd}	||	 ||  qri }
|D ]H}t|| }d}|D ]}|| |dkr|d7 }q|t|f|
|< q|
S )Nr   r   TrT   )rU   r   r   rO   r   r   r   ;c                    s    g | ]}|j   kr|j qS r#   r   rI   r   r   r#   r$   r     s   zRPairwiseAssessmentDocumentResult.compute_accurate_group_status.<locals>.<listcomp>NoGroupInforj   )Dashboard.modelsr   r   r  rZ   r[   r\   r   r   r   r   joingroupsrk   extendr]   r   rH   )r   user_statusr  r  ry   r  taskIDgroup_statusr^   
usergroups
group_hits
group_nametask_idscompleted_taskstask_idr#   r   r$   compute_accurate_group_status  s<    

z>PairwiseAssessmentDocumentResult.compute_accurate_group_statusc           $         s*  ddl m  tt}i }| jjdd}d}|j| D ]T}|d }|d }|d }	|d }
|d	 }|d
 }tt|t| d}|d }|d }d	|d |d }|d }|d }|d }|d }|d }|d }||kr|| d }|| d }|| d }nPt
jj|d}|j}|j}d fdd|j D }|sFd}|||f||< ||d |  ||||||||	|
|||||||f q4|}dg}|D ]0}|| D ] }|ddd |D  qqddlm} dd lm}  || d!|}!t|!d"&}"|D ]}#|"|# |"d# q W 5 Q R X d S )$Nr   r   Tr  )item__targetIDr   item__target2IDr   r   r   rU   r
  r  r  "item__metadata__market__domainNamer   r   task__campaign__campaignNameitem__documentIDr   rO   r   rE   r  r  r     r     r   
                  r   r  c                    s    g | ]}|j   kr|j qS r#   r  r   r   r#   r$   r     s   zQPairwiseAssessmentDocumentResult.dump_all_results_to_csv_file.<locals>.<listcomp>r  -ztaskID,segmentID,username,email,groups,system1ID,score1,system2ID,score2,startTime,endTime,durationInSeconds,itemType,campaignName,documentID,isCompleteDocument,c                 S   s   g | ]}t |qS r#   r<   r   ar#   r#   r$   r     s     r  BASE_DIRmediaw
)r  r   r   r  rZ   r[   r\   r   floatrn   r   r   usernameemailr  r   rk   r   os.pathAppraise.settingsr@  openwrite)$r   csv_filer  r   r  r  ry   	system1IDr   	system2IDr   r   r   r   r  r   r  
domainNamero   r#  campaignNamer-   r/   rE  	useremailr%  r^   r   slir  r@  media_file_pathoutfilecr#   r   r$   dump_all_results_to_csv_file  s    

"
z=PairwiseAssessmentDocumentResult.dump_all_results_to_csv_filec                 C   s$  t t}| jjdd}d}|j| D ]}||d kr$||d kr$||d ksNq$|d }|d }	|d	 }
|d
 }|d }|d }tt|t| d}|d }|d }d|d |d }|d }|d }|d }|d }tjj	|d}|j
}|j}||d |  |||||	|
|||||f q$|S )NTr  )r	  r   r-  r   r   r   rU   r
  r  r  r.  r   r0  r   r3  r2  r   r   rO   r   rE   r  r  r  r1  r  r4  r5  r6  r   r9  )r   r  rZ   r[   r\   r   rD  rn   r   r   rE  rF  r   )r   srcCodetgtCodedomainr  r  r  ry   rL  r   rM  r   r   r   r   r  r   r  rN  ro   r-   r/   r^   rE  rP  r#   r#   r$   get_csv(  sV    


z(PairwiseAssessmentDocumentResult.get_csvFc              	   C   s   |  |||}dg}|r(d|d  |d< |D ]@}|| D ]2}	|sL|	dd  n|	}
|ddd |
D  q8q,ddlm} dd	lm} ||d
|}t|d$}|D ]}|| |d qW 5 Q R X d S )Nz_username,email,segmentID,score1,score2,durationInSeconds,itemType,documentID,isCompleteDocumentz	systemID,r   rO   r:  c                 S   s   g | ]}t |qS r#   r;  r<  r#   r#   r$   r   u  s     z>PairwiseAssessmentDocumentResult.write_csv.<locals>.<listcomp>r>  r?  rA  rB  rC  )r[  r   r  rG  rH  r@  rI  rJ  )r   rX  rY  rZ  csvFileallDatar   rQ  rR  rS  er  r@  rT  rU  rV  r#   r#   r$   	write_csvi  s     
z*PairwiseAssessmentDocumentResult.write_csvc                 C   s   t t}d}| jjd|d}|r,|j|d}d}|j| D ]}|d d}|d d}|d }	|d	 }	|d
 }
|d }|d }|d }|D ]}|| |	|
||f q|D ]}|| |	|||f qq:|S )Nr  Tr  task__campaign__id)r	  r-  r
  r   r   r0  r   r   +rO   r   rE   r  r  r  )r   r  rZ   r[   r\   rG   r   )r   campaign_idr  r  r  r  ry   system1_idssystem2_ids
segment_idr   r   r-   r/   	system_idr#   r#   r$   get_system_scores  s0    	

z2PairwiseAssessmentDocumentResult.get_system_scoresc                 C   s  g }d}|r|d7 }| j jd|d}|r4|j|d}|sD|jdd}d}	|rT|	d }	|r`|	d	 }	|j|	 D ]J}
|
d
 |
d |
d |
d |
d |
d |
d |
d |
d f	|
dd  |
d
 |
d |
d |
d |
d |
d |
d |
d |
d f	|
dd  g}|r"|d
 d|d
< |d d|d< |D ]}|d d kr<q&|d
 }|d }|r|d}|D ](}|f|f |dd   }|| q`n&|}|f|f |dd   }|| q&qj|S )Nr  )BADREFTr  r`  )createdBy__is_active)createdBy__usernamer	  r-  r
  r   r  r  r   r   r0  r   )r   r   )task__batchNorV   r   rO   rE   r  r  r  r1  r   r3  r4  r   r2  rb  )r   )rO   )rZ   r[   r\   rG   r   )r   rc  extended_csvexpand_multi_sysinclude_inactiveadd_batch_infosystem_data
item_typesr  attributes_to_extract_resultr_   ry   user_idsys_ids
system_idsrg  datar#   r#   r$   get_system_data  st    	





z0PairwiseAssessmentDocumentResult.get_system_dataNrE   c              	      s   | j d d}d}dd |D dd |D  }i }|D ]4 i | <  fdd|D D ]}|| |  |< qRq4i }|D ] tdd |   D }	g }
|  D ]>}|  | }t||	 }|
|t|t|t| |f qtt|
fdd	d
d| < qr|S )N)rc  )csdefilvtrr  ruzhc                 S   s   g | ]}d  |qS )zen-{0}rn   r   r#   r#   r$   r   #  s     zFPairwiseAssessmentDocumentResult.get_system_status.<locals>.<listcomp>c                 S   s   g | ]}d  |qS )z{0}-enr  r   r#   r#   r$   r   #  s    c                    s   g | ]} |kr|qS r#   r#   r   )r   r#   r$   r   *  s      c                 S   s   g | ]}t |qS r#   )rH   r   r#   r#   r$   r   /  s     c                    s   |   S r;   r#   )r   )
sort_indexr#   r$   <lambda>7      zDPairwiseAssessmentDocumentResult.get_system_status.<locals>.<lambda>T)keyreverse)rh  r  r   r   rH   r  sorted)r   rc  r  r  non_english_codescodesry  r  output_datatotal_annotationsoutput_localr   zr#   )r   r  r$   get_system_status  s,    $
z2PairwiseAssessmentDocumentResult.get_system_statusc                 C   s*   | j jdd||djddd}tt|S )NFT)rS   rT   rU   task__campaignrV   rW   )rZ   r[   r\   rH   r]   )r   r^   rb   r_   r#   r#   r$   'completed_results_for_user_and_campaign<  s     zHPairwiseAssessmentDocumentResult.completed_results_for_user_and_campaign)T)F)FTFF)NrE   )"r%   r&   r'   r(   r   r   r+   r   r   
FloatFieldr   r   r   r   r   rh   r1   rR   r   r   r   r   r   r  r  r  r+  rW  r[  r_  rh  rz  r  r  r#   r#   r#   r$   rY     s     	




*
g
@
+    h&rY   )$r(   r   collectionsr   jsonr   zipfiler   r   django.contrib.auth.modelsr   	django.dbr   django.utils.textr   r,   django.utils.translationr	   r+   Appraise.utilsr
   r  r   ZEvalData.models.base_modelsr   r   r   r   r   r*   r%   rq   r   registerr1   rY   r#   r#   r#   r$   <module>   s2   
   ]