U
    ¿dx                     @   s   d Z ddlZddlmZ ddlmZ ddlmZ ddlm	Z	 ddlm
Z
 ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlT ee dZ!e"j#G dd de$Z%G dd de$Z&dS )z>
Appraise evaluation framework

See LICENSE for usage details
    N)defaultdict)loads)
format_exc)
is_zipfile)ZipFile)messages)User)ValidationError)models)format_lazy)utc)ugettext_lazy)_get_loggerLANGUAGE_CODES_AND_NAMES)*)namec                
   @   sB  e Zd ZdZejddejddeddZej	e
dded	d
ZejedeededdZej	eddddededdZejededdZejdejdddddeddZdd Zdd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( Zd9d*d+Zed,d- Zed:d/d0Z ed1d2 Z!ed3d4 Z"d5d6 Z#d7d8 Z$d.S );PairwiseAssessmentTaskz5
    Models a direct assessment evaluation task.
    zCampaign.CampaignTz %(app_label)s_%(class)s_campaign%(app_label)s_%(class)ssCampaigndb_index	on_deleterelated_namerelated_query_nameverbose_namez%(app_label)s_%(class)s_itemsItems)r   r   r   zRequired annotationsz(value in range=[1,{value}]))valuer   	help_textz"%(app_label)s_%(class)s_assignedTozAssigned toz(users working on this task))blankr   r   r   r   r   zBatch numberz	(1-based)zCampaign.CampaignDataz!%(app_label)s_%(class)s_batchDataz
Batch data)r   r    r   nullr   r   r   c                 C   s
   t | jS N)str	batchDataself r'   R/var/www/rival/public_html/translation-eval/EvalData/models/pairwise_assessment.pydataNameY   s    zPairwiseAssessmentTask.dataNamec                 C   s   t | j jjS r"   )r#   itemsfirstmetadatamarketr%   r'   r'   r(   
marketName\   s    z!PairwiseAssessmentTask.marketNamec                 C   sD   t | j jjd}t|dkr@|d t kr@t|d  S d S N_   r   	r#   r*   r+   r,   r-   splitlenr   keysr&   tokensr'   r'   r(   marketSourceLanguage_   s    z+PairwiseAssessmentTask.marketSourceLanguagec                 C   s@   t | j jjd}t|dkr<|d t kr<|d S d S r/   r2   r6   r'   r'   r(   marketSourceLanguageCodee   s    z/PairwiseAssessmentTask.marketSourceLanguageCodec                 C   sD   t | j jjd}t|dkr@|d t kr@t|d  S d S Nr0   r1      r2   r6   r'   r'   r(   marketTargetLanguagek   s    z+PairwiseAssessmentTask.marketTargetLanguagec                 C   s@   t | j jjd}t|dkr<|d t kr<|d S d S r:   r2   r6   r'   r'   r(   marketTargetLanguageCodeq   s    z/PairwiseAssessmentTask.marketTargetLanguageCodec                 C   s*   t jj| dd|djddd}tt|S )NFT)task	activated	completed	createdByitem_idflat)PairwiseAssessmentResultobjectsfiltervalues_listr4   set)r&   userresultsr'   r'   r(   completed_items_for_userw   s        z/PairwiseAssessmentTask.completed_items_for_userc                 C   s&   ddl m} |jj|| jd}| S )Nr   )TrustedUser)rJ   campaign)Campaign.modelsrM   rF   rG   rN   exists)r&   rJ   rM   trusted_userr'   r'   r(   is_trusted_user~   s    z&PairwiseAssessmentTask.is_trusted_userFc                 C   s   |  |}d }d}| j dD ]V}tjj|dd|d}| sptd	|j
|j| |rh|jdrp|} qz|d7 }q"|std		| j
 tjj| ddd
jddd}tt|}	d}
|rd}
| j|
 }td	|	| |	|krtd	| j
 |   |   |r||fS |S )Nr   idFT)itemr?   r@   rA   z-identified next item: {0}/{1} for trusted={2}TGTr;   zNo next item found for task {0})r>   r?   r@   rB   rC   d   F   zUnique annotations={0}/{1}zCompleting task {0})rR   r*   allorder_byrE   rF   rG   rP   printformatrS   itemType
startswithLOGGERinforH   r4   rI   requiredAnnotationscompletesave)r&   rJ   return_completed_itemsrQ   	next_itemcompleted_itemsrT   resultannotationsuniqueAnnotationsrequired_user_results_total_requiredr'   r'   r(   next_item_for_user   s^    
     
   


z)PairwiseAssessmentTask.next_item_for_userc                 C   s<   | j j|ddddD ]}||}|d k	r|  S qd S )NTF)
assignedTor?   r@   z-id)rF   rG   rY   rk   )clsrJ   active_taskrd   r'   r'   r(   get_task_for_user   s      

z(PairwiseAssessmentTask.get_task_for_userNc                 C   s0  t d| t d| t d| | jjdd|d}t dt| |r^|j|d}|d	D ]2}|j }||jk rh|rh||j	 krh|  S qht d
 d S ]:}t | |d pd}||d k r| jj|d d  S qd S ]B}|j jj}|j|ks
 q|j }||jk  r|  S  qd S )Nz.  Looking for next free task for language: {0}z  Campaign: {0}z  User: {0}TF)r?   r@   +items__metadata__market__targetLanguageCodez!    Number of active tasks: ({0}))rN   rS   z    No next free task availabler`   rl      r   r;   pk)rZ   r[   rF   rG   r4   rY   rl   countr`   rX   rH   getr*   r+   r,   r-   targetLanguageCode)rm   coderN   rJ   active_tasksrn   active_usersr-   r'   r'   r(   get_next_free_task_for_language   sJ    


       
z6PairwiseAssessmentTask.get_next_free_task_for_languagec                 C   s   |  ||S r"   )rz   )rm   rw   rN   r'   r'   r(   ,get_next_free_task_for_language_and_campaign   s    zCPairwiseAssessmentTask.get_next_free_task_for_language_and_campaignc           #      C   s  |j }|jj}|j}d}|drt|sBd|}	t|	 dS t|}
dd |
	 D }|D ]4}|

|d}tjdkrt|}q`t|dd}q`ntt|
 dd}d	d
lm} | }d	}d	}d	}|D ]}|d	kr||krd|}	t|	 | }t||   dS td||d d  g }d	}|d D ]}|d7 }t|d }t|d }||kr~t||d  |}||krt||d d |}|d }|d	 d }|d	 d }d}d}t|dkr|d d }|d d }|dd}|dd} t|d |d ||||||d |d || d}!||! q<t|dksfd|}	t|	 q|d7 }|jj|ddi |  t||d d |d d ||d }"|"  |"jj|  |"  d!t||d d }	t|	 qd"||}	t|	 | }t||  dS )#zS
        Creates new PairwiseAssessmentTask instances based on JSON input.
        Nz.zipz!Batch {0} not a valid ZIP archivec                 S   s   g | ]}| d r|qS )z.json)endswith.0xr'   r'   r(   
<listcomp>
  s     
 z;PairwiseAssessmentTask.import_from_json.<locals>.<listcomp>zutf-8)r1   	   r   )encodingr   )datetimez'Stopping after max_count={0} iterationszLoading batch:r>   batchNor*   r;   sourceID
sourceTexttargetstargetID
targetTextcontextLeftcontextRightitemIDr\   )	segmentIDsegmentText	target1IDtarget1Text	target2IDtarget2TextrA   r   r\   r   r   rV   z)Expected 100 items for task but found {0}bulkFr`   )rN   r`   r   r$   rA   z&Success processing batch {0}, task {1}zMax length ID={0}, text={1})r,   dataFiler   r|   r   r[   r^   warnr   namelistreaddecodesysversion_infor   r#   r   nowr_   rZ   r4   encoderu   TextSegmentWithTwoTargetsappendtextsegment_setaddrb   r   r*   )#rm   rN   
batch_user
batch_data	max_count
batch_meta
batch_name
batch_file
batch_json_msg	batch_zipbatch_json_filesbatch_json_filebatch_contentr   t1current_countmax_length_idmax_length_text
batch_taskt2	new_itemsZcount_itemsrT   current_length_idcurrent_length_textZitem_targetsZitem_tgt1_idxZitem_tgt1_txtZitem_tgt2_idxZitem_tgt2_txtcontext_leftcontext_rightnew_itemnew_taskr'   r'   r(   import_from_json   s    













 

z'PairwiseAssessmentTask.import_from_jsonc                 C   sD   t | dr| j sdS t | ds&dS | jD ]}| s, dS q,dS )zS
        Validates the current DA task, checking campaign and items exist.
        rN   Fr*   T)hasattrrN   is_validr*   )r&   rT   r'   r'   r(   r     s    

zPairwiseAssessmentTask.is_validc                 C   s   d | jj| j| jS )Nz{0}.{1}[{2}])r[   	__class____name__rN   rS   r%   r'   r'   r(   _generate_str_name  s    z)PairwiseAssessmentTask._generate_str_name)F)NN)%r   
__module____qualname____doc__r
   
ForeignKeyPROTECTr0   rN   ManyToManyFieldr   r*   PositiveSmallIntegerFieldfMAX_REQUIREDANNOTATIONS_VALUEr`   r   rl   PositiveIntegerFieldr   r$   r)   r.   r8   r9   r<   r=   rL   rR   rk   classmethodro   rz   r{   r   r   r   r'   r'   r'   r(   r       s   	

 
3

2

 	r   c                
   @   sX  e Zd ZdZejededdZejddededdZej	eded	dZ
ej	ed
ed	dZejedejddeddZejedddejddeddZdd Zdd Zdd Zed3ddZedd Zedd Zedd Zed d! Zed"d# Zed$d% Zed4d'd(Zed)d* Zed5d+d,Zed6d/d0Z ed1d2 Z!d-S )7rE   zC
    Models a contrastive direct assessment evaluation result.
    z	Score (1)z(value in range=[1,100])r   Tz	Score (2))r    r!   r   r   z
Start timez(in seconds)zEnd timez%(app_label)s_%(class)s_itemr   Itemr   z%(app_label)s_%(class)s_taskTask)r    r   r!   r   r   r   r   c                 C   s   d | jj| j| j| jS )Nz{0}.{1}={2}+{3})r[   r   r   rT   score1score2r%   r'   r'   r(   r     s    z+PairwiseAssessmentResult._generate_str_namec                 C   s   | j | j }t|dS )Nr;   )end_time
start_timeround)r&   dr'   r'   r(   duration  s    z!PairwiseAssessmentResult.durationc                 C   s   | j jS r"   )rT   r\   r%   r'   r'   r(   	item_type  s    z"PairwiseAssessmentResult.item_typec                 C   s0   | j j|ddd}|r(|d  S | S )NFTrA   r?   r@   item__id)rF   rG   rH   distinctrt   )rm   rJ   unique_only_queryr'   r'   r(   get_completed_for_user  s    z/PairwiseAssessmentResult.get_completed_for_userc                 C   sx   t t}| jj|dddddD ]*}|d  dkr8q"||d   d7  < q"t| }td	d
 | D }||fS )NFTr   task__iditem__itemTyper;   tgtr   c                 S   s   g | ]}|d kr|qS )rW   r'   r}   r'   r'   r(   r     s      zDPairwiseAssessmentResult.get_hit_status_for_user.<locals>.<listcomp>)	r   intrF   rG   rH   lowerr4   r5   values)rm   rJ   	user_data	user_item
total_hitscompleted_hitsr'   r'   r(   get_hit_status_for_user  s       z0PairwiseAssessmentResult.get_hit_status_for_userc                 C   sB   | j j|ddd}g }|D ]}|j|j }|| qtt|S )NFTr   )rF   rG   r   r   r   seconds_to_timedeltasum)rm   rJ   rK   	durationsrf   r   r'   r'   r(   get_time_for_user  s    z*PairwiseAssessmentResult.get_time_for_userc                 C   s   t t}d}| jjd|d}d}|j| D ]X}|d }|d }|d }|d }	|d	 }
d
|d |d }|| ||	|
||f q*|S )NrU   CHKTr@   item__itemType__in)item__target1IDr   r   r   rA   item__itemID*item__metadata__market__sourceLanguageCode*item__metadata__market__targetLanguageCoder   r;   rq   r1      {0}-{1}      )r   listrF   rG   rH   r[   r   )rm   system_scoresvalue_typesqsvalue_namesrf   systemIDr   r   annotatorIDr   marketIDr'   r'   r(   get_system_annotations  s    
z/PairwiseAssessmentResult.get_system_annotationsc                    s  ddl m  tt}| jjdd}d}|j| D ]4}|d  dkrFq0|d }|d }|| | q0tt}|D ]J}t	jj
|d	}d
 fdd|j D }	|	sd}	||	 ||  qri }
|D ]H}t|| }d}|D ]}|| |dkr|d7 }q|t|f|
|< q|
S )Nr   r   Tr@   )rA   r   r   r;   r   rq   rr   ;c                    s    g | ]}|j   kr|j qS r'   r   r5   r}   r   r'   r(   r   (  s   zJPairwiseAssessmentResult.compute_accurate_group_status.<locals>.<listcomp>NoGroupInforW   )Dashboard.modelsr   r   r   rF   rG   rH   r   r   r   ru   joingroupsrX   extendrI   rt   r4   )rm   user_statusr   r   rf   r   taskIDgroup_statusrJ   
usergroups
group_hits
group_nametask_idscompleted_taskstask_idr'   r   r(   compute_accurate_group_status  s<    

z6PairwiseAssessmentResult.compute_accurate_group_statusc           "         s  ddl m  tt}i }| jjdd}d}|j| D ]>}|d }|d }|d }	|d }
|d	 }|d
 }tt|t| d}|d }|d }d	|d |d }|d }|d }|d }|d }||kr|| d }|| d }|| d }nPt
jj|d}|j}|j}d fdd|j D }|s4d}|||f||< ||d |  ||||||||	|
|||||f q4|}dg}|D ]0}|| D ] }|ddd |D  qqddlm} ddlm} ||d|}t|d &} |D ]}!| |! | d! qW 5 Q R X d S )"Nr   r   Tr   )r   r   item__target2IDr   r   r   rA   r   r   r   "item__metadata__market__domainNamer   r   task__campaign__campaignNamer;   rq   r1   r   r   r      r      r   
            rr   r   c                    s    g | ]}|j   kr|j qS r'   r  r}   r   r'   r(   r   q  s   zIPairwiseAssessmentResult.dump_all_results_to_csv_file.<locals>.<listcomp>r  -ztaskID,segmentID,username,email,groups,system1ID,score1,system2ID,score2,startTime,endTime,durationInSeconds,itemType,campaignName,c                 S   s   g | ]}t |qS r'   r#   r~   ar'   r'   r(   r     s     r  BASE_DIRmediaw
)r  r   r   r   rF   rG   rH   r   floatr[   r   ru   usernameemailr  r  rX   r   os.pathAppraise.settingsr!  openwrite)"rm   csv_filer   r   r   r   rf   	system1IDr   	system2IDr   r   r   r   r   r   r   
domainNamer\   r  campaignNamer&  	useremailr
  rJ   r   slir  r!  media_file_pathoutfilecr'   r   r(   dump_all_results_to_csv_file?  s    
"
z5PairwiseAssessmentResult.dump_all_results_to_csv_filec                 C   s  t t}| jjdd}d}|j| D ]}||d kr$||d kr$||d ksNq$|d }|d }	|d	 }
|d
 }|d }|d }tt|t| d}|d }|d }d|d |d }|d }|d }tjj	|d}|j
}|j}||d |  |||||	|
|||f	 q$|S )NTr   )r   r   r  r   r   r   rA   r   r   r   r  r   r  r  r   r   r;   rq   r1   r   r   r   r  r   r  rr   r  )r   r   rF   rG   rH   r   r%  r[   r   ru   r&  r'  r   )rm   srcCodetgtCodedomainr   r   r   rf   r-  r   r.  r   r   r   r   r   r   r   r/  r\   rJ   r&  r1  r'   r'   r(   get_csv  sN    


z PairwiseAssessmentResult.get_csvFc              	   C   s   |  |||}dg}|r(d|d  |d< |D ]@}|| D ]2}	|sL|	dd  n|	}
|ddd |
D  q8q,ddlm} dd	lm} ||d
|}t|d$}|D ]}|| |d qW 5 Q R X d S )NzAusername,email,segmentID,score1,score2,durationInSeconds,itemTypez	systemID,r   r;   r  c                 S   s   g | ]}t |qS r'   r  r  r'   r'   r(   r     s     z6PairwiseAssessmentResult.write_csv.<locals>.<listcomp>r  r   r"  r#  r$  )r<  r   r  r(  r)  r!  r*  r+  )rm   r9  r:  r;  csvFileallDatar   r2  r3  r4  er  r!  r5  r6  r7  r'   r'   r(   	write_csv  s    
z"PairwiseAssessmentResult.write_csvc                 C   s   t t}d}| jjd|d}|r,|j|d}d}|j| D ]p}|d d}|d d}|d	 }	|d
 }
|d }|D ]}|| |	|
f qv|D ]}|| |	|f qq:|S )Nr   Tr   task__campaign__id)r   r  r   r   r   r   +r;   rq   r1   r   )r   r   rF   rG   rH   r3   r   )rm   campaign_idr   r   r   r   rf   Zsystem1_idsZsystem2_ids
segment_idr   r   	system_idr'   r'   r(   get_system_scores  s"    z*PairwiseAssessmentResult.get_system_scoresc              
   C   s  g }d}|r|d7 }| j jd|d}|r4|j|d}|sD|jdd}d}	|rT|	d }	|r`|	d	 }	|j|	 D ]2}
|
d
 |
d |
d |
d |
d |
d |
d f|
dd  |
d
 |
d |
d |
d |
d |
d |
d f|
dd  g}|r
|d
 d|d
< |d d|d< |D ]}|d d kr$q|d
 }|d }|rt|d}|D ](}|f|f |dd   }|| qHn&|}|f|f |dd   }|| qqj|S )Nr   )BADREFTr   rA  )createdBy__is_active)	createdBy__usernamer   r  r   r   r   r   r   r   )r   r   )task__batchNorB   r   r;   r1   r   r   r   r  r   rq   r  rC  )r   )r;   )rF   rG   rH   r3   r   )rm   rD  extended_csvexpand_multi_sysinclude_inactiveadd_batch_infosystem_data
item_typesr   attributes_to_extract_resultrK   rf   user_idZsys_ids
system_idsrF  datar'   r'   r(   get_system_data  sl    	



z(PairwiseAssessmentResult.get_system_dataNr1   c              	      s   | j d d}d}dd |D dd |D  }i }|D ]4 i | <  fdd|D D ]}|| |  |< qRq4i }|D ] tdd |   D }	g }
|  D ]>}|  | }t||	 }|
|t|t|t| |f qtt|
fdd	d
d| < qr|S )N)rD  )csdefilvtrr]  ruzhc                 S   s   g | ]}d  |qS )zen-{0}r[   r}   r'   r'   r(   r     s     z>PairwiseAssessmentResult.get_system_status.<locals>.<listcomp>c                 S   s   g | ]}d  |qS )z{0}-enr`  r}   r'   r'   r(   r     s    c                    s   g | ]} |kr|qS r'   r'   r}   )rw   r'   r(   r     s      c                 S   s   g | ]}t |qS r'   )r4   r}   r'   r'   r(   r     s     c                    s   |   S r"   r'   )r   )
sort_indexr'   r(   <lambda>      z<PairwiseAssessmentResult.get_system_status.<locals>.<lambda>T)keyreverse)rG  r   r   r   r4   r   sorted)rm   rD  ra  r   non_english_codescodesrW  rd  output_datatotal_annotationsoutput_localr   zr'   )rw   ra  r(   get_system_statusx  s,    $
z*PairwiseAssessmentResult.get_system_statusc                 C   s*   | j jdd||djddd}tt|S )NFT)r?   r@   rA   task__campaignrB   rC   )rF   rG   rH   r4   rI   )rm   rJ   rN   rK   r'   r'   r(   'completed_results_for_user_and_campaign  s     z@PairwiseAssessmentResult.completed_results_for_user_and_campaign)T)F)FTFF)Nr1   )"r   r   r   r   r
   r   r0   r   r   
FloatFieldr   r   r   r   r   rT   r   r>   r   r   r   r   r   r   r   r   r  r8  r<  r@  rG  rX  rm  ro  r'   r'   r'   r(   rE     s     	




*
a
:
"    c&rE   )'r   r   collectionsr   jsonr   	tracebackr   zipfiler   r   Zdjango.contribr   django.contrib.auth.modelsr   django.core.exceptionsr	   	django.dbr
   django.utils.textr   r   Zdjango.utils.timezoner   django.utils.translationr   r0   Appraise.utilsr   r  r   ZEvalData.models.base_modelsr   r^   AnnotationTaskRegistryregisterBaseMetadatar   rE   r'   r'   r'   r(   <module>   s,   
  w