U
    ¿dN                     @   s   d Z ddlZddlmZ ddlmZ ddlmZ ddlmZ ddl	m
Z
 ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ eedZej G dd deZ!G dd deZ"dS )z>
Appraise evaluation framework

See LICENSE for usage details
    N)defaultdict)loads)
is_zipfile)ZipFile)User)models)format_lazy)ugettext_lazy)_get_loggerLANGUAGE_CODES_AND_NAMES)AnnotationTaskRegistry)BaseMetadata)MAX_REQUIREDANNOTATIONS_VALUE)seconds_to_timedelta)TextPairWithContext)namec                
   @   sT  e Zd ZdZejddejddeddZej	e
dded	d
ZejedeededdZej	eddddededdZejededdZejdejdddddeddZdd Zdd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( Zd=d*d+Zd>d,d-Zd.d/ Ze d0d1 Z!e d?d3d4Z"e d5d6 Z#e d7d8 Z$d9d: Z%d;d< Z&d2S )@DirectAssessmentDocumentTaska  
    Models a direct assessment document evaluation task.

    Note: this task is, similarily to other models, a shameless copy of
    DirectAssessmentContextTask, with one additional method for retrieving all
    items belonging to the same document in the task called
    `next_document_for_user`, and a helper method `get_results_for_each_item`.
    The underlying model is the same as for
    DirectAssessmentContextTask.
    zCampaign.CampaignTz %(app_label)s_%(class)s_campaign%(app_label)s_%(class)ssCampaigndb_index	on_deleterelated_namerelated_query_nameverbose_namez%(app_label)s_%(class)s_itemsItems)r   r   r   zRequired annotationsz(value in range=[1,{value}]))valuer   	help_textz"%(app_label)s_%(class)s_assignedTozAssigned toz(users working on this task))blankr   r   r   r   r   zBatch numberz	(1-based)zCampaign.CampaignDataz!%(app_label)s_%(class)s_batchDataz
Batch data)r   r    r   nullr   r   r   c                 C   s
   t | jS N)str	batchDataself r'   Y/var/www/rival/public_html/translation-eval/EvalData/models/direct_assessment_document.pydataName`   s    z%DirectAssessmentDocumentTask.dataNamec                 C   s   t | j jjS r"   )r#   itemsfirstmetadatamarketr%   r'   r'   r(   
marketNamec   s    z'DirectAssessmentDocumentTask.marketNamec                 C   sD   t | j jjd}t|dkr@|d t kr@t|d  S d S N_   r   	r#   r*   r+   r,   r-   splitlenr   keysr&   tokensr'   r'   r(   marketSourceLanguagef   s    z1DirectAssessmentDocumentTask.marketSourceLanguagec                 C   s@   t | j jjd}t|dkr<|d t kr<|d S d S r/   r2   r6   r'   r'   r(   marketSourceLanguageCodel   s    z5DirectAssessmentDocumentTask.marketSourceLanguageCodec                 C   sD   t | j jjd}t|dkr@|d t kr@t|d  S d S Nr0   r1      r2   r6   r'   r'   r(   marketTargetLanguager   s    z1DirectAssessmentDocumentTask.marketTargetLanguagec                 C   s@   t | j jjd}t|dkr<|d t kr<|d S d S r:   r2   r6   r'   r'   r(   marketTargetLanguageCodex   s    z5DirectAssessmentDocumentTask.marketTargetLanguageCodec                 C   s*   t jj| dd|djddd}tt|S )NFT)task	activated	completed	createdByitem_idflat)DirectAssessmentDocumentResultobjectsfiltervalues_listr4   set)r&   userresultsr'   r'   r(   completed_items_for_user~   s        z5DirectAssessmentDocumentTask.completed_items_for_userc                 C   s&   ddl m} |jj|| jd}| S )Nr   )TrustedUser)rJ   campaign)Campaign.modelsrM   rF   rG   rN   exists)r&   rJ   rM   trusted_userr'   r'   r(   is_trusted_user   s    z,DirectAssessmentDocumentTask.is_trusted_userFc                 C   s"  |  |}d }d}| j dD ]X}tjj|dd|d}| srtd	|j
|j|j| |rj|jdkrr|} q||d7 }q"|std		| j
 tjj| ddd
jddd}tt|}	d}
|rd}
| j|
 }td	|	| |	|krtd	| j
 |   |   |r||fS |S )Nr   idFT)itemr?   r@   rA   z6Identified next item: {}/{} (itemID={}) for trusted={}TGTr;   zNo next item found for task {0})r>   r?   r@   rB   rC   d   F   zUnique annotations={0}/{1}zCompleting task {0})rR   r*   allorder_byrE   rF   rG   rP   printformatrS   itemTypeitemIDLOGGERinforH   r4   rI   requiredAnnotationscompletesave)r&   rJ   return_completed_itemsrQ   	next_itemcompleted_itemsrT   resultannotationsuniqueAnnotationsrequired_user_results_total_requiredr'   r'   r(   next_item_for_user   s`    
      
   


z/DirectAssessmentDocumentTask.next_item_for_userc                 C   s  | j |dd\}}|s6|s$|g g fS ||ddg g dfS | jj|jdd}g }d}|D ]4}|| |j|jkrvd}|jrX|r q|  qX| 	||}	|s|||	fS t
dd |	D }
tjj| dd|d	 }| jjdd
 }td|||
t
|| ||||
||	|fS )z6Returns the next item and all items from its document.T)rc   r   )
documentIDrS   Fc                 S   s   g | ]}|d k	r|qS r"   r'   ).0resr'   r'   r(   
<listcomp>   s      zGDirectAssessmentDocumentTask.next_document_for_user.<locals>.<listcomp>)r>   item__isCompleteDocumentr@   rA   )isCompleteDocumentz[Completed {}/{} documents, {}/{} items in the current document, completed {} items in total)rk   r*   rG   rl   rY   appendrS   rq   clearget_results_for_each_itemr4   rE   rF   countrZ   r[   )r&   rJ   Zreturn_statisticsrd   re   Z_itemsblock_itemsZcurrent_blockrT   block_resultsZcompleted_items_in_blockZcompleted_blocksZtotal_blocksr'   r'   r(   next_document_for_user   sj    




z3DirectAssessmentDocumentTask.next_document_for_userc                 C   s   g }|D ]0}t jj|jd|| ddd }|| qt|t|krRtd t	||D ]"\}}|r\|j|j
jkr\td q\|S )z7Returns the latest result object for each item or none.T)item__idr@   rA   r>   ry   dateModifiedz/Warning: incorrect number of retrieved results!z.Warning: incorrect order of items and results!)rE   rF   rG   rS   rY   r+   rr   r4   rZ   ziprT   )r&   rv   rJ   rw   rT   rf   r'   r'   r(   rt     s&     

z6DirectAssessmentDocumentTask.get_results_for_each_itemc                 C   s<   | j j|ddddD ]}||}|d k	r|  S qd S )NTF)
assignedTor?   r@   z-id)rF   rG   rY   rk   )clsrJ   active_taskrd   r'   r'   r(   get_task_for_user  s      

z.DirectAssessmentDocumentTask.get_task_for_userNc                 C   s   | j jdd|d}|r"|j|d}|dD ]2}|j }||jk r,|r,||j kr,|  S q,d S ]:}t| |d pzd	}||d
 k rd| j j	|d	 d  S qdd S ]:}|j
 jj}|j|ksq|j }||jk r|  S qd S )NTF)r?   r@   +items__metadata__market__targetLanguageCode)rN   rS   r`   r|      r   r;   pk)rF   rG   rY   r|   ru   r`   rX   rH   rZ   getr*   r+   r,   r-   targetLanguageCode)r}   coderN   rJ   active_tasksr~   active_usersr-   r'   r'   r(   get_next_free_task_for_language(  s@    


       



z<DirectAssessmentDocumentTask.get_next_free_task_for_languagec                 C   s   |  ||S r"   )r   )r}   r   rN   r'   r'   r(   ,get_next_free_task_for_language_and_campaignT  s    zIDirectAssessmentDocumentTask.get_next_free_task_for_language_and_campaignc                 C   s  |j }|jj}|j}d}|drt|sBd|}	t|	 dS t|}
dd |
	 D }|D ]4}|

|d}tjdkrt|}q`t|dd}q`ntt|
 dd}d	d
lm} | }d	}d	}d	}|D ]}|d	kr||krd|}	t|	 | }t||   dS t||d d  d	}g }|d D ]}t|d }t|d }||krrt||d  |}||krt||d d |}t|d |d |dd|dd|d |d |dd|dd||d |d |d |d d}|| |d r:|d7 }q:t|| dkrFdt|| }	t|	 q|d7 }|D ]}||_ |  qRt||d d |d d ||d }|  |jj|  |  d!t||d d }	t|	 qd"||}	t|	 | }t||  dS )#zY
        Creates new DirectAssessmentDocumentTask instances based on JSON input.
        Nz.zipz!Batch {0} not a valid ZIP archivec                 S   s   g | ]}| d r|qS )z.json)endswithrm   xr'   r'   r(   ro   i  s     
 zADirectAssessmentDocumentTask.import_from_json.<locals>.<listcomp>zutf-8)r1   	   r   )encodingr   )datetimez'Stopping after max_count={0} iterationsr>   batchNor*   targetID
targetTextsourceID
sourceTextsourceContextLeftsourceContextRighttargetContextLefttargetContextRightr]   r\   rl   rq   )r   r   r   r   r   r   r   r   rA   r]   r\   rl   rq   r;   rV   z)Expected 100 items for task but found {0}r`   )rN   r`   r   r$   rA   z&Success processing batch {0}, task {1}zMax length ID={0}, text={1})r,   dataFiler   r   r   r[   r^   warnr   namelistreaddecodesysversion_infor   r#   r   nowr_   rZ   r4   encoder   r   rr   rb   r   r*   add)r}   rN   
batch_user
batch_data	max_count
batch_meta
batch_name
batch_file
batch_json_msg	batch_zipbatch_json_filesbatch_json_filebatch_contentr   t1current_countmax_length_idmax_length_text
batch_taskt2	doc_items	new_itemsrT   current_length_idcurrent_length_textnew_itemnew_taskr'   r'   r(   import_from_jsonX  s    



















 

z-DirectAssessmentDocumentTask.import_from_jsonc                 C   sD   t | dr| j sdS t | ds&dS | jD ]}| s, dS q,dS )zS
        Validates the current DA task, checking campaign and items exist.
        rN   Fr*   T)hasattrrN   is_validr*   )r&   rT   r'   r'   r(   r     s    

z%DirectAssessmentDocumentTask.is_validc                 C   s   d | jj| j| jS )Nz{0}.{1}[{2}])r[   	__class____name__rN   rS   r%   r'   r'   r(   _generate_str_name  s    z/DirectAssessmentDocumentTask._generate_str_name)F)T)NN)'r   
__module____qualname____doc__r   
ForeignKeyPROTECTr0   rN   ManyToManyFieldr   r*   PositiveSmallIntegerFieldfr   r`   r   r|   PositiveIntegerFieldr   r$   r)   r.   r8   r9   r<   r=   rL   rR   rk   rx   rt   classmethodr   r   r   r   r   r   r'   r'   r'   r(   r       s   	

 
3
C

+

|r   c                
   @   s>  e Zd ZdZejededdZejededdZ	ejededdZ
ejedejd	d
eddZejedddejdd
eddZdd Zdd Zdd Zed1ddZedd Zedd Zedd Zedd Zed d! Zed"d# Zed2d%d&Zed'd( Zed3d)d*Zed4d-d.Zed/d0 Z d+S )5rE   z@
    Models a direct assessment document evaluation result.
    Scorez(value in range=[1,100])r   z
Start timez(in seconds)zEnd timeTz%(app_label)s_%(class)s_itemr   Itemr   z%(app_label)s_%(class)s_taskTask)r    r   r!   r   r   r   r   c                 C   s   d | jj| j| jS )Nz{0}.{1}={2})r[   r   r   rT   scorer%   r'   r'   r(   r     s    z1DirectAssessmentDocumentResult._generate_str_namec                 C   s   | j | j }t|dS )Nr;   )end_time
start_timeround)r&   dr'   r'   r(   duration  s    z'DirectAssessmentDocumentResult.durationc                 C   s   | j jS r"   )rT   r\   r%   r'   r'   r(   	item_type  s    z(DirectAssessmentDocumentResult.item_typec                 C   s0   | j j|ddd}|r(|d  S | S )NFTrA   r?   r@   ry   )rF   rG   rH   distinctru   )r}   rJ   unique_only_queryr'   r'   r(   get_completed_for_user  s    z5DirectAssessmentDocumentResult.get_completed_for_userc                 C   sx   t t}| jj|dddddD ]*}|d  dkr8q"||d   d7  < q"t| }td	d
 | D }||fS )NFTr   task__iditem__itemTyper;   tgtr   c                 S   s   g | ]}|d kr|qS )rW   r'   r   r'   r'   r(   ro   -  s      zJDirectAssessmentDocumentResult.get_hit_status_for_user.<locals>.<listcomp>)	r   intrF   rG   rH   lowerr4   r5   values)r}   rJ   	user_data	user_item
total_hitscompleted_hitsr'   r'   r(   get_hit_status_for_user   s       z6DirectAssessmentDocumentResult.get_hit_status_for_userc                 C   sB   | j j|ddd}g }|D ]}|j|j }|| qtt|S )NFTr   )rF   rG   r   r   rr   r   sum)r}   rJ   rK   	durationsrf   r   r'   r'   r(   get_time_for_user1  s    z0DirectAssessmentDocumentResult.get_time_for_userc                 C   s~   t t}d}| jjd|d}d}|j| D ]N}|d }|d }|d }|d }	d	|d
 |d }
||
 |||	|f q*|S )NrU   CHKTr@   item__itemType__in)item__targetIDr   rA   item__itemID*item__metadata__market__sourceLanguageCode*item__metadata__market__targetLanguageCoder   r;   r   r1   {0}-{1}      )r   listrF   rG   rH   r[   rr   )r}   system_scoresvalue_typesqsvalue_namesrf   systemIDr   annotatorID	segmentIDmarketIDr'   r'   r(   get_system_annotations<  s    z5DirectAssessmentDocumentResult.get_system_annotationsc                    s  ddl m  tt}| jjdd}d}|j| D ]4}|d  dkrFq0|d }|d }|| | q0tt}|D ]J}t	jj
|d	}d
 fdd|j D }	|	sd}	||	 ||  qri }
|D ]H}t|| }d}|D ]}|| |dkr|d7 }q|t|f|
|< q|
S )Nr   r   Tr@   )rA   r   r   r;   r   r   r   ;c                    s    g | ]}|j   kr|j qS r'   r   r5   r   r   r'   r(   ro   i  s   zPDirectAssessmentDocumentResult.compute_accurate_group_status.<locals>.<listcomp>NoGroupInforW   )Dashboard.modelsr   r   r   rF   rG   rH   r   rr   r   r   joingroupsrX   extendrI   ru   r4   )r}   user_statusr   r   rf   r   taskIDgroup_statusrJ   
usergroups
group_hits
group_nametask_idscompleted_taskstask_idr'   r   r(   compute_accurate_group_statusU  s<    

z<DirectAssessmentDocumentResult.compute_accurate_group_statusc           "         s  ddl m  tt}i }| jjdd}d}|j| D ]>}|d }|d }|d }	|d }
tt|
t|	 d}|d	 }|d
 }d	|d |d }|d }|d }|d }|d }|d }|d }||kr|| d }|| d }|| d }nPt
jj|d}|j}|j}d fdd|j D }|s4d}|||f||< ||d |  ||||||||	|
|||||f q4|}dg}|D ]0}|| D ] }|ddd |D  qqddlm} ddlm} ||d|}t|d &} |D ]}!| |! | d! qW 5 Q R X d S )"Nr   r   Tr   )r   r   r   r   rA   r   r   r   "item__metadata__market__domainNamer   r   task__campaign__campaignNameitem__documentIDrp   r;   r   r1   r   r   r            r   
            r   r   c                    s    g | ]}|j   kr|j qS r'   r   r   r   r'   r(   ro     s   zODirectAssessmentDocumentResult.dump_all_results_to_csv_file.<locals>.<listcomp>r  -ztaskID,systemID,username,email,groups,segmentID,score,startTime,endTime,durationInSeconds,itemType,campaignName,documentID,isCompleteDocument,c                 S   s   g | ]}t |qS r'   r#   rm   ar'   r'   r(   ro     s     r  BASE_DIRmediaw
)r  r   r   r   rF   rG   rH   r   floatr[   r   r   usernameemailr  r  rX   rr   os.pathAppraise.settingsr!  openwrite)"r}   csv_filer   r   r   r   rf   r   r   r   r   r   r   r   r   
domainNamer\   r  campaignNamerl   rq   r&  	useremailr	  rJ   r   slir  r!  media_file_pathoutfilecr'   r   r(   dump_all_results_to_csv_file  s    
"
z;DirectAssessmentDocumentResult.dump_all_results_to_csv_filec                 C   s  t t}| jjdd}d}|j| D ]}||d kr$||d kr$||d ksNq$|d }|d }	|d	 }
|d
 }tt|t|
 d}|d }|d }d|d |d }|d }|d }|d }|d }tjj	|d}|j
}|j}||d |  |||||	||||f	 q$|S )NTr   )r   r   r   r   rA   r   r   r   r  r   r  rp   r  r  r  r   r;   r   r1   r   r   r   r   r  r  r   r  )r   r   rF   rG   rH   r   r%  r[   r   r   r&  r'  rr   )r}   srcCodetgtCodedomainr   r   r   rf   r   r   r   r   r   r   r   r   r-  r\   rl   rq   rJ   r&  r/  r'   r'   r(   get_csv  sN    


z&DirectAssessmentDocumentResult.get_csvFc              	   C   s   |  |||}dg}|r(d|d  |d< |D ]@}|| D ]2}	|sL|	dd  n|	}
|ddd |
D  q8q,ddlm} dd	lm} ||d
|}t|d$}|D ]}|| |d qW 5 Q R X d S )NzWusername,email,segmentID,score,durationInSeconds,itemType,documentID,isCompleteDocumentz	systemID,r   r;   r  c                 S   s   g | ]}t |qS r'   r  r  r'   r'   r(   ro   *  s     z<DirectAssessmentDocumentResult.write_csv.<locals>.<listcomp>r  r   r"  r#  r$  )r:  rr   r  r(  r)  r!  r*  r+  )r}   r7  r8  r9  csvFileallDatar   r0  r1  r2  er  r!  r3  r4  r5  r'   r'   r(   	write_csv  s     
z(DirectAssessmentDocumentResult.write_csvc                 C   s   t t}d}| jjd|d}|r,|j|d}d}|j| D ]R}|d d}|d }|d	 }	|d
 }
|d }|D ]}|| ||	|
|f qpq:|S )Nr   Tr   task__campaign__id)r   r   r   r  rp   r   +r;   r   r1   r   )r   r   rF   rG   rH   r3   rr   )r}   campaign_idr   r   r   r   rf   
system_ids
segment_idr   rl   rq   	system_idr'   r'   r(   get_system_scores5  s"    
z0DirectAssessmentDocumentResult.get_system_scoresc                 C   s   g }d}|r|d7 }| j jd|d}|r4|j|d}|sD|jdd}d}	|rT|	d }	|r`|	d	 }	|j|	 D ]}
|
d
 }|
d dd}|dd}|r|d}|D ]&}|f|f |
dd   }|| qqj|}|f|f |
dd   }|| qj|S )Nr   )BADREFTr   r?  )createdBy__is_active)	createdBy__usernamer   r   r   r   r   r   r  rp   )r   r   )task__batchNorB   r   r;   zTransformer+R2LTransformer_R2LzR2L+BackR2L_BackrA  r   )rF   rG   rH   replacer3   rr   )r}   rB  extended_csvexpand_multi_sysinclude_inactiveadd_batch_infosystem_data
item_typesr   attributes_to_extractrf   user_id
_fixed_idsrC  rE  datar'   r'   r(   get_system_dataX  s6    	
z.DirectAssessmentDocumentResult.get_system_dataNr1   c              	      s   | j d d}d}dd |D dd |D  }i }|D ]4 i | <  fdd|D D ]}|| |  |< qRq4i }|D ] tdd |   D }	g }
|  D ]>}|  | }t||	 }|
|t|t|t| |f qtt|
fdd	d
d| < qr|S )N)rB  )csdefilvtrr^  ruzhc                 S   s   g | ]}d  |qS )zen-{0}r[   r   r'   r'   r(   ro     s     zDDirectAssessmentDocumentResult.get_system_status.<locals>.<listcomp>c                 S   s   g | ]}d  |qS )z{0}-enra  r   r'   r'   r(   ro     s    c                    s   g | ]} |kr|qS r'   r'   r   )r   r'   r(   ro     s      c                 S   s   g | ]}t |qS r'   )r4   r   r'   r'   r(   ro     s     c                    s   |   S r"   r'   )r   )
sort_indexr'   r(   <lambda>      zBDirectAssessmentDocumentResult.get_system_status.<locals>.<lambda>T)keyreverse)rF  r   r   rr   r4   r   sorted)r}   rB  rb  r   non_english_codescodesrX  re  output_datatotal_annotationsoutput_localr   zr'   )r   rb  r(   get_system_status  s,    $
z0DirectAssessmentDocumentResult.get_system_statusc                 C   s*   | j jdd||djddd}tt|S )NFT)r?   r@   rA   task__campaignrB   rC   )rF   rG   rH   r4   rI   )r}   rJ   rN   rK   r'   r'   r(   'completed_results_for_user_and_campaign  s     zFDirectAssessmentDocumentResult.completed_results_for_user_and_campaign)T)F)FTFF)Nr1   )!r   r   r   r   r   r   r0   r   
FloatFieldr   r   r   r   r   rT   r   r>   r   r   r   r   r   r   r   r   r  r6  r:  r>  rF  rY  rn  rp  r'   r'   r'   r(   rE     s|      	




*
b
:
"    C&rE   )#r   r   collectionsr   jsonr   zipfiler   r   django.contrib.auth.modelsr   	django.dbr   django.utils.textr   r   django.utils.translationr	   r0   Appraise.utilsr
   r  r   ZEvalData.models.base_modelsr   r   r   r   Z)EvalData.models.direct_assessment_contextr   r   r^   registerr   rE   r'   r'   r'   r(   <module>   s.   
   K