U
    ¿dDv                     @   s  d Z ddlZddlmZ ddlmZ ddlmZ ddlmZ ddl	m
Z
 ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ dZeedZG dd deZ ej!G dd deZ"G dd deZ#dS )z>
Appraise evaluation framework

See LICENSE for usage details
    N)defaultdict)loads)
is_zipfile)ZipFile)User)models)format_lazy)ugettext_lazy)_get_loggerLANGUAGE_CODES_AND_NAMES)AnnotationTaskRegistry)BaseMetadata)MAX_REQUIREDANNOTATIONS_VALUE)seconds_to_timedelta)TextPaird   )namec                       s   e Zd ZdZejeedeededdZ	ej
dddedd	Zejdded
dZejddeddZejddeddZejddeddZ fddZ  ZS )TextPairWithContextzG
    Models a pair of two text segments and corresponding context.
    zDocument IDz(max. {value} characters)value)
max_lengthverbose_name	help_textTFzComplete document?)blankdb_indexdefaultr   zSource context (left))r   nullr   zSource context (right)zTarget context (left)zTarget context (right)c                    s   t t|  S )zT
        Validates the current TextPairWithContext instance, checking text.
        )superr   is_validself	__class__ X/var/www/rival/public_html/translation-eval/EvalData/models/direct_assessment_context.pyr   E   s    zTextPairWithContext.is_valid)__name__
__module____qualname____doc__r   	CharFieldMAX_DOCUMENTID_LENGTH_f
documentIDBooleanFieldisCompleteDocument	TextFieldsourceContextLeftsourceContextRighttargetContextLefttargetContextRightr   __classcell__r$   r$   r"   r%   r   "   sB           r   c                
   @   sB  e Zd ZdZejddejddeddZej	e
dded	d
ZejedeededdZej	eddddededdZejededdZejdejdddddeddZdd Zdd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( Zd9d*d+Zed,d- Zed:d/d0Z ed1d2 Z!ed3d4 Z"d5d6 Z#d7d8 Z$d.S );DirectAssessmentContextTaskz=
    Models a direct assessment context evaluation task.
    zCampaign.CampaignTz %(app_label)s_%(class)s_campaign%(app_label)s_%(class)ssCampaignr   	on_deleterelated_namerelated_query_namer   z%(app_label)s_%(class)s_itemsItems)r<   r=   r   zRequired annotationsz(value in range=[1,{value}])r   r   r   z"%(app_label)s_%(class)s_assignedTozAssigned toz(users working on this task))r   r   r<   r=   r   r   zBatch numberz	(1-based)zCampaign.CampaignDataz!%(app_label)s_%(class)s_batchDataz
Batch data)r;   r   r   r   r<   r=   r   c                 C   s
   t | jS N)str	batchDatar    r$   r$   r%   dataName   s    z$DirectAssessmentContextTask.dataNamec                 C   s   t | j jjS r@   )rA   itemsfirstmetadatamarketr    r$   r$   r%   
marketName   s    z&DirectAssessmentContextTask.marketNamec                 C   sD   t | j jjd}t|dkr@|d t kr@t|d  S d S Nr,      r   	rA   rD   rE   rF   rG   splitlenr   keysr!   tokensr$   r$   r%   marketSourceLanguage   s    z0DirectAssessmentContextTask.marketSourceLanguagec                 C   s@   t | j jjd}t|dkr<|d t kr<|d S d S rI   rK   rO   r$   r$   r%   marketSourceLanguageCode   s    z4DirectAssessmentContextTask.marketSourceLanguageCodec                 C   sD   t | j jjd}t|dkr@|d t kr@t|d  S d S Nr,   rJ      rK   rO   r$   r$   r%   marketTargetLanguage   s    z0DirectAssessmentContextTask.marketTargetLanguagec                 C   s@   t | j jjd}t|dkr<|d t kr<|d S d S rS   rK   rO   r$   r$   r%   marketTargetLanguageCode   s    z4DirectAssessmentContextTask.marketTargetLanguageCodec                 C   s*   t jj| dd|djddd}tt|S )NFT)task	activated	completed	createdByitem_idflat)DirectAssessmentContextResultobjectsfiltervalues_listrM   set)r!   userresultsr$   r$   r%   completed_items_for_user   s        z4DirectAssessmentContextTask.completed_items_for_userc                 C   s&   ddl m} |jj|| jd}| S )Nr   )TrustedUser)rc   campaign)Campaign.modelsrf   r_   r`   rg   exists)r!   rc   rf   trusted_userr$   r$   r%   is_trusted_user   s    z+DirectAssessmentContextTask.is_trusted_userFc                 C   s  |  |}d }d}| j dD ]T}tjj|dd|d}| sntd	|j
|j| |rf|jdkrn|} qx|d7 }q"|std		| j
 tjj| ddd
jddd}tt|}	d}
|rd}
| j|
 }td	|	| |	|krtd	| j
 |   |   |r||fS |S )Nr   idFT)itemrX   rY   rZ   z-identified next item: {0}/{1} for trusted={2}TGTrT   zNo next item found for task {0})rW   rX   rY   r[   r\   r   F   zUnique annotations={0}/{1}zCompleting task {0})rk   rD   allorder_byr^   r_   r`   ri   printformatrl   itemTypeLOGGERinfora   rM   rb   requiredAnnotationscompletesave)r!   rc   return_completed_itemsrj   	next_itemcompleted_itemsrm   resultannotationsuniqueAnnotationsrequired_user_results_total_requiredr$   r$   r%   next_item_for_user   s^    
     
   


z.DirectAssessmentContextTask.next_item_for_userc                 C   s<   | j j|ddddD ]}||}|d k	r|  S qd S )NTF)
assignedTorX   rY   z-id)r_   r`   rq   r   )clsrc   active_taskr{   r$   r$   r%   get_task_for_user   s      

z-DirectAssessmentContextTask.get_task_for_userNc                 C   s   | j jdd|d}|r"|j|d}|dD ]2}|j }||jk r,|r,||j kr,|  S q,d S ]:}t| |d pzd	}||d
 k rd| j j	|d	 d  S qdd S ]:}|j
 jj}|j|ksq|j }||jk r|  S qd S )NTF)rX   rY   +items__metadata__market__targetLanguageCode)rg   rl   rw   r      r   rT   pk)r_   r`   rq   r   countrw   rp   ra   rr   getrD   rE   rF   rG   targetLanguageCode)r   coderg   rc   active_tasksr   active_usersrG   r$   r$   r%   get_next_free_task_for_language   s@    


       



z;DirectAssessmentContextTask.get_next_free_task_for_languagec                 C   s   |  ||S r@   )r   )r   r   rg   r$   r$   r%   ,get_next_free_task_for_language_and_campaign  s    zHDirectAssessmentContextTask.get_next_free_task_for_language_and_campaignc                 C   s  |j }|jj}|j}d}|drt|sBd|}	t|	 dS t|}
dd |
	 D }|D ]4}|

|d}tjdkrt|}q`t|dd}q`ntt|
 dd}d	d
lm} | }d	}d	}d	}|D ]}|d	kr||krd|}	t|	 | }t||   dS t||d d  d	}g }|d D ]}t|d }t|d }||krrt||d  |}||krt||d d |}t|d |d |dd|dd|d |d |dd|dd||d |d |d |d d}|| |d r:|d7 }q:t|| dkrFdt|| }	t|	 q|d7 }|D ]}||_ |  qRt||d d |d d ||d }|  |jj|  |  d!t||d d }	t|	 qd"||}	t|	 | }t||  dS )#zX
        Creates new DirectAssessmentContextTask instances based on JSON input.
        Nz.zipz!Batch {0} not a valid ZIP archivec                 S   s   g | ]}| d r|qS )z.json)endswith.0xr$   r$   r%   
<listcomp>/  s     
 z@DirectAssessmentContextTask.import_from_json.<locals>.<listcomp>zutf-8)rJ   	   r   )encodingr   )datetimez'Stopping after max_count={0} iterationsrW   batchNorD   targetID
targetTextsourceID
sourceTextr2   r3   r4   r5   itemIDrt   r.   r0   )r   r   r2   r3   r   r   r4   r5   rZ   r   rt   r.   r0   rT   r   z)Expected 100 items for task but found {0}rw   )rg   rw   r   rB   rZ   z&Success processing batch {0}, task {1}zMax length ID={0}, text={1})rF   dataFiler   r   r   rs   ru   warnr   namelistreaddecodesysversion_infor   rA   r   nowrv   rr   rM   encoder   r   appendry   r7   rD   add)r   rg   
batch_user
batch_data	max_count
batch_meta
batch_name
batch_file
batch_json_msg	batch_zipbatch_json_filesbatch_json_filebatch_contentr   t1current_countmax_length_idmax_length_text
batch_taskt2Z	doc_items	new_itemsrm   current_length_idcurrent_length_textnew_itemnew_taskr$   r$   r%   import_from_json  s    



















 

z,DirectAssessmentContextTask.import_from_jsonc                 C   sD   t | dr| j sdS t | ds&dS | jD ]}| s, dS q,dS )zS
        Validates the current DA task, checking campaign and items exist.
        rg   FrD   T)hasattrrg   r   rD   )r!   rm   r$   r$   r%   r     s    

z$DirectAssessmentContextTask.is_validc                 C   s   d | jj| j| jS )Nz{0}.{1}[{2}])rs   r#   r&   rg   rl   r    r$   r$   r%   _generate_str_name  s    z.DirectAssessmentContextTask._generate_str_name)F)NN)%r&   r'   r(   r)   r   
ForeignKeyPROTECTr,   rg   ManyToManyFieldr   rD   PositiveSmallIntegerFieldr-   r   rw   r   r   PositiveIntegerFieldr   rB   rC   rH   rQ   rR   rU   rV   re   rk   r   classmethodr   r   r   r   r   r   r$   r$   r$   r%   r7   L   s   	

 
3

+

|r7   c                
   @   s>  e Zd ZdZejededdZejededdZ	ejededdZ
ejedejd	d
eddZejedddejdd
eddZdd Zdd Zdd Zed1ddZedd Zedd Zedd Zedd Zed d! Zed"d# Zed2d%d&Zed'd( Zed3d)d*Zed4d-d.Zed/d0 Z d+S )5r^   z?
    Models a direct assessment context evaluation result.
    Scorez(value in range=[1,100])r?   z
Start timez(in seconds)zEnd timeTz%(app_label)s_%(class)s_itemr8   Itemr:   z%(app_label)s_%(class)s_taskTask)r   r   r   r;   r<   r=   r   c                 C   s   d | jj| j| jS )Nz{0}.{1}={2})rs   r#   r&   rm   scorer    r$   r$   r%   r     s    z0DirectAssessmentContextResult._generate_str_namec                 C   s   | j | j }t|dS )NrT   )end_time
start_timeround)r!   dr$   r$   r%   duration  s    z&DirectAssessmentContextResult.durationc                 C   s   | j jS r@   )rm   rt   r    r$   r$   r%   	item_type  s    z'DirectAssessmentContextResult.item_typec                 C   s0   | j j|ddd}|r(|d  S | S )NFTrZ   rX   rY   item__id)r_   r`   ra   distinctr   )r   rc   unique_only_queryr$   r$   r%   get_completed_for_user  s    z4DirectAssessmentContextResult.get_completed_for_userc                 C   sx   t t}| jj|dddddD ]*}|d  dkr8q"||d   d7  < q"t| }td	d
 | D }||fS )NFTr   task__iditem__itemTyperT   tgtr   c                 S   s   g | ]}|d kr|qS )ro   r$   r   r$   r$   r%   r     s      zIDirectAssessmentContextResult.get_hit_status_for_user.<locals>.<listcomp>)	r   intr_   r`   ra   lowerrM   rN   values)r   rc   	user_data	user_item
total_hitscompleted_hitsr$   r$   r%   get_hit_status_for_user  s       z5DirectAssessmentContextResult.get_hit_status_for_userc                 C   sB   | j j|ddd}g }|D ]}|j|j }|| qtt|S )NFTr   )r_   r`   r   r   r   r   sum)r   rc   rd   	durationsr}   r   r$   r$   r%   get_time_for_user  s    z/DirectAssessmentContextResult.get_time_for_userc                 C   s~   t t}d}| jjd|d}d}|j| D ]N}|d }|d }|d }|d }	d	|d
 |d }
||
 |||	|f q*|S )Nrn   CHKTrY   item__itemType__in)item__targetIDr   rZ   item__itemID*item__metadata__market__sourceLanguageCode*item__metadata__market__targetLanguageCoder   rT   r   rJ   {0}-{1}      )r   listr_   r`   ra   rs   r   )r   system_scoresvalue_typesqsvalue_namesr}   systemIDr   annotatorID	segmentIDmarketIDr$   r$   r%   get_system_annotations  s    z4DirectAssessmentContextResult.get_system_annotationsc                    s  ddl m  tt}| jjdd}d}|j| D ]4}|d  dkrFq0|d }|d }|| | q0tt}|D ]J}t	jj
|d	}d
 fdd|j D }	|	sd}	||	 ||  qri }
|D ]H}t|| }d}|D ]}|| |dkr|d7 }q|t|f|
|< q|
S )Nr   r   TrY   )rZ   r   r   rT   r   r   r   ;c                    s    g | ]}|j   kr|j qS r$   r   rN   r   r   r$   r%   r   /  s   zODirectAssessmentContextResult.compute_accurate_group_status.<locals>.<listcomp>NoGroupInforo   )Dashboard.modelsr   r   r   r_   r`   ra   r   r   r   r   joingroupsrp   extendrb   r   rM   )r   user_statusr   r   r}   r   taskIDgroup_statusrc   
usergroups
group_hits
group_nametask_idscompleted_taskstask_idr$   r   r%   compute_accurate_group_status  s<    

z;DirectAssessmentContextResult.compute_accurate_group_statusc           "         s  ddl m  tt}i }| jjdd}d}|j| D ]>}|d }|d }|d }	|d }
tt|
t|	 d}|d	 }|d
 }d	|d |d }|d }|d }|d }|d }|d }|d }||kr|| d }|| d }|| d }nPt
jj|d}|j}|j}d fdd|j D }|s4d}|||f||< ||d |  ||||||||	|
|||||f q4|}dg}|D ]0}|| D ] }|ddd |D  qqddlm} ddlm} ||d|}t|d &} |D ]}!| |! | d! qW 5 Q R X d S )"Nr   r   Tr   )r   r   r   r   rZ   r   r   r   "item__metadata__market__domainNamer   r   task__campaign__campaignNameitem__documentIDitem__isCompleteDocumentrT   r   rJ   r   r   r            r   
            r   r   c                    s    g | ]}|j   kr|j qS r$   r  r   r   r$   r%   r   y  s   zNDirectAssessmentContextResult.dump_all_results_to_csv_file.<locals>.<listcomp>r  -ztaskID,systemID,username,email,groups,segmentID,score,startTime,endTime,durationInSeconds,itemType,campaignName,documentID,isCompleteDocument,c                 S   s   g | ]}t |qS r$   rA   r   ar$   r$   r%   r     s     r  BASE_DIRmediaw
)r  r   r   r   r_   r`   ra   r   floatrs   r   r   usernameemailr  r  rp   r   os.pathAppraise.settingsr#  openwrite)"r   csv_filer   r   r   r   r}   r   r   r   r   r   r   r   r   
domainNamert   r  campaignNamer.   r0   r(  	useremailr
  rc   r   slir  r#  media_file_pathoutfilecr$   r   r%   dump_all_results_to_csv_fileF  s    
"
z:DirectAssessmentContextResult.dump_all_results_to_csv_filec                 C   s  t t}| jjdd}d}|j| D ]}||d kr$||d kr$||d ksNq$|d }|d }	|d	 }
|d
 }tt|t|
 d}|d }|d }d|d |d }|d }|d }|d }|d }tjj	|d}|j
}|j}||d |  |||||	||||f	 q$|S )NTr   )r   r   r   r   rZ   r   r   r   r  r   r  r  r  r  r  r   rT   r   rJ   r   r   r   r   r  r  r   r  )r   r   r_   r`   ra   r   r'  rs   r   r   r(  r)  r   )r   srcCodetgtCodedomainr   r   r   r}   r   r   r   r   r   r   r   r   r/  rt   r.   r0   rc   r(  r1  r$   r$   r%   get_csv  sN    


z%DirectAssessmentContextResult.get_csvFc              	   C   s   |  |||}dg}|r(d|d  |d< |D ]@}|| D ]2}	|sL|	dd  n|	}
|ddd |
D  q8q,ddlm} dd	lm} ||d
|}t|d$}|D ]}|| |d qW 5 Q R X d S )NzWusername,email,segmentID,score,durationInSeconds,itemType,documentID,isCompleteDocumentz	systemID,r   rT   r  c                 S   s   g | ]}t |qS r$   r  r  r$   r$   r%   r     s     z;DirectAssessmentContextResult.write_csv.<locals>.<listcomp>r!  r"  r$  r%  r&  )r<  r   r  r*  r+  r#  r,  r-  )r   r9  r:  r;  csvFileallDatar   r2  r3  r4  er  r#  r5  r6  r7  r$   r$   r%   	write_csv  s     
z'DirectAssessmentContextResult.write_csvc                 C   s   t t}d}| jjd|d}|r,|j|d}d}|j| D ]R}|d d}|d }|d	 }	|d
 }
|d }|D ]}|| ||	|
|f qpq:|S )Nr   Tr   task__campaign__id)r   r   r   r  r  r   +rT   r   rJ   r   )r   r   r_   r`   ra   rL   r   )r   campaign_idr   r   r   r   r}   
system_ids
segment_idr   r.   r0   	system_idr$   r$   r%   get_system_scores  s"    
z/DirectAssessmentContextResult.get_system_scoresc                 C   s   g }d}|r|d7 }| j jd|d}|r4|j|d}|sD|jdd}d}	|rT|	d }	|r`|	d	 }	|j|	 D ]}
|
d
 }|
d dd}|dd}|r|d}|D ]&}|f|f |
dd   }|| qqj|}|f|f |
dd   }|| qj|S )Nr   )BADREFTr   rA  )createdBy__is_active)	createdBy__usernamer   r   r   r   r   r   r  r  )r   r   )task__batchNor[   r   rT   zTransformer+R2LTransformer_R2LzR2L+BackR2L_BackrC  r   )r_   r`   ra   replacerL   r   )r   rD  extended_csvexpand_multi_sysinclude_inactiveadd_batch_infosystem_data
item_typesr   attributes_to_extractr}   user_id
_fixed_idsrE  rG  datar$   r$   r%   get_system_data  s6    	
z-DirectAssessmentContextResult.get_system_dataNrJ   c              	      s   | j d d}d}dd |D dd |D  }i }|D ]4 i | <  fdd|D D ]}|| |  |< qRq4i }|D ] tdd |   D }	g }
|  D ]>}|  | }t||	 }|
|t|t|t| |f qtt|
fdd	d
d| < qr|S )N)rD  )csdefilvtrr`  ruzhc                 S   s   g | ]}d  |qS )zen-{0}rs   r   r$   r$   r%   r   p  s     zCDirectAssessmentContextResult.get_system_status.<locals>.<listcomp>c                 S   s   g | ]}d  |qS )z{0}-enrc  r   r$   r$   r%   r   p  s    c                    s   g | ]} |kr|qS r$   r$   r   )r   r$   r%   r   w  s      c                 S   s   g | ]}t |qS r$   )rM   r   r$   r$   r%   r   |  s     c                    s   |   S r@   r$   )r   )
sort_indexr$   r%   <lambda>      zADirectAssessmentContextResult.get_system_status.<locals>.<lambda>T)keyreverse)rH  r   r   r   rM   r   sorted)r   rD  rd  r   non_english_codescodesrZ  rg  output_datatotal_annotationsoutput_localr   zr$   )r   rd  r%   get_system_statusb  s,    $
z/DirectAssessmentContextResult.get_system_statusc                 C   s*   | j jdd||djddd}tt|S )NFT)rX   rY   rZ   task__campaignr[   r\   )r_   r`   ra   rM   rb   )r   rc   rg   rd   r$   r$   r%   'completed_results_for_user_and_campaign  s     zEDirectAssessmentContextResult.completed_results_for_user_and_campaign)T)F)FTFF)NrJ   )!r&   r'   r(   r)   r   r   r,   r   
FloatFieldr   r   r   r   r   rm   r7   rW   r   r   r   r   r   r   r   r   r  r8  r<  r@  rH  r[  rp  rr  r$   r$   r$   r%   r^     s|      	




*
b
:
"    C&r^   )$r)   r   collectionsr   jsonr   zipfiler   r   django.contrib.auth.modelsr   	django.dbr   django.utils.textr   r-   django.utils.translationr	   r,   Appraise.utilsr
   r  r   ZEvalData.models.base_modelsr   r   r   r   r   r+   r&   ru   r   registerr7   r^   r$   r$   r$   r%   <module>   s0   
*  d