U
    ¿d}                     @   s  d Z ddlZddlmZ ddlmZ ddlmZ ddlmZ ddl	m
Z
 ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ eedZ G dd deZ!ej"G dd deZ#G dd deZ$dS )z>
Appraise evaluation framework

See LICENSE for usage details
    N)defaultdict)loads)
is_zipfile)ZipFile)User)models)format_lazy)ugettext_lazy)_get_loggerLANGUAGE_CODES_AND_NAMES)AnnotationTaskRegistry)BaseMetadata)MAX_REQUIREDANNOTATIONS_VALUE)MAX_SEGMENTID_LENGTH)MAX_SEGMENTTEXT_LENGTH)seconds_to_timedelta)TextPairnamec                       sp   e Zd ZdZdZejeedee	deddZ
ejdedd	Zejded
d	Zdd Z fddZ  ZS )TextPairWithDomainzL
    Models a pair of two multi-line text segments with domain and URL.
    
Domainz(max. {value} characters)value)
max_lengthverbose_name	help_textTz
Source URL)blankr   z
Target URLc                 C   s   t | j| j| j| jS )zo
        Returns pairs of source and target sentences created from source
        and target segments.
        )zip
sourceTextsplitSENTENCE_DELIMITER
targetTextself r&   N/var/www/rival/public_html/translation-eval/EvalData/models/data_assessment.pyget_sentence_pairs9   s    z%TextPairWithDomain.get_sentence_pairsc                    s   t | jtdrdS t| j}|dk s.|tkr2dS t | jtdrFdS t| j}|dk s`|tkrddS | j | j}| j | j}t|t|krdS t| j	}|dk s|tkrdS t| j
}|dk s|tkrdS tt|  S )zS
        Validates the current TextPairWithDomain instance, checking text.
        zThis is a test sentence.F   )
isinstancer    typelenr   r#   stripr!   r"   	sourceURL	targetURLsuperr   is_valid)r%   _lenZ	_src_segsZ	_tgt_segs	__class__r&   r'   r1   D   s*    



zTextPairWithDomain.is_valid)__name__
__module____qualname____doc__r"   r   	CharFieldr   _fdocumentDomain	TextFieldr.   r/   r(   r1   __classcell__r&   r&   r3   r'   r   "   s"   r   c                
   @   sB  e Zd ZdZejddejddeddZej	e
dded	d
ZejedeededdZej	eddddededdZejededdZejdejdddddeddZdd Zdd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( Zd9d*d+Zed,d- Zed:d/d0Z ed1d2 Z!ed3d4 Z"d5d6 Z#d7d8 Z$d.S );DataAssessmentTaskz:
    Models a direct data assessment evaluation task.
    zCampaign.CampaignTz %(app_label)s_%(class)s_campaign%(app_label)s_%(class)ssCampaigndb_index	on_deleterelated_namerelated_query_namer   z%(app_label)s_%(class)s_itemsZItems)rE   rF   r   zRequired annotationsz(value in range=[1,{value}])r   r   r   z"%(app_label)s_%(class)s_assignedTozAssigned toz(users working on this task))r   rC   rE   rF   r   r   zBatch numberz	(1-based)zCampaign.CampaignDataz!%(app_label)s_%(class)s_batchDataz
Batch data)rD   r   rC   nullrE   rF   r   c                 C   s
   t | jS N)str	batchDatar$   r&   r&   r'   dataName   s    zDataAssessmentTask.dataNamec                 C   s   t | j jjS rI   )rJ   itemsfirstmetadatamarketr$   r&   r&   r'   
marketName   s    zDataAssessmentTask.marketNamec                 C   sD   t | j jjd}t|dkr@|d t kr@t|d  S d S Nr:      r   	rJ   rM   rN   rO   rP   r!   r,   r   keysr%   tokensr&   r&   r'   marketSourceLanguage   s    z'DataAssessmentTask.marketSourceLanguagec                 C   s@   t | j jjd}t|dkr<|d t kr<|d S d S rR   rT   rV   r&   r&   r'   marketSourceLanguageCode   s    z+DataAssessmentTask.marketSourceLanguageCodec                 C   sD   t | j jjd}t|dkr@|d t kr@t|d  S d S Nr:   rS   r)   rT   rV   r&   r&   r'   marketTargetLanguage   s    z'DataAssessmentTask.marketTargetLanguagec                 C   s@   t | j jjd}t|dkr<|d t kr<|d S d S rZ   rT   rV   r&   r&   r'   marketTargetLanguageCode   s    z+DataAssessmentTask.marketTargetLanguageCodec                 C   s*   t jj| dd|djddd}tt|S )NFT)task	activated	completed	createdByitem_idflat)DataAssessmentResultobjectsfiltervalues_listr,   set)r%   userresultsr&   r&   r'   completed_items_for_user   s        z+DataAssessmentTask.completed_items_for_userc                 C   s<   |j jdd rdS ddlm} |jj|| jd}| S )NAppenr   Fr   )TrustedUser)ri   campaign)groupsrf   existsZCampaign.modelsrm   re   rn   )r%   ri   rm   trusted_userr&   r&   r'   is_trusted_user   s
    z"DataAssessmentTask.is_trusted_userFc                 C   s  |  |}d }d}| j dD ]T}tjj|dd|d}| sntd	|j
|j| |rf|jdkrn|} qx|d7 }q"|std		| j
 tjj| ddd
jddd}tt|}	d}
|rd}
| j|
 }td	|	| |	|krtd	| j
 |   |   |r||fS |S )Nr   idFT)itemr^   r_   r`   z-identified next item: {0}/{1} for trusted={2}TGTr)   zNo next item found for task {0})r]   r^   r_   ra   rb   d   F   zUnique annotations={0}/{1}zCompleting task {0})rr   rM   allorder_byrd   re   rf   rp   printformatrs   itemTypeLOGGERinforg   r,   rh   requiredAnnotationscompletesave)r%   ri   Zreturn_completed_itemsrq   	next_itemcompleted_itemsrt   resultannotationsZuniqueAnnotationsZrequired_user_resultsZ_total_requiredr&   r&   r'   next_item_for_user   s^    
     
   


z%DataAssessmentTask.next_item_for_userc                 C   s<   | j j|ddddD ]}||}|d k	r|  S qd S )NTF)
assignedTor^   r_   z-id)re   rf   ry   r   )clsri   active_taskr   r&   r&   r'   get_task_for_user  s      

z$DataAssessmentTask.get_task_for_userNc                 C   s  | j jdd|d}|r|j|d}|jjdd rtj jdd||ddd	}tt}|D ]}||d
  |d  q\d}|D ]}	t	||	 dkr|d
7 }q|dkrd
|j|t	||j}
t|
 d S |dD ]2}|j }||jk r|r||j kr|  S qd S ]@}t| |d p&d}||d
 k r| j j|d d  S qd S ]B}|j jj}|j|ksvqT|j }||jk rT|  S qTd S )NTF)r^   r_   Z+items__metadata__market__targetLanguageCode)rn   rl   r   r^   r_   r`   Ztask__campaignra   task_idr)   r   rv   rS   zQUser {0} has already completed {1} tasks and created {2} results for campaign {3}rs   r   r      pk)re   rf   ro   rp   rd   rg   r   listappendr,   r{   usernamecampaignNamer}   r~   ry   r   countr   rx   rz   getrM   rN   rO   rP   targetLanguageCode)r   codern   ri   Zactive_tasksr   completed_tasksrt   Zvalidated_tasksr   _msgr   Zactive_usersrP   r&   r&   r'   get_next_free_task_for_language  sv     
	



       
z2DataAssessmentTask.get_next_free_task_for_languagec                 C   s   |  ||S rI   )r   )r   r   rn   r&   r&   r'   ,get_next_free_task_for_language_and_campaignX  s    z?DataAssessmentTask.get_next_free_task_for_language_and_campaignc                 C   s  |j }|jj}|j}d}|drt|sBd|}	t|	 dS t|}
dd |
	 D }|D ]4}|

|d}tjdkrt|}q`t|dd}q`ntt|
 dd}d	d
lm} | }d	}d	}d	}|D ]}|d	kr ||kr d|}	t|	 t|	 | }t||   dS td||d d  g }|d D ]}t|d }t|d }||krztd||d  |}||krtd||d d |}t|d |d |d |d ||d |d |d |d |d d
}|| q@t|dksdt|}	t|	 t|	 q|d7 }|jj|dd i |  t||d d! |d d ||d"}|  |jj|  |  d#t||d d }	t|	 t|	 qd$||}	t|	 t|	 | }t||  dS )%zO
        Creates new DataAssessmentTask instances based on JSON input.
        Nz.zipz!Batch {0} not a valid ZIP archivec                 S   s   g | ]}| d r|qS )z.json)endswith.0xr&   r&   r'   
<listcomp>m  s     
 z7DataAssessmentTask.import_from_json.<locals>.<listcomp>zutf-8)rS   	   r   )encodingr   )datetimez'Stopping after max_count={0} iterationszBatch name/no:r]   batchNorM   targetIDr#   zLongest target IDzLongest targetTextsourceIDr    itemIDr|   r<   r.   r/   )
r   r    r   r#   r`   r   r|   r<   r.   r/   rv   z)Expected 100 items for task but found {0}r)   bulkFr   )rn   r   r   rK   r`   z&Success processing batch {0}, task {1}zMax length ID={0}, text={1})rO   ZdataFiler   r   r   r{   r}   warnr   Znamelistreaddecodesysversion_infor   rJ   r   nowr~   rz   r,   encoder   r   textpair_setaddr   r?   rM   )r   rn   Z
batch_userZ
batch_data	max_countZ
batch_metaZ
batch_nameZ
batch_fileZ
batch_jsonr   Z	batch_zipZbatch_json_filesZbatch_json_fileZbatch_contentr   t1Zcurrent_countZmax_length_idZmax_length_textZ
batch_taskt2Z	new_itemsrt   Zcurrent_length_idZcurrent_length_textZnew_itemnew_taskr&   r&   r'   import_from_json\  s    












 



z#DataAssessmentTask.import_from_jsonc                 C   sD   t | dr| j sdS t | ds&dS | jD ]}| s, dS q,dS )zS
        Validates the current DA task, checking campaign and items exist.
        rn   FrM   T)hasattrrn   r1   rM   )r%   rt   r&   r&   r'   r1     s    

zDataAssessmentTask.is_validc                 C   s   d | jj| j| jS )Nz{0}.{1}[{2}])r{   r4   r5   rn   rs   r$   r&   r&   r'   _generate_str_name  s    z%DataAssessmentTask._generate_str_name)F)NN)%r5   r6   r7   r8   r   
ForeignKeyPROTECTr:   rn   ManyToManyFieldr   rM   PositiveSmallIntegerFieldr;   r   r   r   r   PositiveIntegerFieldr   rK   rL   rQ   rX   rY   r[   r\   rk   rr   r   classmethodr   r   r   r   r1   r   r&   r&   r&   r'   r?   g   s   	

 

3

J

r?   c                
   @   sX  e Zd ZdZejededdZejddededdZej	ededdZ
ej	ed	eddZejedejd
deddZejedddejddeddZdd Zdd Zdd Zed2ddZedd Zedd Zedd Zedd  Zed!d" Zed#d$ Zed3d&d'Zed(d) Zed4d*d+Zed5d.d/Z ed0d1 Z!d,S )6rd   z<
    Models a direct data assessment evaluation result.
    ZScorez(value in range=[1,100])rG   T)r   rH   r   r   z
Start timez(in seconds)zEnd timez%(app_label)s_%(class)s_itemr@   ZItemrB   z%(app_label)s_%(class)s_taskTask)r   rC   rH   rD   rE   rF   r   c                 C   s   d | jj| j| jS )Nz{0}.{1}={2})r{   r4   r5   rt   scorer$   r&   r&   r'   r     s    z'DataAssessmentResult._generate_str_namec                 C   s   | j | j }t|dS )Nr)   )end_time
start_timeround)r%   dr&   r&   r'   duration   s    zDataAssessmentResult.durationc                 C   s   | j jS rI   )rt   r|   r$   r&   r&   r'   	item_type$  s    zDataAssessmentResult.item_typec                 C   s0   | j j|ddd}|r(|d  S | S )NFTr`   r^   r_   Zitem__id)re   rf   rg   distinctr   )r   ri   Zunique_only_queryr&   r&   r'   get_completed_for_user'  s    z+DataAssessmentResult.get_completed_for_userc                 C   sx   t t}| jj|dddddD ]*}|d  dkr8q"||d   d7  < q"t| }td	d
 | D }||fS )NFTr   task__iditem__itemTyper)   tgtr   c                 S   s   g | ]}|d kr|qS )rw   r&   r   r&   r&   r'   r   ;  s      z@DataAssessmentResult.get_hit_status_for_user.<locals>.<listcomp>)	r   intre   rf   rg   lowerr,   rU   values)r   ri   	user_dataZ	user_itemZ
total_hitsZcompleted_hitsr&   r&   r'   get_hit_status_for_user.  s       z,DataAssessmentResult.get_hit_status_for_userc                 C   sB   | j j|ddd}g }|D ]}|j|j }|| qtt|S )NFTr   )re   rf   r   r   r   r   sum)r   ri   rj   Z	durationsr   r   r&   r&   r'   get_time_for_user?  s    z&DataAssessmentResult.get_time_for_userc                 C   s   t t}d}| jjd|d}d}|j| D ]V}|d }|d }|d }|d }	|d	 }
d
|d |d }|| ||	|
|f q*|S )Nru   CHKTr_   Zitem__itemType__in)item__targetIDr   rankr`   item__itemID*item__metadata__market__sourceLanguageCode*item__metadata__market__targetLanguageCoder   r)   r   rS      {0}-{1}      )r   r   re   rf   rg   r{   r   )r   system_scoresvalue_typesqsvalue_namesr   systemIDr   r   annotatorID	segmentIDmarketIDr&   r&   r'   get_system_annotationsJ  s    	z+DataAssessmentResult.get_system_annotationsc                    s  ddl m  tt}| jjdd}d}|j| D ]4}|d  dkrFq0|d }|d }|| | q0tt}|D ]J}t	jj
|d	}d
 fdd|j D }	|	sd}	||	 ||  qri }
|D ]H}t|| }d}|D ]}|| |dkr|d7 }q|t|f|
|< q|
S )Nr   r   Tr_   )r`   r   r   r)   r   r   r   ;c                    s    g | ]}|j   kr|j qS r&   r   rU   r   r   r&   r'   r   y  s   zFDataAssessmentResult.compute_accurate_group_status.<locals>.<listcomp>NoGroupInforw   )Dashboard.modelsr   r   r   re   rf   rg   r   r   r   r   joinro   rx   extendrh   r   r,   )r   Zuser_statusr   r   r   r   taskIDZgroup_statusri   
usergroupsZ
group_hits
group_nameZtask_idsr   r   r&   r   r'   compute_accurate_group_statuse  s<    

z2DataAssessmentResult.compute_accurate_group_statusc           !         s
  ddl m  tt}i }| jjdd}d}|j| D ]4}|d }|d }|d }	|d }
|d	 }tt|t|
 d}|d
 }|d }d	|d |d }|d }|d }|d }|d }||kr|| d }|| d }|| d }nPt
jj|d}|j}|j}d fdd|j D }|s,d}|||f||< ||d |  ||||||||	|
||||f q4|}dg}|D ]0}|| D ] }|ddd |D  qqzddlm} ddlm} ||d|}t|d&}|D ]} ||  |d  qW 5 Q R X d S )!Nr   r   Tr   )r   r   r   r   r   r`   r   r   r   "item__metadata__market__domainNamer   r   Ztask__campaign__campaignNamer)   r   rS   r   r   r   r         r   
         r   r   c                    s    g | ]}|j   kr|j qS r&   r   r   r   r&   r'   r     s   zEDataAssessmentResult.dump_all_results_to_csv_file.<locals>.<listcomp>r   -zttaskID,systemID,username,email,groups,segmentID,score,rank,startTime,endTime,durationInSeconds,itemType,campaignName,c                 S   s   g | ]}t |qS r&   rJ   r   ar&   r&   r'   r     s     r   BASE_DIRmediawr   )r   r   r   r   re   rf   rg   r   floatr{   r   r   r   emailr   ro   rx   r   os.pathAppraise.settingsr  openwrite)!r   Zcsv_filer   r   r   r   r   r   r   r   r   r   r   r   r   r   
domainNamer|   r   r   r   	useremailr   ri   r   slir   r  media_file_pathoutfilecr&   r   r'   dump_all_results_to_csv_file  s~    
"
z1DataAssessmentResult.dump_all_results_to_csv_filec                 C   s  t t}| jjdd}d}|j| D ]}||d kr$||d kr$||d ksNq$|d }|d }	|d	 }
|d
 }|d }tt|t| d}|d }|d }d|d |d }|d }|d }tjj	|d}|j
}|j}||d |  |||||	|
||f q$|S )NTr   )r   r   r   r   r   r`   r   r   r   r   r   r   r   r   r   r)   r   rS   r   r   r   r   r   r   r   )r   r   re   rf   rg   r   r  r{   r   r   r   r  r   )r   srcCodetgtCodedomainr   r   r   r   r   r   r   r   r   r   r   r   r   r  r|   ri   r   r  r&   r&   r'   get_csv  sJ    


zDataAssessmentResult.get_csvFc              	   C   s   |  |||}dg}|r(d|d  |d< |D ]@}|| D ]2}	|sL|	dd  n|	}
|ddd |
D  q8q,ddlm} dd	lm} ||d
|}t|d$}|D ]}|| |d qW 5 Q R X d S )Nz>username,email,segmentID,score,rank,durationInSeconds,itemTypez	systemID,r   r)   r   c                 S   s   g | ]}t |qS r&   r   r   r&   r&   r'   r   0  s     z2DataAssessmentResult.write_csv.<locals>.<listcomp>r   r  r  r  r   )r  r   r   r  r  r  r	  r
  )r   r  r  r  ZcsvFileZallDatar   r  r  r  er   r  r  r  r  r&   r&   r'   	write_csv&  s    
zDataAssessmentResult.write_csvc                 C   s~   t t}d}| jjd|d}|r,|j|d}d}|j| D ]>}|d d}|d }|d	 }	|D ]}
||
 ||	f q`q:|S )
Nr   Tr   Ztask__campaign__id)r   r   r   r   +r)   r   )r   r   re   rf   rg   r!   r   )r   campaign_idr   r   r   r   r   
system_idsZ
segment_idr   	system_idr&   r&   r'   get_system_scores;  s    z&DataAssessmentResult.get_system_scoresc                 C   s   g }d}|r|d7 }| j jd|d}|r4|j|d}|sD|jdd}d}	|rT|	d }	|r`|	d	 }	|j|	 D ]}
|
d
 }|
d dd}|dd}|r|d}|D ]&}|f|f |
dd   }|| qqj|}|f|f |
dd   }|| qj|S )Nr   )BADREFTr   r  )ZcreatedBy__is_active)
ZcreatedBy__usernameZitem__documentDomainZitem__targetURLr   r   r   r   r   r   r   )r   r   )Ztask__batchNora   r   r)   zTransformer+R2LZTransformer_R2LzR2L+BackZR2L_Backr  r   )re   rf   rg   replacer!   r   )r   r  Zextended_csvZexpand_multi_sysZinclude_inactiveZadd_batch_infoZsystem_dataZ
item_typesr   Zattributes_to_extractr   user_idZ
_fixed_idsr  r  datar&   r&   r'   get_system_dataT  s6    	
z$DataAssessmentResult.get_system_dataNrS   c              	      s   | j d d}d}dd |D dd |D  }i }|D ]4 i | <  fdd|D D ]}|| |  |< qRq4i }|D ] tdd |   D }	g }
|  D ]>}|  | }t||	 }|
|t|t|t| |f qtt|
fdd	d
d| < qr|S )N)r  )csdefilvtrr*  ruzhc                 S   s   g | ]}d  |qS )zen-{0}r{   r   r&   r&   r'   r     s     z:DataAssessmentResult.get_system_status.<locals>.<listcomp>c                 S   s   g | ]}d  |qS )z{0}-enr-  r   r&   r&   r'   r     s    c                    s   g | ]} |kr|qS r&   r&   r   )r   r&   r'   r     s      c                 S   s   g | ]}t |qS r&   )r,   r   r&   r&   r'   r     s     c                    s   |   S rI   r&   )r   )
sort_indexr&   r'   <lambda>      z8DataAssessmentResult.get_system_status.<locals>.<lambda>T)keyreverse)r  r   r   r   r,   r   sorted)r   r  r.  r   Znon_english_codescodesr$  r1  Zoutput_dataZtotal_annotationsZoutput_localr   zr&   )r   r.  r'   get_system_status  s,    $
z&DataAssessmentResult.get_system_statusc                 C   s*   | j jdd||djddd}tt|S )NFTr   ra   rb   )re   rf   rg   r,   rh   )r   ri   rn   rj   r&   r&   r'   'completed_results_for_user_and_campaign  s     z<DataAssessmentResult.completed_results_for_user_and_campaign)T)F)FTFF)NrS   )"r5   r6   r7   r8   r   r   r:   r   r   
FloatFieldr   r   r   r   r   rt   r?   r]   r   r   r   r   r   r   r   r   r   r  r  r  r  r%  r6  r7  r&   r&   r&   r'   rd     s      	




*
^
6
    D&rd   )%r8   r   collectionsr   jsonr   zipfiler   r   django.contrib.auth.modelsr   	django.dbr   django.utils.textr   r;   django.utils.translationr	   r:   Appraise.utilsr
   r   r   ZEvalData.models.base_modelsr   r   r   r   r   r   r   r5   r}   r   registerr?   rd   r&   r&   r&   r'   <module>   s4   
E   