U
    ben                     @   s   d Z ddlZddlmZ ddlmZ ddlmZ ddlmZ ddl	m
Z
 ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ eedZejG dd deZ G dd deZ!dS )z>
Appraise evaluation framework

See LICENSE for usage details
    N)defaultdict)loads)
is_zipfile)ZipFile)User)models)format_lazy)ugettext_lazy)_get_loggerLANGUAGE_CODES_AND_NAMES)AnnotationTaskRegistry)BaseMetadata)MAX_REQUIREDANNOTATIONS_VALUE)seconds_to_timedelta)TextPair)namec                
   @   sB  e Zd ZdZejddejddeddZej	e
dded	d
ZejedeededdZej	eddddededdZejededdZejdejdddddeddZdd Zdd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( Zd9d*d+Zed,d- Zed:d/d0Z ed1d2 Z!ed3d4 Z"d5d6 Z#d7d8 Z$d.S );DirectAssessmentTaskz5
    Models a direct assessment evaluation task.
    zCampaign.CampaignTz %(app_label)s_%(class)s_campaign%(app_label)s_%(class)ssCampaigndb_index	on_deleterelated_namerelated_query_nameverbose_namez%(app_label)s_%(class)s_itemsItems)r   r   r   zRequired annotationsz(value in range=[1,{value}]))valuer   	help_textz"%(app_label)s_%(class)s_assignedTozAssigned toz(users working on this task))blankr   r   r   r   r   zBatch numberz	(1-based)zCampaign.CampaignDataz!%(app_label)s_%(class)s_batchDataz
Batch data)r   r    r   nullr   r   r   c                 C   s
   t | jS N)str	batchDataself r'   P/var/www/rival/public_html/translation-eval/EvalData/models/direct_assessment.pydataNameY   s    zDirectAssessmentTask.dataNamec                 C   s   t | j jjS r"   )r#   itemsfirstmetadatamarketr%   r'   r'   r(   
marketName\   s    zDirectAssessmentTask.marketNamec                 C   sD   t | j jjd}t|dkr@|d t kr@t|d  S d S N_   r   	r#   r*   r+   r,   r-   splitlenr   keysr&   tokensr'   r'   r(   marketSourceLanguage_   s    z)DirectAssessmentTask.marketSourceLanguagec                 C   s@   t | j jjd}t|dkr<|d t kr<|d S d S r/   r2   r6   r'   r'   r(   marketSourceLanguageCodee   s    z-DirectAssessmentTask.marketSourceLanguageCodec                 C   sD   t | j jjd}t|dkr@|d t kr@t|d  S d S Nr0   r1      r2   r6   r'   r'   r(   marketTargetLanguagek   s    z)DirectAssessmentTask.marketTargetLanguagec                 C   s@   t | j jjd}t|dkr<|d t kr<|d S d S r:   r2   r6   r'   r'   r(   marketTargetLanguageCodeq   s    z-DirectAssessmentTask.marketTargetLanguageCodec                 C   s*   t jj| dd|djddd}tt|S )NFT)task	activated	completed	createdByitem_idflat)DirectAssessmentResultobjectsfiltervalues_listr4   set)r&   userresultsr'   r'   r(   completed_items_for_userw   s        z-DirectAssessmentTask.completed_items_for_userc                 C   s&   ddl m} |jj|| jd}| S )Nr   )TrustedUser)rJ   campaign)Campaign.modelsrM   rF   rG   rN   exists)r&   rJ   rM   trusted_userr'   r'   r(   is_trusted_user~   s    z$DirectAssessmentTask.is_trusted_userFc                 C   s  |  |}d }d}| j dD ]T}tjj|dd|d}| sntd	|j
|j| |rf|jdkrn|} qx|d7 }q"|std		| j
 tjj| ddd
jddd}tt|}	d}
|rd}
| j|
 }td	|	| |	|krtd	| j
 |   |   |r||fS |S )Nr   idFT)itemr?   r@   rA   z-identified next item: {0}/{1} for trusted={2}TGTr;   zNo next item found for task {0})r>   r?   r@   rB   rC   d   F   zUnique annotations={0}/{1}zCompleting task {0})rR   r*   allorder_byrE   rF   rG   rP   printformatrS   itemTypeLOGGERinforH   r4   rI   requiredAnnotationscompletesave)r&   rJ   return_completed_itemsrQ   	next_itemcompleted_itemsrT   resultannotationsuniqueAnnotationsrequired_user_results_total_requiredr'   r'   r(   next_item_for_user   s^    
     
   


z'DirectAssessmentTask.next_item_for_userc                 C   s<   | j j|ddddD ]}||}|d k	r|  S qd S )NTF)
assignedTor?   r@   z-id)rF   rG   rY   rj   )clsrJ   active_taskrc   r'   r'   r(   get_task_for_user   s      

z&DirectAssessmentTask.get_task_for_userNc                 C   s   | j jdd|d}|r"|j|d}|dD ]2}|j }||jk r,|r,||j kr,|  S q,d S ]:}t| |d pzd	}||d
 k rd| j j	|d	 d  S qdd S ]:}|j
 jj}|j|ksq|j }||jk r|  S qd S )NTF)r?   r@   +items__metadata__market__targetLanguageCode)rN   rS   r_   rk      r   r;   pk)rF   rG   rY   rk   countr_   rX   rH   rZ   getr*   r+   r,   r-   targetLanguageCode)rl   coderN   rJ   active_tasksrm   active_usersr-   r'   r'   r(   get_next_free_task_for_language   s@    


       



z4DirectAssessmentTask.get_next_free_task_for_languagec                 C   s   |  ||S r"   )ry   )rl   rv   rN   r'   r'   r(   ,get_next_free_task_for_language_and_campaign   s    zADirectAssessmentTask.get_next_free_task_for_language_and_campaignc                 C   s  |j }|jj}|j}d}|drt|sBd|}	t|	 dS t|}
dd |
	 D }|D ]4}|

|d}tjdkrt|}q`t|dd}q`ntt|
 dd}d	d
lm} | }d	}d	}d	}|D ]}|d	kr||krd|}	t|	 | }t||   dS t||d d  g }|d D ]}t|d }t|d }||krnt||d  |}||krt||d d |}t|d |d |d |d ||d |d d}|| q6t|dksdt|}	t|	 q|d7 }|jj|ddi |  t||d d |d d ||d}|  |jj|  |  dt||d d }	t|	 qd||}	t|	 | }t||  dS )zQ
        Creates new DirectAssessmentTask instances based on JSON input.
        Nz.zipz!Batch {0} not a valid ZIP archivec                 S   s   g | ]}| d r|qS )z.json)endswith.0xr'   r'   r(   
<listcomp>  s     
 z9DirectAssessmentTask.import_from_json.<locals>.<listcomp>zutf-8)r1   	   r   )encodingr   )datetimez'Stopping after max_count={0} iterationsr>   batchNor*   targetID
targetTextsourceID
sourceTextitemIDr\   )r   r   r   r   rA   r   r\   rV   z)Expected 100 items for task but found {0}r;   bulkFr_   )rN   r_   r   r$   rA   z&Success processing batch {0}, task {1}zMax length ID={0}, text={1})r,   dataFiler   r{   r   r[   r]   warnr   namelistreaddecodesysversion_infor   r#   r   nowr^   rZ   r4   encoder   appendtextpair_setaddra   r   r*   )rl   rN   
batch_user
batch_data	max_count
batch_meta
batch_name
batch_file
batch_json_msg	batch_zipbatch_json_filesbatch_json_filebatch_contentr   t1current_countmax_length_idmax_length_text
batch_taskt2	new_itemsrT   current_length_idcurrent_length_textnew_itemnew_taskr'   r'   r(   import_from_json   s    









	


 

z%DirectAssessmentTask.import_from_jsonc                 C   sD   t | dr| j sdS t | ds&dS | jD ]}| s, dS q,dS )zS
        Validates the current DA task, checking campaign and items exist.
        rN   Fr*   T)hasattrrN   is_validr*   )r&   rT   r'   r'   r(   r   f  s    

zDirectAssessmentTask.is_validc                 C   s   d | jj| j| jS )Nz{0}.{1}[{2}])r[   	__class____name__rN   rS   r%   r'   r'   r(   _generate_str_namev  s    z'DirectAssessmentTask._generate_str_name)F)NN)%r   
__module____qualname____doc__r   
ForeignKeyPROTECTr0   rN   ManyToManyFieldr   r*   PositiveSmallIntegerFieldfr   r_   r   rk   PositiveIntegerFieldr   r$   r)   r.   r8   r9   r<   r=   rL   rR   rj   classmethodrn   ry   rz   r   r   r   r'   r'   r'   r(   r       s   	

 
3

+

sr   c                
   @   s>  e Zd ZdZejededdZejededdZ	ejededdZ
ejedejd	d
eddZejedddejdd
eddZdd Zdd Zdd Zed1ddZedd Zedd Zedd Zedd Zed d! Zed"d# Zed2d%d&Zed'd( Zed3d)d*Zed4d-d.Zed/d0 Z d+S )5rE   z7
    Models a direct assessment evaluation result.
    Scorez(value in range=[1,100])r   z
Start timez(in seconds)zEnd timeTz%(app_label)s_%(class)s_itemr   Itemr   z%(app_label)s_%(class)s_taskTask)r    r   r!   r   r   r   r   c                 C   s   d | jj| j| jS )Nz{0}.{1}={2})r[   r   r   rT   scorer%   r'   r'   r(   r     s    z)DirectAssessmentResult._generate_str_namec                 C   s   | j | j }t|dS )Nr;   )end_time
start_timeround)r&   dr'   r'   r(   duration  s    zDirectAssessmentResult.durationc                 C   s   | j jS r"   )rT   r\   r%   r'   r'   r(   	item_type  s    z DirectAssessmentResult.item_typec                 C   s0   | j j|ddd}|r(|d  S | S )NFTrA   r?   r@   item__id)rF   rG   rH   distinctrs   )rl   rJ   unique_only_queryr'   r'   r(   get_completed_for_user  s    z-DirectAssessmentResult.get_completed_for_userc                 C   sx   t t}| jj|dddddD ]*}|d  dkr8q"||d   d7  < q"t| }td	d
 | D }||fS )NFTr   task__iditem__itemTyper;   tgtr   c                 S   s   g | ]}|d kr|qS )rW   r'   r|   r'   r'   r(   r     s      zBDirectAssessmentResult.get_hit_status_for_user.<locals>.<listcomp>)	r   intrF   rG   rH   lowerr4   r5   values)rl   rJ   	user_data	user_item
total_hitscompleted_hitsr'   r'   r(   get_hit_status_for_user  s       z.DirectAssessmentResult.get_hit_status_for_userc                 C   sB   | j j|ddd}g }|D ]}|j|j }|| qtt|S )NFTr   )rF   rG   r   r   r   r   sum)rl   rJ   rK   	durationsre   r   r'   r'   r(   get_time_for_user  s    z(DirectAssessmentResult.get_time_for_userc                 C   s~   t t}d}| jjd|d}d}|j| D ]N}|d }|d }|d }|d }	d	|d
 |d }
||
 |||	|f q*|S )NrU   CHKTr@   item__itemType__in)item__targetIDr   rA   item__itemID*item__metadata__market__sourceLanguageCode*item__metadata__market__targetLanguageCoder   r;   rp   r1   {0}-{1}      )r   listrF   rG   rH   r[   r   )rl   system_scoresvalue_typesqsvalue_namesre   systemIDr   annotatorID	segmentIDmarketIDr'   r'   r(   get_system_annotations  s    z-DirectAssessmentResult.get_system_annotationsc                    s  ddl m  tt}| jjdd}d}|j| D ]4}|d  dkrFq0|d }|d }|| | q0tt}|D ]J}t	jj
|d	}d
 fdd|j D }	|	sd}	||	 ||  qri }
|D ]H}t|| }d}|D ]}|| |dkr|d7 }q|t|f|
|< q|
S )Nr   r   Tr@   )rA   r   r   r;   r   rp   rq   ;c                    s    g | ]}|j   kr|j qS r'   r   r5   r|   r   r'   r(   r     s   zHDirectAssessmentResult.compute_accurate_group_status.<locals>.<listcomp>NoGroupInforW   )Dashboard.modelsr   r   r   rF   rG   rH   r   r   r   rt   joingroupsrX   extendrI   rs   r4   )rl   user_statusr   r   re   r   taskIDgroup_statusrJ   
usergroups
group_hits
group_nametask_idscompleted_taskstask_idr'   r   r(   compute_accurate_group_status  s<    

z4DirectAssessmentResult.compute_accurate_group_statusc                     s   ddl m  tt}i }| jjdd}d}|j| D ]*}|d }|d }|d }	|d }
tt|
t|	 d}|d	 }|d
 }d	|d |d }|d }|d }|d }|d }||kr|| d }|| d }|| d }nPt
jj|d}|j}|j}d fdd|j D }|s$d}|||f||< ||d |  ||||||||	|
|||f q4|}dg}|D ]0}|| D ] }|ddd |D  q|qpddlm} ddlm} ||d|}t|d&}|D ]}|| |d qW 5 Q R X d S ) Nr   r   Tr   )r   r   r   r   rA   r   r   r   "item__metadata__market__domainNamer   r   task__campaign__campaignNamer;   rp   r1   r   r   r            r   
      rq   r   c                    s    g | ]}|j   kr|j qS r'   r   r|   r   r'   r(   r   A  s   zGDirectAssessmentResult.dump_all_results_to_csv_file.<locals>.<listcomp>r   -zotaskID,systemID,username,email,groups,segmentID,score,startTime,endTime,durationInSeconds,itemType,campaignName,c                 S   s   g | ]}t |qS r'   r#   r}   ar'   r'   r(   r   d  s     r   BASE_DIRmediaw
)r   r   r   r   rF   rG   rH   r   floatr[   r   rt   usernameemailr   r   rX   r   os.pathAppraise.settingsr  openwrite) rl   csv_filer   r   r   r   re   r   r   r   r   r   r   r   r   
domainNamer\   r   campaignNamer  	useremailr   rJ   r~   slir   r  media_file_pathoutfilecr'   r   r(   dump_all_results_to_csv_file  sz    
"
z3DirectAssessmentResult.dump_all_results_to_csv_filec              
   C   s   t t}| jjdd}d}|j| D ]}||d kr$||d kr$||d ksNq$|d }|d }	|d	 }
|d
 }tt|t|
 d}|d }|d }d|d |d }|d }|d }tjj	|d}|j
}|j}||d |  |||||	||f q$|S )NTr   )
r   r   r   r   rA   r   r   r   r  r   r  r  r  r   r;   rp   r1   r   r   r   r   rq   r	  )r   r   rF   rG   rH   r   r  r[   r   rt   r  r  r   )rl   srcCodetgtCodedomainr   r   r   re   r   r   r   r   r   r   r   r   r  r\   rJ   r  r  r'   r'   r(   get_csvo  sF    


zDirectAssessmentResult.get_csvFc              	   C   s   |  |||}dg}|r(d|d  |d< |D ]@}|| D ]2}	|sL|	dd  n|	}
|ddd |
D  q8q,ddlm} dd	lm} ||d
|}t|d$}|D ]}|| |d qW 5 Q R X d S )Nz9username,email,segmentID,score,durationInSeconds,itemTypez	systemID,r   r;   r
  c                 S   s   g | ]}t |qS r'   r  r  r'   r'   r(   r     s     z4DirectAssessmentResult.write_csv.<locals>.<listcomp>r  r  r  r  r  )r)  r   r   r  r  r  r  r  )rl   r&  r'  r(  csvFileallDatar~   r  r   r!  er   r  r"  r#  r$  r'   r'   r(   	write_csv  s    
z DirectAssessmentResult.write_csvc                 C   s~   t t}d}| jjd|d}|r,|j|d}d}|j| D ]>}|d d}|d }|d	 }	|D ]}
||
 ||	f q`q:|S )
Nr   Tr   task__campaign__id)r   r   r   r   +r;   rp   )r   r   rF   rG   rH   r3   r   )rl   campaign_idr   r   r   r   re   
system_ids
segment_idr   	system_idr'   r'   r(   get_system_scores  s    z(DirectAssessmentResult.get_system_scoresc                 C   s   g }d}|r|d7 }| j jd|d}|r4|j|d}|sD|jdd}d}	|rT|	d }	|r`|	d	 }	|j|	 D ]}
|
d
 }|
d dd}|dd}|r|d}|D ]&}|f|f |
dd   }|| qqj|}|f|f |
dd   }|| qj|S )Nr   )BADREFTr   r.  )createdBy__is_active)createdBy__usernamer   r   r   r   r   r   )r   r   )task__batchNorB   r   r;   zTransformer+R2LTransformer_R2LzR2L+BackR2L_Backr0  rp   )rF   rG   rH   replacer3   r   )rl   r1  extended_csvexpand_multi_sysinclude_inactiveadd_batch_infosystem_data
item_typesr   attributes_to_extractre   user_id
_fixed_idsr2  r4  datar'   r'   r(   get_system_data  s6    	

z&DirectAssessmentResult.get_system_dataNr1   c              	      s   | j d d}d}dd |D dd |D  }i }|D ]4 i | <  fdd|D D ]}|| |  |< qRq4i }|D ] tdd |   D }	g }
|  D ]>}|  | }t||	 }|
|t|t|t| |f qtt|
fdd	d
d| < qr|S )N)r1  )csdefilvtrrM  ruzhc                 S   s   g | ]}d  |qS )zen-{0}r[   r|   r'   r'   r(   r   "  s     z<DirectAssessmentResult.get_system_status.<locals>.<listcomp>c                 S   s   g | ]}d  |qS )z{0}-enrP  r|   r'   r'   r(   r   "  s    c                    s   g | ]} |kr|qS r'   r'   r|   )rv   r'   r(   r   )  s      c                 S   s   g | ]}t |qS r'   )r4   r|   r'   r'   r(   r   .  s     c                    s   |   S r"   r'   )r~   )
sort_indexr'   r(   <lambda>6      z:DirectAssessmentResult.get_system_status.<locals>.<lambda>T)keyreverse)r5  r   r   r   r4   r   sorted)rl   r1  rQ  r   non_english_codescodesrG  rT  output_datatotal_annotationsoutput_localr~   zr'   )rv   rQ  r(   get_system_status  s,    $
z(DirectAssessmentResult.get_system_statusc                 C   s*   | j jdd||djddd}tt|S )NFT)r?   r@   rA   task__campaignrB   rC   )rF   rG   rH   r4   rI   )rl   rJ   rN   rK   r'   r'   r(   'completed_results_for_user_and_campaign;  s     z>DirectAssessmentResult.completed_results_for_user_and_campaign)T)F)FTFF)Nr1   )!r   r   r   r   r   r   r0   r   
FloatFieldr   r   r   r   r   rT   r   r>   r   r   r   r   r   r   r   r   r  r%  r)  r-  r5  rH  r]  r_  r'   r'   r'   r(   rE   z  s|      	




*
\
4
    A&rE   )"r   r   collectionsr   jsonr   zipfiler   r   django.contrib.auth.modelsr   	django.dbr   django.utils.textr   r   django.utils.translationr	   r0   Appraise.utilsr
   r   r   ZEvalData.models.base_modelsr   r   r   r   r   r   r]   registerr   rE   r'   r'   r'   r(   <module>   s,   
  [