Skip to content

Commit

Permalink
Merge pull request #1 from Toloka/update
Browse files Browse the repository at this point in the history
Sync with changes in crowdkit, minor QOL improvements
  • Loading branch information
btseytlin authored Jun 7, 2022
2 parents 9fe8143 + 1a29205 commit d9f72bc
Show file tree
Hide file tree
Showing 7 changed files with 31 additions and 24 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ TOLOKA_API_TOKEN=<your token>
```
python toloka_monitoring/setup_toloka_project.py
```
The script will print `TOLOKA_PROJECT_ID` and `TOLOKA_POOL_ID`.
The script will print `TOLOKA_PROJECT_ID`.

Put them into `toloka_monitoring/config.py`.
Put it into `toloka_monitoring/config.py`.

5. Start the API and make predictions:
```
Expand Down
Binary file modified preview.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion toloka_monitoring/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@

if __name__ == '__main__':
app = create_app()
app.run(port=8000, host="0.0.0.0")
app.run(port=8000, host="0.0.0.0", debug=False)
1 change: 0 additions & 1 deletion toloka_monitoring/_template_config.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
SQLALCHEMY_DATABASE_URI='sqlite:///db.sqlite'
TOLOKA_API_TOKEN='your token here'
TOLOKA_PROJECT_ID=00000
TOLOKA_POOL_ID=00000000
21 changes: 15 additions & 6 deletions toloka_monitoring/api/templates/monitoring.html
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@
</div>
<div id="templates" >
<div id="statusLineTemplate" class="statusLine">
<canvas id="f1_score_chart" width="400" height="400"></canvas>
{% if metrics %}

<canvas id="f1_score_chart" width="300" height="200"></canvas>
<script>
const DATA_COUNT = 7;
const NUMBER_CFG = {count: DATA_COUNT, min: -100, max: 100};

let labels = {{ metrics['time_created_str']|tojson }};

Expand All @@ -37,10 +37,15 @@
type: 'line',
data: data,
options: {
layout: {
padding: 20
},
scales: {
x: [{
type: 'timeseries',
}],

y: {
min: 0,
max: 1.2,
},
}
},
responsive: true,
Expand All @@ -55,6 +60,10 @@
const ctx = document.getElementById('f1_score_chart').getContext('2d');
const myChart = new Chart(ctx, config);
</script>
{% else %}
<h3>No metrics in database<br/>
Run monitoring script to see a cool plot.</h3>
{% endif %}
</div>
</div>
</div>
Expand Down
15 changes: 8 additions & 7 deletions toloka_monitoring/compute_monitoring_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@

import numpy as np

from crowdkit.aggregation import DawidSkene
from crowdkit.aggregation import MajorityVote
from sklearn.metrics import confusion_matrix

import toloka.client as toloka
from toloka_monitoring.api.db import init_db, db_session_factory
from toloka_monitoring.api.models import Prediction, PredictionLabel, MonitoringCounts
from toloka_monitoring.config import SQLALCHEMY_DATABASE_URI, TOLOKA_API_TOKEN, TOLOKA_POOL_ID

from toloka_monitoring.config import SQLALCHEMY_DATABASE_URI, TOLOKA_API_TOKEN, TOLOKA_PROJECT_ID
from toloka_monitoring.setup_toloka_project import create_pool

def get_predictions_for_labelling(session, limit=10):
query = session.query(Prediction).join(PredictionLabel, isouter=True)\
Expand Down Expand Up @@ -56,9 +56,9 @@ def annotate_with_toloka(predictions, toloka_client, toloka_pool_id):
answers_df = answers_df.rename(columns={
'INPUT:pred_id': 'task',
'OUTPUT:label': 'label',
'ASSIGNMENT:worker_id': 'performer',
'ASSIGNMENT:worker_id': 'worker',
})
aggregated_answers = DawidSkene(n_iter=100).fit_predict(answers_df)
aggregated_answers = MajorityVote().fit_predict(answers_df)
return aggregated_answers.to_dict()


Expand Down Expand Up @@ -102,10 +102,11 @@ def compute_monitoring_metrics():
init_db(db_uri)
session = db_session_factory()
toloka_client = toloka.TolokaClient(TOLOKA_API_TOKEN, 'PRODUCTION')
toloka_pool_id = TOLOKA_POOL_ID
project = toloka_client.get_project(TOLOKA_PROJECT_ID)
pool = create_pool(toloka_client, project.id)
predictions = get_predictions_for_labelling(session)
print(f'Annotating {len(predictions)} predictions with Toloka')
crowd_annotations = get_prediction_crowd_annotations(predictions, toloka_client, toloka_pool_id)
crowd_annotations = get_prediction_crowd_annotations(predictions, toloka_client, pool.id)
for prediction_crowd_annotation in crowd_annotations:
session.add(prediction_crowd_annotation)

Expand Down
12 changes: 5 additions & 7 deletions toloka_monitoring/setup_toloka_project.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import toloka.client as toloka
import toloka.client.project.template_builder as tb
from toloka.client.primitives.operators import CompareOperator
from toloka_monitoring.config import SQLALCHEMY_DATABASE_URI, TOLOKA_API_TOKEN, TOLOKA_POOL_ID
from toloka_monitoring.config import TOLOKA_API_TOKEN


def create_project():
Expand Down Expand Up @@ -80,10 +80,10 @@ def create_project():
return project


def create_pool(toloka_client, project):
def create_pool(toloka_client, project_id):
global_skill = toloka_client.get_skill("25627")
pool = toloka.Pool(
project_id=project.id,
project_id=project_id,
private_name='Monitoring pool',
may_contain_adult_content=False,
reward_per_assignment=0.01,
Expand All @@ -102,6 +102,7 @@ def create_pool(toloka_client, project):
operator=CompareOperator.GT,
value=30),
])
pool = toloka_client.create_pool(pool)
return pool


Expand All @@ -111,7 +112,4 @@ def create_pool(toloka_client, project):
project = create_project()
project = toloka_client.create_project(project)

pool = create_pool(toloka_client, project)
pool = toloka_client.create_pool(pool)

print(f'Toloka project: {project.id}, pool: {pool.id}')
print(f'Toloka project: {project.id}')

0 comments on commit d9f72bc

Please sign in to comment.