Skip to content

Commit

Permalink
Merge pull request #139 from drjova/elasticsearch-only-option
Browse files Browse the repository at this point in the history
  webstat: downloads and pageviews as events
  • Loading branch information
egabancho committed Mar 10, 2016
2 parents 83776c6 + b4ed2ac commit d424e56
Show file tree
Hide file tree
Showing 8 changed files with 291 additions and 194 deletions.
46 changes: 20 additions & 26 deletions modules/bibdocfile/lib/bibdocfile.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# This file is part of Invenio.
# Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 CERN.
# Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 CERN.
#
# Invenio is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
Expand Down Expand Up @@ -95,6 +95,7 @@
from sets import Set as set
# pylint: enable=W0622

#from invenio.webstat import register_customevent
from invenio.shellutils import escape_shell_arg, run_shell_command
from invenio.dbquery import run_sql, DatabaseError
from invenio.errorlib import register_exception
Expand Down Expand Up @@ -123,7 +124,6 @@
CFG_BIBDOCFILE_ADDITIONAL_KNOWN_MIMETYPES, \
CFG_BIBDOCFILE_PREFERRED_MIMETYPES_MAPPING, \
CFG_BIBCATALOG_SYSTEM, \
CFG_ELASTICSEARCH_LOGGING, \
CFG_ELASTICSEARCH_BOT_AGENT_STRINGS
from invenio.bibcatalog import BIBCATALOG_SYSTEM
from invenio.bibdocfile_config import CFG_BIBDOCFILE_ICON_SUBFORMAT_RE, \
Expand All @@ -132,10 +132,6 @@

import invenio.template

if CFG_ELASTICSEARCH_LOGGING:
import logging

_DOWNLOAD_LOG = logging.getLogger('events.downloads')

def _plugin_bldr(dummy, plugin_code):
"""Preparing the plugin dictionary structure"""
Expand Down Expand Up @@ -2983,29 +2979,27 @@ def register_download(self, ip_address, version, docformat, user_agent,
docformat = docformat.upper()
if not version:
version = self.get_latest_version()
if CFG_ELASTICSEARCH_LOGGING:
log_entry = {
'id_bibrec': recid,
'id_bibdoc': self.id,
'file_version': version,
'file_format': docformat,
'id_user': userid,
'client_host': ip_address,
'user_agent': user_agent
}
if user_agent is not None:

try:
from invenio.webstat import register_customevent
## register event in webstat
download_register_event = [
recid, self.id, version, docformat, userid, ip_address,
user_agent
]
is_bot = False
if user_agent:
for bot in CFG_ELASTICSEARCH_BOT_AGENT_STRINGS:
if bot in user_agent:
log_entry['bot'] = True
is_bot = True
break
_DOWNLOAD_LOG.info(log_entry)
else:
return run_sql("INSERT INTO rnkDOWNLOADS "
"(id_bibrec,id_bibdoc,file_version,file_format,"
"id_user,client_host,download_time) VALUES "
"(%s,%s,%s,%s,%s,INET_ATON(%s),NOW())",
(recid, self.id, version, docformat,
userid, ip_address,))
download_register_event.append(is_bot)
register_customevent("downloads", download_register_event)
except:
register_exception(
("Do the webstat tables exists? Try with 'webstatadmin"
" --load-config'")
)

def get_incoming_relations(self, rel_type=None):
"""Return all relations in which this BibDoc appears on target position
Expand Down
48 changes: 23 additions & 25 deletions modules/bibrank/lib/bibrank_downloads_similarity.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
#
# This file is part of Invenio.
# Copyright (C) 2005, 2006, 2007, 2008, 2010, 2011, 2012 CERN.
# Copyright (C) 2005, 2006, 2007, 2008, 2010, 2011, 2012, 2015 CERN.
#
# Invenio is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
Expand All @@ -20,19 +20,15 @@
__revision__ = \
"$Id$"

from invenio.config import \
CFG_ACCESS_CONTROL_LEVEL_SITE, \
CFG_CERN_SITE, \
CFG_ELASTICSEARCH_LOGGING, \
CFG_ELASTICSEARCH_BOT_AGENT_STRINGS
from invenio.config import (
CFG_ACCESS_CONTROL_LEVEL_SITE, CFG_CERN_SITE,
CFG_ELASTICSEARCH_BOT_AGENT_STRINGS
)
from invenio.dbquery import run_sql
from invenio.bibrank_downloads_indexer import database_tuples_to_single_list
from invenio.search_engine_utils import get_fieldvalues
from invenio.errorlib import register_exception

if CFG_ELASTICSEARCH_LOGGING:
import logging

_PAGEVIEW_LOG = logging.getLogger('events.pageviews')

def record_exists(recID):
"""Return 1 if record RECID exists.
Expand Down Expand Up @@ -62,24 +58,26 @@ def register_page_view_event(recid, uid, client_ip_address, user_agent):
# do not register access if we are in read-only access control
# site mode:
return []
if CFG_ELASTICSEARCH_LOGGING:
log_event = {
'id_bibrec': recid,
'id_user': uid,
'client_host': client_ip_address,
'user_agent': user_agent
}
if user_agent is not None:

# register event in webstat
try:
from invenio.webstat import register_customevent
pageviews_register_event = [
recid, uid, client_ip_address, user_agent
]
is_bot = False
if user_agent:
for bot in CFG_ELASTICSEARCH_BOT_AGENT_STRINGS:
if bot in user_agent:
log_event['bot'] = True
is_bot = True
break
_PAGEVIEW_LOG.info(log_event)
else:
return run_sql("INSERT INTO rnkPAGEVIEWS " \
" (id_bibrec,id_user,client_host,view_time) " \
" VALUES (%s,%s,INET_ATON(%s),NOW())", \
(recid, uid, client_ip_address))
pageviews_register_event.append(is_bot)
register_customevent("pageviews", pageviews_register_event)
except:
register_exception(
("Do the webstat tables exists? Try with 'webstatadmin"
" --load-config'")
)

def calculate_reading_similarity_list(recid, type="pageviews"):
"""Calculate reading similarity data to use in reading similarity
Expand Down
66 changes: 66 additions & 0 deletions modules/miscutil/lib/upgrades/invenio_2015_08_24_custom_events.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# -*- coding: utf-8 -*-
#
# This file is part of Invenio.
# Copyright (C) 2015 CERN.
#
# Invenio is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# Invenio is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Invenio; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

"""Add downloads and pageviews as custom events."""

from invenio.webstat import create_customevent

depends_on = ['invenio_release_1_3_0']


def info():
    """Describe this upgrade recipe.

    :return: a one-line, human-readable summary of what the recipe does.
    """
    summary = "Adds downloads and pageviews as custom events."
    return summary


def do_upgrade():
    """Register the ``downloads`` and ``pageviews`` custom events.

    Each event is created through ``create_customevent`` using the same
    string for its id and its name, together with its list of columns.
    The ``downloads`` event is created first, then ``pageviews``.

    :return: always ``1``.
    """
    # (event identifier, column names), in creation order.
    event_definitions = (
        (
            "downloads",
            [
                "id_bibrec", "id_bibdoc", "file_version", "file_format",
                "id_user", "client_host", "user_agent", "bot"
            ],
        ),
        (
            "pageviews",
            [
                "id_bibrec", "id_user", "client_host", "user_agent", "bot"
            ],
        ),
    )
    for event, columns in event_definitions:
        create_customevent(event_id=event, name=event, cols=columns)
    return 1


def estimate():
    """Give the expected running time of the upgrade (optional hook).

    :return: the estimate in seconds; this recipe reports ``1``.
    """
    expected_seconds = 1
    return expected_seconds


def pre_upgrade():
    """Run pre-upgrade checks.

    This recipe performs no checks before upgrading; the hook is a
    deliberate no-op and returns ``None``.
    """


def post_upgrade():
    """Run post-upgrade checks.

    This recipe performs no checks after upgrading; the hook is a
    deliberate no-op and returns ``None``.
    """
92 changes: 64 additions & 28 deletions modules/webstat/etc/webstat.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@
[general]
visitors_box = True
search_box = True
record_box = True
bibsched_box = True
record_box = False
bibsched_box = False
basket_box = True
alert_box = True
loan_box = True
apache_box = True
uptime_box = True
waiting_box = True
loan_box = True
waiting_box = False
max_ingestion_health = 25

[webstat_custom_event_1]
Expand All @@ -27,43 +27,79 @@ param2 = alert
param3 = user

[webstat_custom_event_3]
name = journals
param1 = action
param2 = journal_name
param3 = issue_number
param4 = category
param5 = language
param6 = articleid
name = media_download
param1 = file
param2 = type
param3 = format
param4 = ip

[webstat_custom_event_4]
name = ejournal
param1 = publication
param2 = volume
param3 = year
param4 = page
param5 = source
param6 = ip

[webstat_custom_event_5]
name = media_view
param1 = file
param2 = type
param3 = format
param4 = ip
param5 = external
param6 = recid

[webstat_custom_event_6]
name = websubmissions
param1 = doctype

[webstat_custom_event_5]
[webstat_custom_event_7]
name = loanrequest
param1 = request_id
param2 = loan_id

[webstat_custom_event_6]
name = login
param1 = IP
param2 = UID
param3 = email
[webstat_custom_event_8]
name = journals
param1 = action
param2 = journal_name
param3 = issue_number
param4 = category
param5 = language
param6 = articleid

[webstat_custom_event_7]
name = apikeyusage
[webstat_custom_event_9]
name = ebidding_opening
param1 = user_id
param2 = key_id
param3 = path
param4 = query
param2 = etendering_ref
param3 = action

[webstat_custom_event_10]
name = downloads
param1 = id_bibrec
param2 = id_bibdoc
param3 = file_version
param4 = file_format
param5 = id_user
param6 = client_host
param7 = user_agent
param8 = bot

[webstat_custom_event_11]
name = pageviews
param1 = id_bibrec
param2 = id_user
param3 = client_host
param4 = user_agent
param5 = bot

[apache_log_analyzer]
profile = nil
nb-histogram-items-to-print = 20
exclude-ip-list = ("137.138.249.162" "137.138.246.86")
home-collection = "Atlantis Institute of Fictive Science"
search-interface-url = "/collection/"
search-interface-url-old-style = "/?"
exclude-ip-list = ("137.138.198.205")
home-collection = "CERN Document Server"
search-interface-url = "/?"
detailed-record-url = "/record/"
search-engine-url = "/search?"
search-engine-url-old-style = "/search.py?"
Expand All @@ -72,5 +108,5 @@ add-to-basket-url = "/yourbaskets/add"
display-basket-url = "/yourbaskets/display"
display-public-basket-url = "/yourbaskets/display_public"
alert-url = "/youralerts/"
display-your-alerts-url = "/youralerts/display"
display-your-searches-url = "/yoursearches/display"
display-your-alerts-url = "/youralerts/list"
display-your-searches-url = "/youralerts/display"
Loading

0 comments on commit d424e56

Please sign in to comment.