Skip to content
Snippets Groups Projects
Commit a64c7b37 authored by E. Madison Bray's avatar E. Madison Bray
Browse files

[enhancement] add an option for recommendation mode for the baseline recsystem

Now, in addition to random recommendations, it can support recommendations based
on a simple popularity metric; this opens up the possibility of adding still more
recommendation modes as well.
parent 8520b443
No related branches found
No related tags found
No related merge requests found
......@@ -43,6 +43,9 @@ sys.excepthook = lambda *exc_info: log.exception(
articles = None
"""Articles cache; initialized in `initialize`."""
recommendation_mode = 'random'
"""Recommendation mode, either 'random' or 'popular'; defaults to 'random'."""
async def initialize(api_base_uri, token):
"""Start-up tasks to perform before starting the main client loop."""
......@@ -73,6 +76,19 @@ async def new_article(article):
articles.push(article)
def popularity(article):
    """
    Return a measure of an article's popularity.

    The formula is ``max(clicks, 1) * max(likes - dislikes, 1)``.  Both
    factors are clamped to a minimum of 1, so an article with no clicks, or
    with at least as many dislikes as likes, still scores at least 1 rather
    than zero or a negative value.

    ``article`` must be a mapping with a ``'metrics'`` entry that provides
    ``'clicks'``, ``'likes'`` and ``'dislikes'`` values.

    You could replace this with a more sophisticated measure of popularity.
    """

    metrics = article['metrics']
    # Clamp each factor separately so that one zero/negative term cannot
    # wipe out (or flip the sign of) the other.
    clicks = max(1, metrics['clicks'])
    net_likes = max(1, metrics['likes'] - metrics['dislikes'])
    return clicks * net_likes
@method
async def recommend(user_id, limit=RECOMMEND_DEFAULT_LIMIT, since_id=None,
max_id=None):
......@@ -82,15 +98,26 @@ async def recommend(user_id, limit=RECOMMEND_DEFAULT_LIMIT, since_id=None,
# of up to limit articles from the given range.
if since_id is None:
# If no since_id is given (i.e. we are being asked for the most recent
# articles, just take the top `limit * 2` articles and then take a
# random selection from them
# articles), just take the top `limit * 2` articles
start = -2 * limit
else:
start = since_id + 1
end = max_id
selection = articles[start:end]
if recommendation_mode == 'popular':
# TODO: This uses the article metrics that the recsystem already has
# retrieved, and may be out of date. Recsystems need to handle article
# interaction events from the event stream to keep up-to-date article
# metrics.
selection = sorted(selection, key=popularity)[-limit:]
selection = [a['article_id'] for a in selection]
return sorted(selection, reverse=True)
else:
# recommendation_mode == 'random'
limit = min(limit, len(selection))
sample = sorted(random.sample(range(len(selection)), limit), reverse=True)
sample = sorted(random.sample(range(len(selection)), limit),
reverse=True)
return [selection[idx]['article_id'] for idx in sample]
......@@ -316,11 +343,19 @@ class FileOrToken(click.File):
@click.option('-t', '--token', required=True, type=FileOrToken(),
help='authentication token for the recsystem; if a valid '
'filename is given the token is read from a file instead')
@click.option('-m', '--mode', type=click.Choice(['random', 'popular']),
default='random',
help='the recommendation mode: random simply returns a random '
'selection of articles, whereas popular returns the most '
'popular (in terms of rating and clicks) articles in the '
'requested range')
@click.option('--log-level', default='INFO',
type=click.Choice(['DEBUG', 'INFO', 'WARNING', 'ERROR'],
case_sensitive=False),
help='minimum log level to output')
def main(api_base_uri, token, log_level):
def main(api_base_uri, token, mode, log_level):
global recommendation_mode
recommendation_mode = mode
logging.basicConfig(level=log_level)
log.setLevel(log_level)
coloredlogs.install(level=log_level, logger=log)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment