Skip to content

Commit 09bb51d

Browse files
committed
migrate usage reports independent of each other
1 parent 7b1fee9 commit 09bb51d

2 files changed

Lines changed: 15 additions & 34 deletions

File tree

osf/management/commands/migrate_osfmetrics_6to8.py

Lines changed: 13 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -162,28 +162,22 @@ def migrate_usage_reports(osfid: str, until_when: str):
162162
_item_is_component = is_osf_component(_osfobj) if _osfobj else False
163163

164164
def _each_new():
165-
# go in sorted order to build cumulative counts
166-
# (only a few dozen of these per item; should be fine to sort and load all at once)
167165
_each_hit = _es6_scan_range(
168166
es6_reports.PublicItemUsageReport,
169167
until_when=until_when,
170168
addl_filter={'terms': {'item_osfid': _synonymous_osfids(osfid)}},
171-
sort='report_yearmonth',
172169
)
170+
# (only a few dozen of these per item; should be fine to load all at once)
173171
_hits = list(_each_hit)
174172
if _osfobj and not _hits:
175173
# this item has usages, but only before the monthly usage reports started
176174
# -- create one for cumulative counts (if the object still exists)
177175
yield _backfill_old_usage_report(_osfobj, _item_is_component, until_when)
178176
else:
179-
_prior_report = None
180177
for _hit in _hits:
181-
yield (
182-
_prior_report := _convert_public_usage_report(
183-
_hit['_source'],
184-
_prior_report,
185-
item_is_component=_item_is_component,
186-
)
178+
yield _convert_public_usage_report(
179+
_hit['_source'],
180+
item_is_component=_item_is_component,
187181
)
188182

189183
_es8_bulk_save(es8_metrics.MonthlyPublicItemUsageReportEs8, _each_new())
@@ -402,26 +396,13 @@ def _convert_preprint_metric(
402396

403397
def _convert_public_usage_report(
404398
source: dict,
405-
prior_report: es8_metrics.MonthlyPublicItemUsageReportEs8 | None,
406399
item_is_component: bool,
407400
) -> es8_metrics.MonthlyPublicItemUsageReportEs8:
408-
if prior_report is None:
409-
_c_views, _c_view_sess, _c_downloads, _c_download_sess = _get_cumulative_usage(
410-
osfid=source['item_osfid'],
411-
until_when=YearMonth.from_str(source['report_yearmonth']).month_end(),
412-
is_preprint=('preprint' in source.get('item_type', ())),
413-
)
414-
else:
415-
_c_views = prior_report.cumulative_view_count + source.get('view_count', 0)
416-
_c_view_sess = prior_report.cumulative_view_session_count + (
417-
source.get('view_session_count', 0) or source.get('view_count', 0)
418-
)
419-
_c_downloads = prior_report.cumulative_download_count + source.get(
420-
'download_count', 0
421-
)
422-
_c_download_sess = prior_report.cumulative_download_session_count + (
423-
source.get('download_session_count', 0) or source.get('download_count')
424-
)
401+
_c_views, _c_view_sess, _c_downloads, _c_download_sess = _get_cumulative_usage(
402+
osfid=source['item_osfid'],
403+
until_when=YearMonth.from_str(source['report_yearmonth']).month_end(),
404+
is_preprint=('preprint' in source.get('item_type', ())),
405+
)
425406
return es8_metrics.MonthlyPublicItemUsageReportEs8(
426407
cycle_coverage=_semverish_from_yearmonth(source['report_yearmonth']),
427408
item_iri=osfid_iri(source['item_osfid']),
@@ -437,11 +418,11 @@ def _convert_public_usage_report(
437418
provider_ids=source.get('provider_id'),
438419
platform_iris=source.get('platform_iri') or [website_settings.DOMAIN],
439420
view_count=source.get('view_count', 0),
440-
view_session_count=source.get('view_session_count', 0),
421+
view_session_count=source.get('view_session_count') or source.get('view_count', 0),
441422
cumulative_view_count=_c_views,
442423
cumulative_view_session_count=_c_view_sess or _c_views,
443424
download_count=source.get('download_count', 0),
444-
download_session_count=source.get('download_session_count', 0),
425+
download_session_count=source.get('download_session_count') or source.get('download_count', 0),
445426
cumulative_download_count=_c_downloads,
446427
cumulative_download_session_count=_c_download_sess or _c_downloads,
447428
)
@@ -552,7 +533,7 @@ def _cumulative_countedusage_downloads(osfid, until_when) -> tuple[int, int]:
552533

553534

554535
def _cumulative_preprint_count(preprint_metric_cls, osfid: str, until_when: str) -> int:
555-
'''aggregate views on each preprint'''
536+
'''aggregate counts on given preprint'''
556537
# copied/adapted from osf.metrics.preprint_metrics
557538
_search = (
558539
preprint_metric_cls.search()
@@ -562,12 +543,11 @@ def _cumulative_preprint_count(preprint_metric_cls, osfid: str, until_when: str)
562543
)
563544
_search.aggs.metric('agg_count', 'sum', field='count')
564545
_response = _search.execute()
565-
_view_count = (
546+
return (
566547
int(_response.aggregations.agg_count.value)
567548
if hasattr(_response.aggregations, 'agg_count')
568549
else 0
569550
)
570-
return _view_count
571551

572552

573553
def _synonymous_osfids(osfid: str) -> list[str]:

osf/metrics/reporters/public_item_usage.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from osf.metrics.counted_usage import (
1515
CountedAuthUsage,
1616
get_provider_id,
17+
get_item_type as get_legacy_item_type,
1718
)
1819
from osf.metrics.preprint_metrics import (
1920
PreprintDownload,
@@ -80,7 +81,7 @@ def report(self, **report_kwargs):
8081
raise _SkipItem
8182
_report_es6 = PublicItemUsageReport(
8283
item_osfid=_report.item_osfids[0],
83-
item_type=list(_report.item_types),
84+
item_type=[get_legacy_item_type(_obj)],
8485
provider_id=list(_report.provider_ids),
8586
platform_iri=list(_report.platform_iris),
8687
view_count=_report.view_count,

0 commit comments

Comments
 (0)