Skip to content

patrickzerrer/How-to-work-with-Android-App-Logging-Data

Repository files navigation

<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>

<meta charset="utf-8">
<meta name="generator" content="quarto-1.7.32">

<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">

<meta name="author" content="Patrick Zerrer">
<meta name="author" content="Mareike Wieland">
<meta name="author" content="Charlotte de Alwis">

<title>How to work with Android App Logging Data</title>
<style>
code{white-space: pre-wrap;}
span.smallcaps{font-variant: small-caps;}
div.columns{display: flex; gap: min(4vw, 1.5em);}
div.column{flex: auto; overflow-x: auto;}
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
ul.task-list{list-style: none;}
ul.task-list li input[type="checkbox"] {
  width: 0.8em;
  margin: 0 0.8em 0.2em -1em; /* quarto-specific, see quarto-dev/quarto-cli#4556 */ 
  vertical-align: middle;
}
/* CSS for syntax highlighting */
html { -webkit-text-size-adjust: 100%; }
pre > code.sourceCode { white-space: pre; position: relative; }
pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
pre > code.sourceCode > span:empty { height: 1.2em; }
.sourceCode { overflow: visible; }
code.sourceCode > span { color: inherit; text-decoration: inherit; }
div.sourceCode { margin: 1em 0; }
pre.sourceCode { margin: 0; }
@media screen {
div.sourceCode { overflow: auto; }
}
@media print {
pre > code.sourceCode { white-space: pre-wrap; }
pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
}
pre.numberSource code
  { counter-reset: source-line 0; }
pre.numberSource code > span
  { position: relative; left: -4em; counter-increment: source-line; }
pre.numberSource code > span > a:first-child::before
  { content: counter(source-line);
    position: relative; left: -1em; text-align: right; vertical-align: baseline;
    border: none; display: inline-block;
    -webkit-touch-callout: none; -webkit-user-select: none;
    -khtml-user-select: none; -moz-user-select: none;
    -ms-user-select: none; user-select: none;
    padding: 0 4px; width: 4em;
  }
pre.numberSource { margin-left: 3em;  padding-left: 4px; }
div.sourceCode
  {   }
@media screen {
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
}
/* CSS for citations */
div.csl-bib-body { }
div.csl-entry {
  clear: both;
  margin-bottom: 0em;
}
.hanging-indent div.csl-entry {
  margin-left:2em;
  text-indent:-2em;
}
div.csl-left-margin {
  min-width:2em;
  float:left;
}
div.csl-right-inline {
  margin-left:2em;
  padding-left:1em;
}
div.csl-indent {
  margin-left: 2em;
}</style>


<script src="readme_files/libs/clipboard/clipboard.min.js"></script>
<script src="readme_files/libs/quarto-html/quarto.js" type="module"></script>
<script src="readme_files/libs/quarto-html/tabsets/tabsets.js" type="module"></script>
<script src="readme_files/libs/quarto-html/popper.min.js"></script>
<script src="readme_files/libs/quarto-html/tippy.umd.min.js"></script>
<script src="readme_files/libs/quarto-html/anchor.min.js"></script>
<link href="readme_files/libs/quarto-html/tippy.css" rel="stylesheet">
<link href="readme_files/libs/quarto-html/quarto-syntax-highlighting-37eea08aefeeee20ff55810ff984fec1.css" rel="stylesheet" id="quarto-text-highlighting-styles">
<script src="readme_files/libs/bootstrap/bootstrap.min.js"></script>
<link href="readme_files/libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
<link href="readme_files/libs/bootstrap/bootstrap-bb462d781dde1847d9e3ccf7736099dd.min.css" rel="stylesheet" append-hash="true" id="quarto-bootstrap" data-mode="light">
<style>html{ scroll-behavior: smooth; }</style>


</head>

<body class="quarto-light">

<div id="quarto-content" class="page-columns page-rows-contents page-layout-article">
<div id="quarto-margin-sidebar" class="sidebar margin-sidebar">
  <nav id="TOC" role="doc-toc" class="toc-active">
    <h2 id="toc-title">Table of contents</h2>
   
  <ul>
  <li><a href="#course-overview" id="toc-course-overview" class="nav-link active" data-scroll-target="#course-overview"><span class="header-section-number">1</span> Course Overview</a>
  <ul class="collapse">
  <li><a href="#learning-objectives" id="toc-learning-objectives" class="nav-link" data-scroll-target="#learning-objectives"><span class="header-section-number">1.1</span> Learning Objectives</a></li>
  <li><a href="#target-audience" id="toc-target-audience" class="nav-link" data-scroll-target="#target-audience"><span class="header-section-number">1.2</span> Target audience</a></li>
  <li><a href="#setting-up-the-computational-environment" id="toc-setting-up-the-computational-environment" class="nav-link" data-scroll-target="#setting-up-the-computational-environment"><span class="header-section-number">1.3</span> Setting up the computational environment</a></li>
  <li><a href="#duration" id="toc-duration" class="nav-link" data-scroll-target="#duration"><span class="header-section-number">1.4</span> Duration</a></li>
  </ul></li>
  <li><a href="#social-science-usecases" id="toc-social-science-usecases" class="nav-link" data-scroll-target="#social-science-usecases"><span class="header-section-number">2</span> Social Science Usecases</a></li>
  <li><a href="#some-context-about-android-app-log-data" id="toc-some-context-about-android-app-log-data" class="nav-link" data-scroll-target="#some-context-about-android-app-log-data"><span class="header-section-number">3</span> Some Context about Android App Log Data</a></li>
  <li><a href="#the-red-or-blue-pill" id="toc-the-red-or-blue-pill" class="nav-link" data-scroll-target="#the-red-or-blue-pill"><span class="header-section-number">4</span> The red or blue pill?</a></li>
  <li><a href="#the-blue-data-set" id="toc-the-blue-data-set" class="nav-link" data-scroll-target="#the-blue-data-set"><span class="header-section-number">5</span> The blue data set</a>
  <ul class="collapse">
  <li><a href="#common-dimensions-of-analysis" id="toc-common-dimensions-of-analysis" class="nav-link" data-scroll-target="#common-dimensions-of-analysis"><span class="header-section-number">5.1</span> Common Dimensions of Analysis</a></li>
  <li><a href="#preprocessing-the-blue-data-set" id="toc-preprocessing-the-blue-data-set" class="nav-link" data-scroll-target="#preprocessing-the-blue-data-set"><span class="header-section-number">5.2</span> Preprocessing the Blue Data Set</a></li>
  <li><a href="#calculating-visits" id="toc-calculating-visits" class="nav-link" data-scroll-target="#calculating-visits"><span class="header-section-number">5.3</span> Calculating Visits</a></li>
  <li><a href="#calculating-duration" id="toc-calculating-duration" class="nav-link" data-scroll-target="#calculating-duration"><span class="header-section-number">5.4</span> Calculating Duration</a></li>
  <li><a href="#mobile-behavior-between-participants" id="toc-mobile-behavior-between-participants" class="nav-link" data-scroll-target="#mobile-behavior-between-participants"><span class="header-section-number">5.5</span> Mobile Behavior between participants</a></li>
  <li><a href="#mobile-behavior-over-time" id="toc-mobile-behavior-over-time" class="nav-link" data-scroll-target="#mobile-behavior-over-time"><span class="header-section-number">5.6</span> Mobile Behavior over time</a></li>
  </ul></li>
  <li><a href="#the-red-data-set" id="toc-the-red-data-set" class="nav-link" data-scroll-target="#the-red-data-set"><span class="header-section-number">6</span> The red data set</a>
  <ul class="collapse">
  <li><a href="#calculating-visits-1" id="toc-calculating-visits-1" class="nav-link" data-scroll-target="#calculating-visits-1"><span class="header-section-number">6.1</span> Calculating Visits</a></li>
  <li><a href="#calculating-the-number-of-shares-per-app" id="toc-calculating-the-number-of-shares-per-app" class="nav-link" data-scroll-target="#calculating-the-number-of-shares-per-app"><span class="header-section-number">6.2</span> Calculating the number of shares per app</a></li>
  <li><a href="#calculating-the-number-of-notifications-per-app" id="toc-calculating-the-number-of-notifications-per-app" class="nav-link" data-scroll-target="#calculating-the-number-of-notifications-per-app"><span class="header-section-number">6.3</span> Calculating the number of notifications per app</a></li>
  </ul></li>
  <li><a href="#conclusion" id="toc-conclusion" class="nav-link" data-scroll-target="#conclusion"><span class="header-section-number">7</span> Conclusion</a></li>
  <li><a href="#references" id="toc-references" class="nav-link" data-scroll-target="#references"><span class="header-section-number">8</span> References</a></li>
  </ul>
</nav>
</div>
<main class="content" id="quarto-document-content">

<header id="title-block-header" class="quarto-title-block default">
<div class="quarto-title">
<h1 class="title">How to work with Android App Logging Data</h1>
</div>


<div class="quarto-title-meta-author">
  <div class="quarto-title-meta-heading">Authors</div>
  <div class="quarto-title-meta-heading">Affiliations</div>
  
    <div class="quarto-title-meta-contents">
    <p class="author">Patrick Zerrer <a href="mailto:pzerrer@uni-bremen.de" class="quarto-title-author-email"><i class="bi bi-envelope"></i></a> <a href="https://orcid.org/0000-0002-8827-1336" class="quarto-title-author-orcid"> <img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAGXRFWHRTb2Z0d2FyZQBBZG9iZSBJbWFnZVJlYWR5ccllPAAAA2ZpVFh0WE1MOmNvbS5hZG9iZS54bXAAAAAAADw/eHBhY2tldCBiZWdpbj0i77u/IiBpZD0iVzVNME1wQ2VoaUh6cmVTek5UY3prYzlkIj8+IDx4OnhtcG1ldGEgeG1sbnM6eD0iYWRvYmU6bnM6bWV0YS8iIHg6eG1wdGs9IkFkb2JlIFhNUCBDb3JlIDUuMC1jMDYwIDYxLjEzNDc3NywgMjAxMC8wMi8xMi0xNzozMjowMCAgICAgICAgIj4gPHJkZjpSREYgeG1sbnM6cmRmPSJodHRwOi8vd3d3LnczLm9yZy8xOTk5LzAyLzIyLXJkZi1zeW50YXgtbnMjIj4gPHJkZjpEZXNjcmlwdGlvbiByZGY6YWJvdXQ9IiIgeG1sbnM6eG1wTU09Imh0dHA6Ly9ucy5hZG9iZS5jb20veGFwLzEuMC9tbS8iIHhtbG5zOnN0UmVmPSJodHRwOi8vbnMuYWRvYmUuY29tL3hhcC8xLjAvc1R5cGUvUmVzb3VyY2VSZWYjIiB4bWxuczp4bXA9Imh0dHA6Ly9ucy5hZG9iZS5jb20veGFwLzEuMC8iIHhtcE1NOk9yaWdpbmFsRG9jdW1lbnRJRD0ieG1wLmRpZDo1N0NEMjA4MDI1MjA2ODExOTk0QzkzNTEzRjZEQTg1NyIgeG1wTU06RG9jdW1lbnRJRD0ieG1wLmRpZDozM0NDOEJGNEZGNTcxMUUxODdBOEVCODg2RjdCQ0QwOSIgeG1wTU06SW5zdGFuY2VJRD0ieG1wLmlpZDozM0NDOEJGM0ZGNTcxMUUxODdBOEVCODg2RjdCQ0QwOSIgeG1wOkNyZWF0b3JUb29sPSJBZG9iZSBQaG90b3Nob3AgQ1M1IE1hY2ludG9zaCI+IDx4bXBNTTpEZXJpdmVkRnJvbSBzdFJlZjppbnN0YW5jZUlEPSJ4bXAuaWlkOkZDN0YxMTc0MDcyMDY4MTE5NUZFRDc5MUM2MUUwNEREIiBzdFJlZjpkb2N1bWVudElEPSJ4bXAuZGlkOjU3Q0QyMDgwMjUyMDY4MTE5OTRDOTM1MTNGNkRBODU3Ii8+IDwvcmRmOkRlc2NyaXB0aW9uPiA8L3JkZjpSREY+IDwveDp4bXBtZXRhPiA8P3hwYWNrZXQgZW5kPSJyIj8+84NovQAAAR1JREFUeNpiZEADy85ZJgCpeCB2QJM6AMQLo4yOL0AWZETSqACk1gOxAQN+cAGIA4EGPQBxmJA0nwdpjjQ8xqArmczw5tMHXAaALDgP1QMxAGqzAAPxQACqh4ER6uf5MBlkm0X4EGayMfMw/Pr7Bd2gRBZogMFBrv01hisv5jLsv9nLAPIOMnjy8RDDyYctyAbFM2EJbRQw+aAWw/LzVgx7b+cwCHKqMhjJFCBLOzAR6+lXX84xnHjYyqAo5IUizkRCwIENQQckGSDGY4TVgAPEaraQr2a4/24bSuoExcJCfAEJihXkWDj3ZAKy9EJGaEo8T0QSxkjSwORsCAuDQCD+QILmD1A9kECEZgxDaEZhICIzGcIyEyOl2RkgwAAhkmC+eAm0TAAAAABJRU5ErkJggg=="></a></p>
  </div>
  <div class="quarto-title-meta-contents">
        <p class="affiliation">
            University of Bremen - ZeMKI
          </p>
      </div>
    <div class="quarto-title-meta-contents">
    <p class="author">Mareike Wieland <a href="mailto:mareike.wieland@gesis.org" class="quarto-title-author-email"><i class="bi bi-envelope"></i></a> <a href="https://orcid.org/0000-0002-3269-2318" class="quarto-title-author-orcid"> <img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAGXRFWHRTb2Z0d2FyZQBBZG9iZSBJbWFnZVJlYWR5ccllPAAAA2ZpVFh0WE1MOmNvbS5hZG9iZS54bXAAAAAAADw/eHBhY2tldCBiZWdpbj0i77u/IiBpZD0iVzVNME1wQ2VoaUh6cmVTek5UY3prYzlkIj8+IDx4OnhtcG1ldGEgeG1sbnM6eD0iYWRvYmU6bnM6bWV0YS8iIHg6eG1wdGs9IkFkb2JlIFhNUCBDb3JlIDUuMC1jMDYwIDYxLjEzNDc3NywgMjAxMC8wMi8xMi0xNzozMjowMCAgICAgICAgIj4gPHJkZjpSREYgeG1sbnM6cmRmPSJodHRwOi8vd3d3LnczLm9yZy8xOTk5LzAyLzIyLXJkZi1zeW50YXgtbnMjIj4gPHJkZjpEZXNjcmlwdGlvbiByZGY6YWJvdXQ9IiIgeG1sbnM6eG1wTU09Imh0dHA6Ly9ucy5hZG9iZS5jb20veGFwLzEuMC9tbS8iIHhtbG5zOnN0UmVmPSJodHRwOi8vbnMuYWRvYmUuY29tL3hhcC8xLjAvc1R5cGUvUmVzb3VyY2VSZWYjIiB4bWxuczp4bXA9Imh0dHA6Ly9ucy5hZG9iZS5jb20veGFwLzEuMC8iIHhtcE1NOk9yaWdpbmFsRG9jdW1lbnRJRD0ieG1wLmRpZDo1N0NEMjA4MDI1MjA2ODExOTk0QzkzNTEzRjZEQTg1NyIgeG1wTU06RG9jdW1lbnRJRD0ieG1wLmRpZDozM0NDOEJGNEZGNTcxMUUxODdBOEVCODg2RjdCQ0QwOSIgeG1wTU06SW5zdGFuY2VJRD0ieG1wLmlpZDozM0NDOEJGM0ZGNTcxMUUxODdBOEVCODg2RjdCQ0QwOSIgeG1wOkNyZWF0b3JUb29sPSJBZG9iZSBQaG90b3Nob3AgQ1M1IE1hY2ludG9zaCI+IDx4bXBNTTpEZXJpdmVkRnJvbSBzdFJlZjppbnN0YW5jZUlEPSJ4bXAuaWlkOkZDN0YxMTc0MDcyMDY4MTE5NUZFRDc5MUM2MUUwNEREIiBzdFJlZjpkb2N1bWVudElEPSJ4bXAuZGlkOjU3Q0QyMDgwMjUyMDY4MTE5OTRDOTM1MTNGNkRBODU3Ii8+IDwvcmRmOkRlc2NyaXB0aW9uPiA8L3JkZjpSREY+IDwveDp4bXBtZXRhPiA8P3hwYWNrZXQgZW5kPSJyIj8+84NovQAAAR1JREFUeNpiZEADy85ZJgCpeCB2QJM6AMQLo4yOL0AWZETSqACk1gOxAQN+cAGIA4EGPQBxmJA0nwdpjjQ8xqArmczw5tMHXAaALDgP1QMxAGqzAAPxQACqh4ER6uf5MBlkm0X4EGayMfMw/Pr7Bd2gRBZogMFBrv01hisv5jLsv9nLAPIOMnjy8RDDyYctyAbFM2EJbRQw+aAWw/LzVgx7b+cwCHKqMhjJFCBLOzAR6+lXX84xnHjYyqAo5IUizkRCwIENQQckGSDGY4TVgAPEaraQr2a4/24bSuoExcJCfAEJihXkWDj3ZAKy9EJGaEo8T0QSxkjSwORsCAuDQCD+QILmD1A9kECEZgxDaEZhICIzGcIyEyOl2RkgwAAhkmC+eAm0TAAAAABJRU5ErkJggg=="></a></p>
  </div>
  <div class="quarto-title-meta-contents">
        <p class="affiliation">
            GESIS Leibniz Institute for the Social Sciences
          </p>
      </div>
    <div class="quarto-title-meta-contents">
    <p class="author">Charlotte de Alwis <a href="mailto:charlotte.dealwis@gesis.org" class="quarto-title-author-email"><i class="bi bi-envelope"></i></a> <a href="https://orcid.org/0009-0004-8903-7500" class="quarto-title-author-orcid"> <img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAGXRFWHRTb2Z0d2FyZQBBZG9iZSBJbWFnZVJlYWR5ccllPAAAA2ZpVFh0WE1MOmNvbS5hZG9iZS54bXAAAAAAADw/eHBhY2tldCBiZWdpbj0i77u/IiBpZD0iVzVNME1wQ2VoaUh6cmVTek5UY3prYzlkIj8+IDx4OnhtcG1ldGEgeG1sbnM6eD0iYWRvYmU6bnM6bWV0YS8iIHg6eG1wdGs9IkFkb2JlIFhNUCBDb3JlIDUuMC1jMDYwIDYxLjEzNDc3NywgMjAxMC8wMi8xMi0xNzozMjowMCAgICAgICAgIj4gPHJkZjpSREYgeG1sbnM6cmRmPSJodHRwOi8vd3d3LnczLm9yZy8xOTk5LzAyLzIyLXJkZi1zeW50YXgtbnMjIj4gPHJkZjpEZXNjcmlwdGlvbiByZGY6YWJvdXQ9IiIgeG1sbnM6eG1wTU09Imh0dHA6Ly9ucy5hZG9iZS5jb20veGFwLzEuMC9tbS8iIHhtbG5zOnN0UmVmPSJodHRwOi8vbnMuYWRvYmUuY29tL3hhcC8xLjAvc1R5cGUvUmVzb3VyY2VSZWYjIiB4bWxuczp4bXA9Imh0dHA6Ly9ucy5hZG9iZS5jb20veGFwLzEuMC8iIHhtcE1NOk9yaWdpbmFsRG9jdW1lbnRJRD0ieG1wLmRpZDo1N0NEMjA4MDI1MjA2ODExOTk0QzkzNTEzRjZEQTg1NyIgeG1wTU06RG9jdW1lbnRJRD0ieG1wLmRpZDozM0NDOEJGNEZGNTcxMUUxODdBOEVCODg2RjdCQ0QwOSIgeG1wTU06SW5zdGFuY2VJRD0ieG1wLmlpZDozM0NDOEJGM0ZGNTcxMUUxODdBOEVCODg2RjdCQ0QwOSIgeG1wOkNyZWF0b3JUb29sPSJBZG9iZSBQaG90b3Nob3AgQ1M1IE1hY2ludG9zaCI+IDx4bXBNTTpEZXJpdmVkRnJvbSBzdFJlZjppbnN0YW5jZUlEPSJ4bXAuaWlkOkZDN0YxMTc0MDcyMDY4MTE5NUZFRDc5MUM2MUUwNEREIiBzdFJlZjpkb2N1bWVudElEPSJ4bXAuZGlkOjU3Q0QyMDgwMjUyMDY4MTE5OTRDOTM1MTNGNkRBODU3Ii8+IDwvcmRmOkRlc2NyaXB0aW9uPiA8L3JkZjpSREY+IDwveDp4bXBtZXRhPiA8P3hwYWNrZXQgZW5kPSJyIj8+84NovQAAAR1JREFUeNpiZEADy85ZJgCpeCB2QJM6AMQLo4yOL0AWZETSqACk1gOxAQN+cAGIA4EGPQBxmJA0nwdpjjQ8xqArmczw5tMHXAaALDgP1QMxAGqzAAPxQACqh4ER6uf5MBlkm0X4EGayMfMw/Pr7Bd2gRBZogMFBrv01hisv5jLsv9nLAPIOMnjy8RDDyYctyAbFM2EJbRQw+aAWw/LzVgx7b+cwCHKqMhjJFCBLOzAR6+lXX84xnHjYyqAo5IUizkRCwIENQQckGSDGY4TVgAPEaraQr2a4/24bSuoExcJCfAEJihXkWDj3ZAKy9EJGaEo8T0QSxkjSwORsCAuDQCD+QILmD1A9kECEZgxDaEZhICIzGcIyEyOl2RkgwAAhkmC+eAm0TAAAAABJRU5ErkJggg=="></a></p>
  </div>
  <div class="quarto-title-meta-contents">
        <p class="affiliation">
            GESIS Leibniz Institute for the Social Sciences
          </p>
      </div>
  </div>

<div class="quarto-title-meta">

      
  
    
  </div>
  


</header>


<section id="course-overview" class="level1" data-number="1">
<h1 data-number="1"><span class="header-section-number">1</span> Course Overview</h1>
<section id="learning-objectives" class="level2" data-number="1.1">
<h2 data-number="1.1" class="anchored" data-anchor-id="learning-objectives"><span class="header-section-number">1.1</span> Learning Objectives</h2>
<p>This guide aims to provide learners with the conceptual understanding and practical skills needed to import, clean, transform, and analyse Android log data using R and the tidyverse ecosystem. By the end of the module, learners should be able to transform raw device logs into meaningful analytical data sets suitable for behavioural, usability, or digital phenotyping studies.</p>
<p>This includes:</p>
<ol type="1">
<li>Understanding the structure of Android log data, including key variables commonly found in app logs.</li>
<li>Importing and preprocessing both already processed and raw Android log data using the tidyverse.</li>
<li>Computing and visualizing key usage measures such as visits, session durations and sequences.</li>
</ol>
</section>
<section id="target-audience" class="level2" data-number="1.2">
<h2 data-number="1.2" class="anchored" data-anchor-id="target-audience"><span class="header-section-number">1.2</span> Target audience</h2>
<p>This guide is designed for researchers, data analysts, and students who are interested in working with mobile sensing or digital trace data, particularly Android log files. It assumes a basic familiarity with R and the tidyverse ecosystem, including data manipulation with dplyr and data visualization with ggplot2. Participants should already understand fundamental data analysis concepts and be ready to apply them to the challenges of preprocessing and analysing complex, time-based log data.</p>
</section>
<section id="setting-up-the-computational-environment" class="level2" data-number="1.3">
<h2 data-number="1.3" class="anchored" data-anchor-id="setting-up-the-computational-environment"><span class="header-section-number">1.3</span> Setting up the computational environment</h2>
<p>Install the R packages.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb1"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="fu">install.packages</span>(<span class="st">"dplyr"</span>)</span>
<span id="cb1-2"><a href="#cb1-2" aria-hidden="true" tabindex="-1"></a><span class="fu">install.packages</span>(<span class="st">"lubridate"</span>)</span>
<span id="cb1-3"><a href="#cb1-3" aria-hidden="true" tabindex="-1"></a><span class="fu">install.packages</span>(<span class="st">"tidyr"</span>)</span>
<span id="cb1-4"><a href="#cb1-4" aria-hidden="true" tabindex="-1"></a><span class="fu">install.packages</span>(<span class="st">"ggplot2"</span>)</span>
<span id="cb1-5"><a href="#cb1-5" aria-hidden="true" tabindex="-1"></a><span class="fu">install.packages</span>(<span class="st">"extrafont"</span>)</span>
<span id="cb1-6"><a href="#cb1-6" aria-hidden="true" tabindex="-1"></a><span class="fu">install.packages</span>(<span class="st">"forcats"</span>)</span>
<span id="cb1-7"><a href="#cb1-7" aria-hidden="true" tabindex="-1"></a><span class="fu">install.packages</span>(<span class="st">"stringr"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>And load the R packages.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb2"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(dplyr)</span>
<span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(lubridate)</span>
<span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(tidyr)</span>
<span id="cb2-4"><a href="#cb2-4" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(ggplot2)</span>
<span id="cb2-5"><a href="#cb2-5" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(extrafont)</span>
<span id="cb2-6"><a href="#cb2-6" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(forcats)</span>
<span id="cb2-7"><a href="#cb2-7" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(stringr)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<div class="cell">
<div class="sourceCode cell-code" id="cb3"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Effectively disable scientific notation</span></span>
<span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a><span class="fu">options</span>(<span class="at">scipen =</span> <span class="dv">999</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
</section>
<section id="duration" class="level2" data-number="1.4">
<h2 data-number="1.4" class="anchored" data-anchor-id="duration"><span class="header-section-number">1.4</span> Duration</h2>
<p>You’ll be able to complete the module in about half a day.</p>
</section>
</section>
<section id="social-science-usecases" class="level1" data-number="2">
<h1 data-number="2"><span class="header-section-number">2</span> Social Science Usecases</h1>
<p>This method has been used in previous studies to evaluate the information usage of Fridays for Future supporters <span class="citation" data-cites="Zerrer2024">(e.g. <a href="#ref-Zerrer2024" role="doc-biblioref">Zerrer, 2024</a>)</span>, to study well-being <span class="citation" data-cites="Klingelhoefer2025">(e.g. <a href="#ref-Klingelhoefer2025" role="doc-biblioref">Klingelhoefer et al., 2025</a>)</span>, and to examine smartphone usage patterns <span class="citation" data-cites="Toth2025">(e.g. <a href="#ref-Toth2025" role="doc-biblioref">Toth et al., 2025</a>)</span>.</p>
<div class="quarto-figure quarto-figure-center">
<figure class="figure">
<p><img src="images/image_1_man_with_phone.png" class="img-fluid figure-img" alt="Illustration of a man with a phone"></p>
<figcaption>Image generated by Midjourney</figcaption>
</figure>
</div>
</section>
<section id="some-context-about-android-app-log-data" class="level1" data-number="3">
<h1 data-number="3"><span class="header-section-number">3</span> Some Context about Android App Log Data</h1>
<p>Android app log data are automatically generated records that capture interactions between users, apps, and the operating system on Android devices. System services and applications record events such as app launches, screen on/off states, foreground and background transitions, notifications, and sensor activities. Each event is typically time-stamped and includes the event type, the app or process involved, and contextual metadata such as battery status, network connectivity, or user interactions. These digital traces allow researchers to reconstruct usage episodes in detail and study digital mobile behavior.</p>
</section>
<section id="the-red-or-blue-pill" class="level1" data-number="4">
<h1 data-number="4"><span class="header-section-number">4</span> The red or blue pill?</h1>
<p>Since the degree of preprocessing and thus the structure of app logging data can vary, we will look at two example cases. The first case is based on a data set that has already been preprocessed to a relatively high degree by a panel provider, thus providing a good introduction to some basic concepts in data analysis.</p>
<p>In reference to The Matrix, we call this the “blue” data set. For now, we will remain in the more comfortable world of convenient and rather well-preprocessed data structures. Later in the tutorial, however, we will turn to the “red” data set and confront the messy reality of raw event logs.</p>
</section>
<section id="the-blue-data-set" class="level1" data-number="5">
<h1 data-number="5"><span class="header-section-number">5</span> The blue data set</h1>
<div class="quarto-figure quarto-figure-center">
<figure class="figure">
<p><img src="images/image_2_blue_pill_v2.png" class="img-fluid figure-img" alt="Illustration of a blue pill"></p>
<figcaption>Image generated by Midjourney</figcaption>
</figure>
</div>
<p>Just a few brief notes on the blue data set. The data is based on one week of data collection from a German sample (N = 371) of Android users in 2021. For this tutorial, a smaller sub-sample (N = 50) was taken from the overall data set to ensure that the procedure presented here can be carried out on as wide a range of computers as possible.</p>
<p>Before any analysis can begin, it’s crucial to understand how the data is organized. A typical, more heavily preprocessed Android logging data set includes the following variables:</p>
<p><em>panelist_id</em> – unique identifier for each user or device</p>
<p><em>date</em> – the date of the recorded event</p>
<p><em>start_time</em> – a timestamp indicating when use of the application started</p>
<p><em>end_time</em> – a timestamp indicating when use of the application ended</p>
<p><em>duration</em> – the usage duration (in seconds or milliseconds) of the application</p>
<p><em>app_name</em> – readable name of the application</p>
<p><em>app_package</em> – the app’s identifier</p>
<p><strong>Note:</strong> The variable names as well as the structure of your data set can vary based on the level of preprocessing and tracking app used.</p>
<section id="common-dimensions-of-analysis" class="level2" data-number="5.1">
<h2 data-number="5.1" class="anchored" data-anchor-id="common-dimensions-of-analysis"><span class="header-section-number">5.1</span> Common Dimensions of Analysis</h2>
<p>We will use the blue data set to familiarize you with some conceptual decisions. Later in the tutorial, we will rely more on the red data set to demonstrate the computational steps of data preparation. Nevertheless, the conceptual decisions introduced here are also relevant for the red data set.</p>
<p>To make sense of app log data, analysts typically view it through several dimensions of analysis:</p>
<p><em>Person-level</em> – patterns and metrics aggregated per individual or group.</p>
<p><em>Temporal</em> – analyses over time (hourly, daily, weekly trends).</p>
<p><em>App-level</em> – comparisons across apps (WhatsApp vs.&nbsp;Telegram) or app categories (Entertainment vs.&nbsp;Shopping).</p>
<p><em>Combined perspectives</em> – mixing dimensions, such as app use over time or per user.</p>
<p>These general dimensions of comparison run through the entire analysis. Accordingly, it is important to consider at the beginning of the analysis what the results should show and on which of the dimensions of comparison they are based.</p>
<p>However, before we go through some examples of analyses, our data must undergo several preprocessing steps, even though it is already very well processed.</p>
</section>
<section id="preprocessing-the-blue-data-set" class="level2" data-number="5.2">
<h2 data-number="5.2" class="anchored" data-anchor-id="preprocessing-the-blue-data-set"><span class="header-section-number">5.2</span> Preprocessing the Blue Data Set</h2>
<p>Raw log data often contains a large amount of noise from background processes and system apps that do not represent active user behavior. In this step, we focus on data cleaning and filtering <span class="citation" data-cites="Zerrer2024">(<a href="#ref-Zerrer2024" role="doc-biblioref">Zerrer, 2024</a>)</span>, including:</p>
<p><em>Removing background apps</em> – exclude system processes or apps running without direct user engagement.</p>
<p><em>Merging consecutive visits to the same app</em> – sometimes apps are interrupted by system apps, which leads to a technical artifact that appears as a new app visit. To prevent these artifacts from being misinterpreted as human behavior (e.g., as intentional app access), these cases are removed.</p>
<p><em>Blacklisting apps</em> – recode or generalize apps that are irrelevant to the research question and are potentially sensitive (e.g., health or banking apps). I have prepared a preliminary list of blacklisted apps for this tutorial. You can find the script for this on GitHub (blacklisted_apps.qmd).</p>
<div class="callout callout-style-default callout-note no-icon callout-titled">
<div class="callout-header d-flex align-content-center" data-bs-toggle="collapse" data-bs-target=".callout-1-contents" aria-controls="callout-1" aria-expanded="false" aria-label="Toggle callout">
<div class="callout-icon-container">
<i class="callout-icon no-icon"></i>
</div>
<div class="callout-title-container flex-fill">
Note
</div>
<div class="callout-btn-toggle d-inline-block border-0 py-1 ps-1 pe-0 float-end"><i class="callout-toggle"></i></div>
</div>
<div id="callout-1" class="callout-1-contents callout-collapse collapse">
<div class="callout-body-container callout-body">
<p><strong>Ethics &amp; Privacy</strong></p>
<p>When working with Android app logging data, care should be taken to effectively protect the privacy of participants. Here, personal identifiers in the raw data must be consistently pseudonymized and, if the purpose of the analysis allows, completely anonymized in order to rule out the re-identification of individual users. Furthermore, strict blacklisting of sensitive parameters or, preferably, whitelisting of only necessary events must be used to ensure that no unintended private content is logged. Finally, the data sets should be aggregated into statistical groups in order to obscure individual usage behavior and focus on overarching trends. This is a very important but also extensive topic, which we cannot cover sufficiently in this tutorial, but we recommend the following literature:</p>
<p><em>Spiekermann, S., &amp; Cranor, L. F. (2008). Engineering privacy. IEEE Transactions on Software Engineering, 35(1), 67-82.</em></p>
<p><em>Breuer, J., Bishop, L., &amp; Kinder-Kurlanda, K. (2020). The practical and ethical challenges in acquiring and sharing digital trace data: Negotiating public-private partnerships. New Media &amp; Society, 22(11), 2058-2080.</em></p>
</div>
</div>
</div>
<div class="cell">
<div class="sourceCode cell-code" id="cb4"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a>background_apps <span class="ot">=</span> <span class="fu">read.csv</span>(<span class="st">"data/background_system_packages.csv"</span>) <span class="co"># based on Parry &amp; Toth (2025) with some smaller extensions</span></span>
<span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a>blacklisted_apps <span class="ot">=</span> <span class="fu">readRDS</span>(<span class="st">"data/blacklisted_apps.rds"</span>) <span class="sc">%&gt;%</span> <span class="fu">select</span>(<span class="sc">-</span>app_package) <span class="sc">%&gt;%</span> <span class="fu">distinct</span>(app_name, <span class="at">.keep_all =</span> <span class="cn">TRUE</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>Let’s import our data set.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb5"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a>blue_data <span class="ot">=</span> <span class="fu">readRDS</span>(<span class="st">"data/blue_data.rds"</span>) </span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>And have a quick look.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb6"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="fu">glimpse</span>(blue_data)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code>Rows: 121,959
Columns: 7
$ panelist_id &lt;chr&gt; "participant_33", "participant_33", "participant_33", "par…
$ date        &lt;date&gt; 2021-03-04, 2021-03-04, 2021-03-04, 2021-03-04, 2021-03-0…
$ app_package &lt;chr&gt; "com.android.chrome", "com.teslacoilsw.launcher", "com.goo…
$ app_name    &lt;chr&gt; "Chrome", "Nova Launcher", "Google", "Google", "Nova Launc…
$ start_time  &lt;chr&gt; "2021-03-04 00:02:31", "2021-03-04 00:03:09", "2021-03-04 …
$ end_time    &lt;chr&gt; "2021-03-04 00:03:08", "2021-03-04 00:03:10", "2021-03-04 …
$ duration    &lt;int&gt; 37, 1, 126, 13, 1, 23, 192, 2, 3, 122, 4, 8, 135, 9, 4, 2,…</code></pre>
</div>
</div>
<div class="cell">
<div class="sourceCode cell-code" id="cb8"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a>blue_data_clean <span class="ot">=</span> blue_data <span class="sc">%&gt;%</span> </span>
<span id="cb8-2"><a href="#cb8-2" aria-hidden="true" tabindex="-1"></a>  </span>
<span id="cb8-3"><a href="#cb8-3" aria-hidden="true" tabindex="-1"></a>  <span class="co"># remove background app logs from our data set</span></span>
<span id="cb8-4"><a href="#cb8-4" aria-hidden="true" tabindex="-1"></a>  <span class="fu">filter</span>(<span class="sc">!</span>app_package <span class="sc">%in%</span> background_apps<span class="sc">$</span>pcn) <span class="sc">%&gt;%</span> </span>
<span id="cb8-5"><a href="#cb8-5" aria-hidden="true" tabindex="-1"></a>  </span>
<span id="cb8-6"><a href="#cb8-6" aria-hidden="true" tabindex="-1"></a>  <span class="co"># replace sensitive apps in your data set to strengthen anonymisation</span></span>
<span id="cb8-7"><a href="#cb8-7" aria-hidden="true" tabindex="-1"></a>  <span class="fu">left_join</span>(blacklisted_apps, <span class="at">by =</span> <span class="st">"app_name"</span>) <span class="sc">%&gt;%</span> </span>
<span id="cb8-8"><a href="#cb8-8" aria-hidden="true" tabindex="-1"></a>  <span class="fu">mutate</span>(</span>
<span id="cb8-9"><a href="#cb8-9" aria-hidden="true" tabindex="-1"></a>    <span class="co"># if the app name is on our blacklisted_apps list, replace the name with a generic label</span></span>
<span id="cb8-10"><a href="#cb8-10" aria-hidden="true" tabindex="-1"></a>    <span class="at">app_name =</span> <span class="fu">if_else</span>(<span class="sc">!</span><span class="fu">is.na</span>(blacklisted_app), blacklisted_app, app_name),</span>
<span id="cb8-11"><a href="#cb8-11" aria-hidden="true" tabindex="-1"></a>    <span class="at">app_package =</span> <span class="fu">if_else</span>(<span class="sc">!</span><span class="fu">is.na</span>(blacklisted_app), <span class="st">"blacklisted_package"</span>, app_package)</span>
<span id="cb8-12"><a href="#cb8-12" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">%&gt;%</span> </span>
<span id="cb8-13"><a href="#cb8-13" aria-hidden="true" tabindex="-1"></a>  <span class="fu">select</span>(<span class="sc">-</span>blacklisted_app) <span class="sc">%&gt;%</span> </span>
<span id="cb8-14"><a href="#cb8-14" aria-hidden="true" tabindex="-1"></a>  </span>
<span id="cb8-15"><a href="#cb8-15" aria-hidden="true" tabindex="-1"></a>  <span class="co"># convert start_time and end_time to proper datetime format, make sure to choose the correct timezone (tz)</span></span>
<span id="cb8-16"><a href="#cb8-16" aria-hidden="true" tabindex="-1"></a>  <span class="fu">mutate</span>(</span>
<span id="cb8-17"><a href="#cb8-17" aria-hidden="true" tabindex="-1"></a>    <span class="at">start_time =</span> <span class="fu">as.POSIXct</span>(start_time, <span class="at">format =</span> <span class="st">"%Y-%m-%d %H:%M:%S"</span>, <span class="at">tz =</span> <span class="st">"Europe/Berlin"</span>),</span>
<span id="cb8-18"><a href="#cb8-18" aria-hidden="true" tabindex="-1"></a>    <span class="at">end_time   =</span> <span class="fu">as.POSIXct</span>(end_time,   <span class="at">format =</span> <span class="st">"%Y-%m-%d %H:%M:%S"</span>, <span class="at">tz =</span> <span class="st">"Europe/Berlin"</span>)</span>
<span id="cb8-19"><a href="#cb8-19" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">%&gt;%</span> </span>
<span id="cb8-20"><a href="#cb8-20" aria-hidden="true" tabindex="-1"></a>  </span>
<span id="cb8-21"><a href="#cb8-21" aria-hidden="true" tabindex="-1"></a>  <span class="co"># sort our rows in the correct temporal order per participant</span></span>
<span id="cb8-22"><a href="#cb8-22" aria-hidden="true" tabindex="-1"></a>  <span class="fu">group_by</span>(panelist_id) <span class="sc">%&gt;%</span> </span>
<span id="cb8-23"><a href="#cb8-23" aria-hidden="true" tabindex="-1"></a>  <span class="fu">arrange</span>(<span class="fu">desc</span>(start_time), <span class="at">.by_group =</span> <span class="cn">TRUE</span>) <span class="sc">%&gt;%</span> </span>
<span id="cb8-24"><a href="#cb8-24" aria-hidden="true" tabindex="-1"></a>  <span class="co"># it is important that you use lag() or lead() based on your sorting (descending or ascending)</span></span>
<span id="cb8-25"><a href="#cb8-25" aria-hidden="true" tabindex="-1"></a>  <span class="fu">mutate</span>(</span>
<span id="cb8-26"><a href="#cb8-26" aria-hidden="true" tabindex="-1"></a>    <span class="co"># we use the 'next_panelist_id' variable to ensure that participants are not mixed up by accident.</span></span>
<span id="cb8-27"><a href="#cb8-27" aria-hidden="true" tabindex="-1"></a>    <span class="at">next_panelist_id =</span> <span class="fu">lag</span>(panelist_id, <span class="at">n =</span> <span class="dv">1</span>L),</span>
<span id="cb8-28"><a href="#cb8-28" aria-hidden="true" tabindex="-1"></a>    <span class="co"># we use the 'next_app_name' to identify potential consecutive app visits</span></span>
<span id="cb8-29"><a href="#cb8-29" aria-hidden="true" tabindex="-1"></a>    <span class="at">next_app_name =</span> <span class="fu">lag</span>(app_name, <span class="at">n =</span> <span class="dv">1</span>L),</span>
<span id="cb8-30"><a href="#cb8-30" aria-hidden="true" tabindex="-1"></a>    <span class="co"># we need the 'next_start_time' to calculate the time_gap between events</span></span>
<span id="cb8-31"><a href="#cb8-31" aria-hidden="true" tabindex="-1"></a>    <span class="at">next_start_time =</span> <span class="fu">lag</span>(start_time, <span class="at">n =</span> <span class="dv">1</span>L),</span>
<span id="cb8-32"><a href="#cb8-32" aria-hidden="true" tabindex="-1"></a>    <span class="co"># we calculate the time_gap in seconds</span></span>
<span id="cb8-33"><a href="#cb8-33" aria-hidden="true" tabindex="-1"></a>    <span class="at">time_gap =</span> <span class="fu">as.numeric</span>(next_start_time <span class="sc">-</span> end_time, <span class="at">units =</span> <span class="st">"secs"</span>),</span>
<span id="cb8-34"><a href="#cb8-34" aria-hidden="true" tabindex="-1"></a>    <span class="co"># we use 'previous_start_time' to replace 'start_time' with the new correct timestamp if we merge consecutive app visits</span></span>
<span id="cb8-35"><a href="#cb8-35" aria-hidden="true" tabindex="-1"></a>    <span class="at">previous_start_time =</span> <span class="fu">lead</span>(start_time, <span class="at">n =</span> <span class="dv">1</span>L)</span>
<span id="cb8-36"><a href="#cb8-36" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">%&gt;%</span> </span>
<span id="cb8-37"><a href="#cb8-37" aria-hidden="true" tabindex="-1"></a>  <span class="co"># we use a filter variable to identify consecutive app visits, which we can remove later</span></span>
<span id="cb8-38"><a href="#cb8-38" aria-hidden="true" tabindex="-1"></a>  <span class="fu">mutate</span>(</span>
<span id="cb8-39"><a href="#cb8-39" aria-hidden="true" tabindex="-1"></a>    <span class="co"># conditions are:</span></span>
<span id="cb8-40"><a href="#cb8-40" aria-hidden="true" tabindex="-1"></a>    <span class="co"># - panelist_id and next_panelist_id are identical</span></span>
<span id="cb8-41"><a href="#cb8-41" aria-hidden="true" tabindex="-1"></a>    <span class="co"># - app_name and next_app_name are identical</span></span>
<span id="cb8-42"><a href="#cb8-42" aria-hidden="true" tabindex="-1"></a>    <span class="co"># - time_gap of at most 1 sec</span></span>
<span id="cb8-43"><a href="#cb8-43" aria-hidden="true" tabindex="-1"></a>    <span class="at">filter_var =</span> <span class="fu">if_else</span>((panelist_id <span class="sc">==</span> next_panelist_id <span class="sc">&amp;</span> app_name <span class="sc">==</span> next_app_name <span class="sc">&amp;</span> time_gap <span class="sc">&lt;=</span> <span class="dv">1</span>), <span class="st">"remove"</span>, <span class="st">"keep"</span>),</span>
<span id="cb8-44"><a href="#cb8-44" aria-hidden="true" tabindex="-1"></a>    <span class="co"># I like to keep a column indicating that I changed something, therefore I create 'rows_merged'</span></span>
<span id="cb8-45"><a href="#cb8-45" aria-hidden="true" tabindex="-1"></a>    <span class="at">rows_merged =</span> <span class="fu">if_else</span>((<span class="fu">lead</span>(filter_var)) <span class="sc">==</span> <span class="st">"remove"</span>, <span class="st">"Yes"</span>, <span class="st">"No"</span>),</span>
<span id="cb8-46"><a href="#cb8-46" aria-hidden="true" tabindex="-1"></a>    <span class="co"># replace 'start_time' with 'previous_start_time' for relevant rows </span></span>
<span id="cb8-47"><a href="#cb8-47" aria-hidden="true" tabindex="-1"></a>    <span class="at">start_time =</span> <span class="fu">if_else</span>(filter_var <span class="sc">==</span> <span class="st">"keep"</span> <span class="sc">&amp;</span> rows_merged <span class="sc">==</span> <span class="st">"Yes"</span>, <span class="fu">as_datetime</span>(previous_start_time), <span class="fu">as_datetime</span>(start_time)),</span>
<span id="cb8-48"><a href="#cb8-48" aria-hidden="true" tabindex="-1"></a>    <span class="co"># replace 'duration' with the updated duration for relevant rows</span></span>
<span id="cb8-49"><a href="#cb8-49" aria-hidden="true" tabindex="-1"></a>    <span class="at">duration =</span> <span class="fu">if_else</span>(filter_var <span class="sc">==</span> <span class="st">"keep"</span> <span class="sc">&amp;</span> rows_merged <span class="sc">==</span> <span class="st">"Yes"</span>, <span class="fu">as.numeric</span>(end_time <span class="sc">-</span> start_time, <span class="at">units =</span> <span class="st">"secs"</span>), duration)</span>
<span id="cb8-50"><a href="#cb8-50" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">%&gt;%</span> </span>
<span id="cb8-51"><a href="#cb8-51" aria-hidden="true" tabindex="-1"></a>  <span class="co"># remove consecutive visits</span></span>
<span id="cb8-52"><a href="#cb8-52" aria-hidden="true" tabindex="-1"></a>  <span class="fu">filter</span>(filter_var <span class="sc">==</span> <span class="st">"keep"</span>) <span class="sc">%&gt;%</span> </span>
<span id="cb8-53"><a href="#cb8-53" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ungroup</span>() </span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>Okay, we are done with our preprocessing of the blue data set. Let’s have a quick look.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb9"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a><span class="fu">glimpse</span>(blue_data_clean)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code>Rows: 51,327
Columns: 14
$ panelist_id         &lt;chr&gt; "participant_1", "participant_1", "participant_1",…
$ date                &lt;date&gt; 2021-03-11, 2021-03-11, 2021-03-11, 2021-03-11, 2…
$ app_package         &lt;chr&gt; "com.sec.android.app.sbrowser", "com.twitter.andro…
$ app_name            &lt;chr&gt; "Samsung Internet", "Twitter", "Samsung Internet",…
$ start_time          &lt;dttm&gt; 2021-03-11 23:34:10, 2021-03-11 23:33:59, 2021-03…
$ end_time            &lt;dttm&gt; 2021-03-11 23:34:13, 2021-03-11 23:34:09, 2021-03…
$ duration            &lt;dbl&gt; 3, 10, 91, 45, 273, 6, 12, 2, 116, 14, 18, 22, 29,…
$ next_panelist_id    &lt;chr&gt; "participant_1", "participant_1", "participant_1",…
$ next_app_name       &lt;chr&gt; "Twitter", "Samsung Internet", "Twitter", "Samsung…
$ next_start_time     &lt;dttm&gt; 2021-03-11 23:34:14, 2021-03-11 23:34:10, 2021-03…
$ time_gap            &lt;dbl&gt; 1, 1, 2, 2, 0, 0, 22, 3706, 6, 3, 4285, 2, 884, 27…
$ previous_start_time &lt;dttm&gt; 2021-03-11 23:34:10, 2021-03-11 23:33:59, 2021-03…
$ filter_var          &lt;chr&gt; "keep", "keep", "keep", "keep", "keep", "keep", "k…
$ rows_merged         &lt;chr&gt; "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "…</code></pre>
</div>
</div>
<div class="callout callout-style-default callout-note no-icon callout-titled">
<div class="callout-header d-flex align-content-center" data-bs-toggle="collapse" data-bs-target=".callout-2-contents" aria-controls="callout-2" aria-expanded="false" aria-label="Toggle callout">
<div class="callout-icon-container">
<i class="callout-icon no-icon"></i>
</div>
<div class="callout-title-container flex-fill">
Note
</div>
<div class="callout-btn-toggle d-inline-block border-0 py-1 ps-1 pe-0 float-end"><i class="callout-toggle"></i></div>
</div>
<div id="callout-2" class="callout-2-contents callout-collapse collapse">
<div class="callout-body-container callout-body">
<p><strong>Threshold for time gaps</strong></p>
<p>We would like to point out here that merging consecutive events of the same app requires a threshold value: the maximum time gap between two such events up to which they are still treated as a single visit (1 second in our example). However, no standard value for this threshold has been established yet, so the choice is left to the discretion of the researcher. We advocate a transparent practice in which the chosen threshold value is reported and briefly explained.</p>
</div>
</div>
</div>
<p>Alright, we are ready for some analysis.</p>
</section>
<section id="calculating-visits" class="level2" data-number="5.3">
<h2 data-number="5.3" class="anchored" data-anchor-id="calculating-visits"><span class="header-section-number">5.3</span> Calculating Visits</h2>
<p>A visit represents a unit of exposure, such as a discrete instance of app use. In other words, every time the app is opened in the foreground, we count it as a visit.</p>
<p>In order to measure visits correctly, it is particularly important to remove consecutive calls beforehand, as otherwise the number of visits will be overestimated.</p>
<p>Furthermore, it is conceivable to set a minimum duration for a call (e.g., an app must be open for at least 5 seconds to count as a visit). However, this depends on the specific research project and the objective of the research question. In our example, we do not set a threshold for the duration of a visit. Establishing a robust visit definition ensures consistent measurement of usage frequency across data sets and users.</p>
<p>Let’s stick to the dimensions we’re using for comparison. We are interested in the top 10 most-visited apps in our sample (dimension = app level).</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb11"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a>most_visited_apps <span class="ot">=</span> blue_data_clean <span class="sc">%&gt;%</span> </span>
<span id="cb11-2"><a href="#cb11-2" aria-hidden="true" tabindex="-1"></a>  <span class="co"># group by application</span></span>
<span id="cb11-3"><a href="#cb11-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">group_by</span>(app_name) <span class="sc">%&gt;%</span> </span>
<span id="cb11-4"><a href="#cb11-4" aria-hidden="true" tabindex="-1"></a>  <span class="co"># summarise the total number of visits for each application</span></span>
<span id="cb11-5"><a href="#cb11-5" aria-hidden="true" tabindex="-1"></a>  <span class="fu">summarise</span>(</span>
<span id="cb11-6"><a href="#cb11-6" aria-hidden="true" tabindex="-1"></a>    <span class="at">visit =</span> <span class="fu">n</span>()</span>
<span id="cb11-7"><a href="#cb11-7" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">%&gt;%</span> </span>
<span id="cb11-8"><a href="#cb11-8" aria-hidden="true" tabindex="-1"></a>  <span class="co"># sort in descending order</span></span>
<span id="cb11-9"><a href="#cb11-9" aria-hidden="true" tabindex="-1"></a>  <span class="fu">arrange</span>(<span class="fu">desc</span>(visit)) <span class="sc">%&gt;%</span> </span>
<span id="cb11-10"><a href="#cb11-10" aria-hidden="true" tabindex="-1"></a>  <span class="co"># select the top 10 rows</span></span>
<span id="cb11-11"><a href="#cb11-11" aria-hidden="true" tabindex="-1"></a>  <span class="fu">head</span>(<span class="at">n =</span> <span class="dv">10</span>)</span>
<span id="cb11-12"><a href="#cb11-12" aria-hidden="true" tabindex="-1"></a>most_visited_apps</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 10 × 2
   app_name     visit
   &lt;chr&gt;        &lt;int&gt;
 1 WhatsApp      8181
 2 Chrome        4685
 3 Facebook      2635
 4 TRACKING APP  2532
 5 Instagram     2236
 6 Gmail         1458
 7 Telefon       1176
 8 Messenger      995
 9 YouTube        877
10 Snapchat       812</code></pre>
</div>
</div>
<p>A table is nice but a plot is better. Let’s visualize our findings using ggplot.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb13"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb13-1"><a href="#cb13-1" aria-hidden="true" tabindex="-1"></a><span class="co"># We need the number of participants for our plot</span></span>
<span id="cb13-2"><a href="#cb13-2" aria-hidden="true" tabindex="-1"></a>blue_n_participants <span class="ot">=</span> blue_data_clean <span class="sc">%&gt;%</span> </span>
<span id="cb13-3"><a href="#cb13-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">summarise</span>(</span>
<span id="cb13-4"><a href="#cb13-4" aria-hidden="true" tabindex="-1"></a>    <span class="at">n_panelist =</span> <span class="fu">n_distinct</span>(panelist_id)</span>
<span id="cb13-5"><a href="#cb13-5" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">%&gt;%</span> </span>
<span id="cb13-6"><a href="#cb13-6" aria-hidden="true" tabindex="-1"></a>  <span class="fu">pull</span>(n_panelist)</span>
<span id="cb13-7"><a href="#cb13-7" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb13-8"><a href="#cb13-8" aria-hidden="true" tabindex="-1"></a>plot1 <span class="ot">=</span> <span class="fu">ggplot</span>(most_visited_apps, <span class="fu">aes</span>(<span class="at">x =</span> <span class="fu">reorder</span>(app_name, visit), <span class="at">y =</span> visit)) <span class="sc">+</span></span>
<span id="cb13-9"><a href="#cb13-9" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_col</span>(<span class="at">width =</span> <span class="fl">0.6</span>, <span class="at">fill =</span> <span class="st">"#5E81AC"</span>) <span class="sc">+</span></span>
<span id="cb13-10"><a href="#cb13-10" aria-hidden="true" tabindex="-1"></a>  <span class="fu">coord_flip</span>() <span class="sc">+</span></span>
<span id="cb13-11"><a href="#cb13-11" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_text</span>(<span class="fu">aes</span>(<span class="at">label =</span> visit), <span class="at">hjust =</span> <span class="sc">-</span><span class="fl">0.2</span>, <span class="at">size =</span> <span class="dv">3</span>, <span class="at">family =</span> <span class="st">"serif"</span>) <span class="sc">+</span></span>
<span id="cb13-12"><a href="#cb13-12" aria-hidden="true" tabindex="-1"></a>  <span class="fu">scale_y_continuous</span>(<span class="at">expand =</span> <span class="fu">expansion</span>(<span class="at">mult =</span> <span class="fu">c</span>(<span class="dv">0</span>, <span class="fl">0.15</span>))) <span class="sc">+</span></span>
<span id="cb13-13"><a href="#cb13-13" aria-hidden="true" tabindex="-1"></a>  <span class="fu">theme_minimal</span>(<span class="at">base_size =</span> <span class="dv">13</span>, <span class="at">base_family =</span> <span class="st">"serif"</span>) <span class="sc">+</span></span>
<span id="cb13-14"><a href="#cb13-14" aria-hidden="true" tabindex="-1"></a>  <span class="fu">theme</span>(</span>
<span id="cb13-15"><a href="#cb13-15" aria-hidden="true" tabindex="-1"></a>    <span class="at">legend.position =</span> <span class="st">"none"</span>,</span>
<span id="cb13-16"><a href="#cb13-16" aria-hidden="true" tabindex="-1"></a>    <span class="at">plot.title =</span> <span class="fu">element_text</span>(<span class="at">face =</span> <span class="st">"bold"</span>, <span class="at">size =</span> <span class="dv">14</span>, <span class="at">family =</span> <span class="st">"serif"</span>),</span>
<span id="cb13-17"><a href="#cb13-17" aria-hidden="true" tabindex="-1"></a>    <span class="at">axis.text.y =</span> <span class="fu">element_text</span>(<span class="at">family =</span> <span class="st">"serif"</span>),</span>
<span id="cb13-18"><a href="#cb13-18" aria-hidden="true" tabindex="-1"></a>    <span class="at">axis.text.x =</span> <span class="fu">element_text</span>(<span class="at">family =</span> <span class="st">"serif"</span>)</span>
<span id="cb13-19"><a href="#cb13-19" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">+</span></span>
<span id="cb13-20"><a href="#cb13-20" aria-hidden="true" tabindex="-1"></a>  <span class="fu">labs</span>(</span>
<span id="cb13-21"><a href="#cb13-21" aria-hidden="true" tabindex="-1"></a>    <span class="at">title =</span> <span class="st">"Top 10 most used apps by number of visits"</span>,</span>
<span id="cb13-22"><a href="#cb13-22" aria-hidden="true" tabindex="-1"></a>    <span class="at">subtitle =</span> <span class="fu">paste0</span>(<span class="st">"Data based on a Sample of German Internet Users (N = "</span>, blue_n_participants, <span class="st">")"</span>),</span>
<span id="cb13-23"><a href="#cb13-23" aria-hidden="true" tabindex="-1"></a>    <span class="at">x =</span> <span class="st">"App Name"</span>,</span>
<span id="cb13-24"><a href="#cb13-24" aria-hidden="true" tabindex="-1"></a>    <span class="at">y =</span> <span class="st">"Visits"</span></span>
<span id="cb13-25"><a href="#cb13-25" aria-hidden="true" tabindex="-1"></a>  )</span>
<span id="cb13-26"><a href="#cb13-26" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb13-27"><a href="#cb13-27" aria-hidden="true" tabindex="-1"></a>plot1</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<div>
<figure class="figure">
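<p><img src="readme_files/figure-html/unnamed-chunk-10-1.png" class="img-fluid figure-img" width="672" alt="Horizontal bar chart of the top 10 most used apps by number of visits; WhatsApp leads with 8181 visits, followed by Chrome (4685) and Facebook (2635)."></p>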
</figure>
</div>
</div>
</div>
</section>
<section id="calculating-duration" class="level2" data-number="5.4">
<h2 data-number="5.4" class="anchored" data-anchor-id="calculating-duration"><span class="header-section-number">5.4</span> Calculating Duration</h2>
<p>Duration reflects how long a user is exposed to an app or activity. It is another key measure of exposure that complements visit counts.</p>
<p>If a data set doesn’t contain the duration of each event, start_time and end_time can be used to calculate it. Our tidy blue data set, however, already contains all three variables.</p>
<p>This enables us to calculate the duration per app and aggregate it over time. This can include overall smartphone duration, app-specific duration (e.g., Instagram), and temporal patterns of duration. Duration metrics reveal not only how often apps are used, but how much attention they receive.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb14"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb14-1"><a href="#cb14-1" aria-hidden="true" tabindex="-1"></a>most_used_apps <span class="ot">=</span> blue_data_clean <span class="sc">%&gt;%</span> </span>
<span id="cb14-2"><a href="#cb14-2" aria-hidden="true" tabindex="-1"></a>  <span class="co"># group by application</span></span>
<span id="cb14-3"><a href="#cb14-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">group_by</span>(app_name) <span class="sc">%&gt;%</span> </span>
<span id="cb14-4"><a href="#cb14-4" aria-hidden="true" tabindex="-1"></a>  <span class="co"># summarise the total duration of each application</span></span>
<span id="cb14-5"><a href="#cb14-5" aria-hidden="true" tabindex="-1"></a>  <span class="fu">summarise</span>(</span>
<span id="cb14-6"><a href="#cb14-6" aria-hidden="true" tabindex="-1"></a>    <span class="at">duration =</span> <span class="fu">round</span>(<span class="fu">sum</span>(duration, <span class="at">na.rm =</span> <span class="cn">TRUE</span>) <span class="sc">/</span> <span class="dv">60</span>, <span class="at">digits =</span> <span class="dv">2</span>) <span class="co"># we divide by 60 to get minutes and round the result</span></span>
<span id="cb14-7"><a href="#cb14-7" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">%&gt;%</span> </span>
<span id="cb14-8"><a href="#cb14-8" aria-hidden="true" tabindex="-1"></a>  <span class="co"># sort in descending order</span></span>
<span id="cb14-9"><a href="#cb14-9" aria-hidden="true" tabindex="-1"></a>  <span class="fu">arrange</span>(<span class="fu">desc</span>(duration)) <span class="sc">%&gt;%</span> </span>
<span id="cb14-10"><a href="#cb14-10" aria-hidden="true" tabindex="-1"></a>  <span class="co"># select the top 10 rows</span></span>
<span id="cb14-11"><a href="#cb14-11" aria-hidden="true" tabindex="-1"></a>  <span class="fu">head</span>(<span class="at">n =</span> <span class="dv">10</span>) </span>
<span id="cb14-12"><a href="#cb14-12" aria-hidden="true" tabindex="-1"></a>most_used_apps</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 10 × 2
   app_name         duration
   &lt;chr&gt;               &lt;dbl&gt;
 1 Chrome              7946.
 2 Facebook            6890.
 3 WhatsApp            6480.
 4 YouTube             4927.
 5 Instagram           4219.
 6 Reddit              1339.
 7 Spider Solitaire    1224.
 8 Farm Heroes Saga    1134.
 9 Telefon             1084.
10 Disney+             1067.</code></pre>
</div>
</div>
<p>Let’s plot this again.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb16"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb16-1"><a href="#cb16-1" aria-hidden="true" tabindex="-1"></a>plot2 <span class="ot">=</span> <span class="fu">ggplot</span>(most_used_apps, <span class="fu">aes</span>(<span class="at">x =</span> <span class="fu">reorder</span>(app_name, duration), <span class="at">y =</span> duration)) <span class="sc">+</span></span>
<span id="cb16-2"><a href="#cb16-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_col</span>(<span class="at">width =</span> <span class="fl">0.6</span>, <span class="at">fill =</span> <span class="st">"#5E81AC"</span>) <span class="sc">+</span></span>
<span id="cb16-3"><a href="#cb16-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">coord_flip</span>() <span class="sc">+</span></span>
<span id="cb16-4"><a href="#cb16-4" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_text</span>(<span class="fu">aes</span>(<span class="at">label =</span> duration), <span class="at">hjust =</span> <span class="sc">-</span><span class="fl">0.2</span>, <span class="at">size =</span> <span class="dv">3</span>, <span class="at">family =</span> <span class="st">"serif"</span>) <span class="sc">+</span></span>
<span id="cb16-5"><a href="#cb16-5" aria-hidden="true" tabindex="-1"></a>  <span class="fu">scale_y_continuous</span>(<span class="at">expand =</span> <span class="fu">expansion</span>(<span class="at">mult =</span> <span class="fu">c</span>(<span class="dv">0</span>, <span class="fl">0.15</span>))) <span class="sc">+</span></span>
<span id="cb16-6"><a href="#cb16-6" aria-hidden="true" tabindex="-1"></a>  <span class="fu">theme_minimal</span>(<span class="at">base_size =</span> <span class="dv">13</span>, <span class="at">base_family =</span> <span class="st">"serif"</span>) <span class="sc">+</span></span>
<span id="cb16-7"><a href="#cb16-7" aria-hidden="true" tabindex="-1"></a>  <span class="fu">theme</span>(</span>
<span id="cb16-8"><a href="#cb16-8" aria-hidden="true" tabindex="-1"></a>    <span class="at">legend.position =</span> <span class="st">"none"</span>,</span>
<span id="cb16-9"><a href="#cb16-9" aria-hidden="true" tabindex="-1"></a>    <span class="at">plot.title =</span> <span class="fu">element_text</span>(<span class="at">face =</span> <span class="st">"bold"</span>, <span class="at">size =</span> <span class="dv">14</span>, <span class="at">family =</span> <span class="st">"serif"</span>),</span>
<span id="cb16-10"><a href="#cb16-10" aria-hidden="true" tabindex="-1"></a>    <span class="at">axis.text.y =</span> <span class="fu">element_text</span>(<span class="at">family =</span> <span class="st">"serif"</span>),</span>
<span id="cb16-11"><a href="#cb16-11" aria-hidden="true" tabindex="-1"></a>    <span class="at">axis.text.x =</span> <span class="fu">element_text</span>(<span class="at">family =</span> <span class="st">"serif"</span>)</span>
<span id="cb16-12"><a href="#cb16-12" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">+</span></span>
<span id="cb16-13"><a href="#cb16-13" aria-hidden="true" tabindex="-1"></a>  <span class="fu">labs</span>(</span>
<span id="cb16-14"><a href="#cb16-14" aria-hidden="true" tabindex="-1"></a>    <span class="at">title =</span> <span class="st">"Top 10 most used apps by total duration"</span>,</span>
<span id="cb16-15"><a href="#cb16-15" aria-hidden="true" tabindex="-1"></a>    <span class="at">subtitle =</span> <span class="fu">paste0</span>(<span class="st">"Data based on a Sample of German Internet Users (N = "</span>, blue_n_participants, <span class="st">")"</span>),</span>
<span id="cb16-16"><a href="#cb16-16" aria-hidden="true" tabindex="-1"></a>    <span class="at">x =</span> <span class="st">"App Name"</span>,</span>
<span id="cb16-17"><a href="#cb16-17" aria-hidden="true" tabindex="-1"></a>    <span class="at">y =</span> <span class="st">"Duration in minutes"</span></span>
<span id="cb16-18"><a href="#cb16-18" aria-hidden="true" tabindex="-1"></a>  )</span>
<span id="cb16-19"><a href="#cb16-19" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb16-20"><a href="#cb16-20" aria-hidden="true" tabindex="-1"></a>plot2</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<div>
<figure class="figure">
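<p><img src="readme_files/figure-html/unnamed-chunk-12-1.png" class="img-fluid figure-img" width="672" alt="Horizontal bar chart of the top 10 most used apps by total duration in minutes; Chrome leads with about 7946 minutes, followed by Facebook (6890) and WhatsApp (6480)."></p>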
</figure>
</div>
</div>
</div>
</section>
<section id="mobile-behavior-between-participants" class="level2" data-number="5.5">
<h2 data-number="5.5" class="anchored" data-anchor-id="mobile-behavior-between-participants"><span class="header-section-number">5.5</span> Mobile Behavior between participants</h2>
<p>The distributions in app usage data are often extremely skewed. The use of apps or smartphones varies greatly between the individuals observed and between time units, and a small number of heavy users often accounts for a large share of total usage. This becomes particularly important when choosing summary statistics: measures of central tendency such as the mean and median can differ substantially, and robust statistics (medians, quantiles) are often more informative than means alone.</p>
<p>Let’s take a quick look at this using Instagram usage as an example.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb17"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb17-1"><a href="#cb17-1" aria-hidden="true" tabindex="-1"></a>instagram_participants <span class="ot">=</span> blue_data_clean <span class="sc">%&gt;%</span> </span>
<span id="cb17-2"><a href="#cb17-2" aria-hidden="true" tabindex="-1"></a>  <span class="co"># filter for Instagram usage</span></span>
<span id="cb17-3"><a href="#cb17-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">filter</span>(app_name <span class="sc">==</span> <span class="st">"Instagram"</span>) <span class="sc">%&gt;%</span> </span>
<span id="cb17-4"><a href="#cb17-4" aria-hidden="true" tabindex="-1"></a>  <span class="co"># group by participants </span></span>
<span id="cb17-5"><a href="#cb17-5" aria-hidden="true" tabindex="-1"></a>  <span class="fu">group_by</span>(panelist_id) <span class="sc">%&gt;%</span> </span>
<span id="cb17-6"><a href="#cb17-6" aria-hidden="true" tabindex="-1"></a>  <span class="fu">summarise</span>(</span>
<span id="cb17-7"><a href="#cb17-7" aria-hidden="true" tabindex="-1"></a>    <span class="co"># calculate visits to Instagram</span></span>
<span id="cb17-8"><a href="#cb17-8" aria-hidden="true" tabindex="-1"></a>    <span class="at">visits =</span> <span class="fu">n</span>(),</span>
<span id="cb17-9"><a href="#cb17-9" aria-hidden="true" tabindex="-1"></a>    <span class="co"># and usage time</span></span>
<span id="cb17-10"><a href="#cb17-10" aria-hidden="true" tabindex="-1"></a>    <span class="at">duration =</span> <span class="fu">sum</span>(duration, <span class="at">na.rm =</span> <span class="cn">TRUE</span>)</span>
<span id="cb17-11"><a href="#cb17-11" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">%&gt;%</span> </span>
<span id="cb17-12"><a href="#cb17-12" aria-hidden="true" tabindex="-1"></a>  <span class="fu">pivot_longer</span>(</span>
<span id="cb17-13"><a href="#cb17-13" aria-hidden="true" tabindex="-1"></a>    <span class="at">cols =</span> <span class="fu">c</span>(visits, duration),</span>
<span id="cb17-14"><a href="#cb17-14" aria-hidden="true" tabindex="-1"></a>    <span class="at">names_to =</span> <span class="st">"metric"</span>,</span>
<span id="cb17-15"><a href="#cb17-15" aria-hidden="true" tabindex="-1"></a>    <span class="at">values_to =</span> <span class="st">"value"</span></span>
<span id="cb17-16"><a href="#cb17-16" aria-hidden="true" tabindex="-1"></a>  ) </span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<div class="cell">
<div class="sourceCode cell-code" id="cb18"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb18-1"><a href="#cb18-1" aria-hidden="true" tabindex="-1"></a>plot3 <span class="ot">=</span> <span class="fu">ggplot</span>(instagram_participants, <span class="fu">aes</span>(<span class="at">x =</span> metric, <span class="at">y =</span> value, <span class="at">fill =</span> metric)) <span class="sc">+</span></span>
<span id="cb18-2"><a href="#cb18-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_violin</span>(<span class="at">trim =</span> <span class="cn">FALSE</span>, <span class="at">alpha =</span> <span class="fl">0.6</span>) <span class="sc">+</span></span>
<span id="cb18-3"><a href="#cb18-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_boxplot</span>(<span class="at">width =</span> <span class="fl">0.2</span>, <span class="at">outlier.size =</span> <span class="fl">0.8</span>, <span class="at">alpha =</span> <span class="fl">0.9</span>) <span class="sc">+</span></span>
<span id="cb18-4"><a href="#cb18-4" aria-hidden="true" tabindex="-1"></a>  <span class="fu">stat_summary</span>(<span class="at">fun =</span> mean, <span class="at">geom =</span> <span class="st">"point"</span>, <span class="at">shape =</span> <span class="dv">4</span>, <span class="at">size =</span> <span class="dv">3</span>) <span class="sc">+</span></span>
<span id="cb18-5"><a href="#cb18-5" aria-hidden="true" tabindex="-1"></a>  <span class="fu">scale_y_continuous</span>(<span class="at">expand =</span> <span class="fu">expansion</span>(<span class="at">mult =</span> <span class="fu">c</span>(<span class="dv">0</span>, <span class="fl">0.15</span>))) <span class="sc">+</span></span>
<span id="cb18-6"><a href="#cb18-6" aria-hidden="true" tabindex="-1"></a>  <span class="fu">scale_fill_manual</span>(</span>
<span id="cb18-7"><a href="#cb18-7" aria-hidden="true" tabindex="-1"></a>    <span class="at">values =</span> <span class="fu">c</span>(<span class="st">"visits"</span> <span class="ot">=</span> <span class="st">"#5E81AC"</span>, <span class="st">"duration"</span> <span class="ot">=</span> <span class="st">"#5EA8AC"</span>),</span>
<span id="cb18-8"><a href="#cb18-8" aria-hidden="true" tabindex="-1"></a>    <span class="at">labels =</span> <span class="fu">c</span>(<span class="st">"Visits"</span>, <span class="st">"Duration (minutes)"</span>)</span>
<span id="cb18-9"><a href="#cb18-9" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">+</span></span>
<span id="cb18-10"><a href="#cb18-10" aria-hidden="true" tabindex="-1"></a>  <span class="fu">facet_wrap</span>(<span class="sc">~</span> metric, <span class="at">scales =</span> <span class="st">"free"</span>) <span class="sc">+</span></span>
<span id="cb18-11"><a href="#cb18-11" aria-hidden="true" tabindex="-1"></a>  <span class="fu">theme_minimal</span>(<span class="at">base_size =</span> <span class="dv">13</span>, <span class="at">base_family =</span> <span class="st">"serif"</span>) <span class="sc">+</span></span>
<span id="cb18-12"><a href="#cb18-12" aria-hidden="true" tabindex="-1"></a>  <span class="fu">theme</span>(</span>
<span id="cb18-13"><a href="#cb18-13" aria-hidden="true" tabindex="-1"></a>    <span class="at">legend.position =</span> <span class="st">"none"</span>,</span>
<span id="cb18-14"><a href="#cb18-14" aria-hidden="true" tabindex="-1"></a>    <span class="at">strip.text =</span> <span class="fu">element_text</span>(<span class="at">face =</span> <span class="st">"bold"</span>, <span class="at">size =</span> <span class="dv">12</span>),</span>
<span id="cb18-15"><a href="#cb18-15" aria-hidden="true" tabindex="-1"></a>    <span class="at">plot.title =</span> <span class="fu">element_text</span>(<span class="at">face =</span> <span class="st">"bold"</span>)</span>
<span id="cb18-16"><a href="#cb18-16" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">+</span></span>
<span id="cb18-17"><a href="#cb18-17" aria-hidden="true" tabindex="-1"></a>  <span class="fu">labs</span>(</span>
<span id="cb18-18"><a href="#cb18-18" aria-hidden="true" tabindex="-1"></a>    <span class="at">title =</span> <span class="st">"Instagram Visits and Duration per Participant"</span>,</span>
<span id="cb18-19"><a href="#cb18-19" aria-hidden="true" tabindex="-1"></a>    <span class="at">subtitle =</span> <span class="fu">paste0</span>(<span class="st">"Data based on a Sample of German Internet Users (N = "</span>, blue_n_participants, <span class="st">")"</span>),</span>
<span id="cb18-20"><a href="#cb18-20" aria-hidden="true" tabindex="-1"></a>    <span class="at">x =</span> <span class="st">""</span>,</span>
<span id="cb18-21"><a href="#cb18-21" aria-hidden="true" tabindex="-1"></a>    <span class="at">y =</span> <span class="st">"Value"</span></span>
<span id="cb18-22"><a href="#cb18-22" aria-hidden="true" tabindex="-1"></a>  )</span>
<span id="cb18-23"><a href="#cb18-23" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb18-24"><a href="#cb18-24" aria-hidden="true" tabindex="-1"></a>plot3</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<div>
<figure class="figure">
<p><img src="readme_files/figure-html/unnamed-chunk-14-1.png" class="img-fluid figure-img" width="672" alt="Violin plots with overlaid box plots of Instagram visits and duration per participant"></p>
</figure>
</div>
</div>
</div>
<p>The violin plots show how the number of visits and the usage duration differ across participants. Each boxplot indicates the median and the interquartile range, while the X marks the mean.</p>
<p>Next, let’s try to get a better idea of what’s going on in our data set. To better understand the variation in our data, we examine Instagram usage at the individual level and analyse how usage differs across participants and across days. Plotting these distributions illustrates the differences in frequency and duration of app usage between participants and shows the skewed distribution.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb19"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb19-1"><a href="#cb19-1" aria-hidden="true" tabindex="-1"></a>random_sample_25 <span class="ot">=</span> blue_data_clean <span class="sc">%&gt;%</span> </span>
<span id="cb19-2"><a href="#cb19-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">select</span>(panelist_id) <span class="sc">%&gt;%</span> </span>
<span id="cb19-3"><a href="#cb19-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">distinct</span>() <span class="sc">%&gt;%</span> </span>
<span id="cb19-4"><a href="#cb19-4" aria-hidden="true" tabindex="-1"></a>  <span class="fu">slice_sample</span>(<span class="at">n =</span> <span class="dv">25</span>) <span class="sc">%&gt;%</span> </span>
<span id="cb19-5"><a href="#cb19-5" aria-hidden="true" tabindex="-1"></a>  <span class="fu">pull</span>(panelist_id)</span>
<span id="cb19-6"><a href="#cb19-6" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb19-7"><a href="#cb19-7" aria-hidden="true" tabindex="-1"></a>instagram_participants_days <span class="ot">=</span> blue_data_clean <span class="sc">%&gt;%</span> </span>
<span id="cb19-8"><a href="#cb19-8" aria-hidden="true" tabindex="-1"></a>  <span class="co"># filter for Instagram usage</span></span>
<span id="cb19-9"><a href="#cb19-9" aria-hidden="true" tabindex="-1"></a>  <span class="fu">filter</span>(app_name <span class="sc">==</span> <span class="st">"Instagram"</span>) <span class="sc">%&gt;%</span> </span>
<span id="cb19-10"><a href="#cb19-10" aria-hidden="true" tabindex="-1"></a>  <span class="fu">filter</span>(panelist_id <span class="sc">%in%</span> random_sample_25) <span class="sc">%&gt;%</span> </span>
<span id="cb19-11"><a href="#cb19-11" aria-hidden="true" tabindex="-1"></a>  <span class="co"># group by participants and date</span></span>
<span id="cb19-12"><a href="#cb19-12" aria-hidden="true" tabindex="-1"></a>  <span class="fu">group_by</span>(panelist_id, date) <span class="sc">%&gt;%</span> </span>
<span id="cb19-13"><a href="#cb19-13" aria-hidden="true" tabindex="-1"></a>  <span class="fu">summarise</span>(</span>
<span id="cb19-14"><a href="#cb19-14" aria-hidden="true" tabindex="-1"></a>    <span class="co"># calculate usage time on Instagram</span></span>
<span id="cb19-15"><a href="#cb19-15" aria-hidden="true" tabindex="-1"></a>    <span class="at">duration =</span> <span class="fu">sum</span>(duration, <span class="at">na.rm =</span> <span class="cn">TRUE</span>) <span class="sc">/</span> <span class="dv">60</span> <span class="sc">/</span><span class="dv">60</span> <span class="co"># in hours</span></span>
<span id="cb19-16"><a href="#cb19-16" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">%&gt;%</span> </span>
<span id="cb19-17"><a href="#cb19-17" aria-hidden="true" tabindex="-1"></a>  <span class="fu">pivot_longer</span>(</span>
<span id="cb19-18"><a href="#cb19-18" aria-hidden="true" tabindex="-1"></a>    <span class="at">cols =</span> <span class="fu">c</span>(duration),</span>
<span id="cb19-19"><a href="#cb19-19" aria-hidden="true" tabindex="-1"></a>    <span class="at">names_to =</span> <span class="st">"metric"</span>,</span>
<span id="cb19-20"><a href="#cb19-20" aria-hidden="true" tabindex="-1"></a>    <span class="at">values_to =</span> <span class="st">"value"</span></span>
<span id="cb19-21"><a href="#cb19-21" aria-hidden="true" tabindex="-1"></a>  )</span>
<span id="cb19-22"><a href="#cb19-22" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb19-23"><a href="#cb19-23" aria-hidden="true" tabindex="-1"></a><span class="co"># Boxplot-Plot </span></span>
<span id="cb19-24"><a href="#cb19-24" aria-hidden="true" tabindex="-1"></a>plot3 <span class="ot">=</span> <span class="fu">ggplot</span>(instagram_participants_days, </span>
<span id="cb19-25"><a href="#cb19-25" aria-hidden="true" tabindex="-1"></a>  <span class="fu">aes</span>(<span class="at">x =</span> <span class="fu">fct_reorder</span>(panelist_id, value, <span class="at">.fun =</span> mean, <span class="at">.desc =</span> <span class="cn">TRUE</span>),</span>
<span id="cb19-26"><a href="#cb19-26" aria-hidden="true" tabindex="-1"></a>      <span class="at">y =</span> value,</span>
<span id="cb19-27"><a href="#cb19-27" aria-hidden="true" tabindex="-1"></a>      <span class="at">fill =</span> metric)) <span class="sc">+</span></span>
<span id="cb19-28"><a href="#cb19-28" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_boxplot</span>(<span class="at">width =</span> <span class="fl">0.5</span>, <span class="at">outlier.size =</span> <span class="dv">1</span>, <span class="at">alpha =</span> <span class="fl">0.8</span>) <span class="sc">+</span></span>
<span id="cb19-29"><a href="#cb19-29" aria-hidden="true" tabindex="-1"></a>  <span class="fu">stat_summary</span>(<span class="at">fun =</span> mean, <span class="at">geom =</span> <span class="st">"point"</span>, <span class="at">shape =</span> <span class="dv">4</span>, <span class="at">size =</span> <span class="dv">2</span>) <span class="sc">+</span></span>
<span id="cb19-30"><a href="#cb19-30" aria-hidden="true" tabindex="-1"></a>  <span class="fu">facet_wrap</span>(<span class="sc">~</span> metric, <span class="at">scales =</span> <span class="st">"free_y"</span>) <span class="sc">+</span></span>
<span id="cb19-31"><a href="#cb19-31" aria-hidden="true" tabindex="-1"></a>  <span class="fu">scale_fill_manual</span>(<span class="at">values =</span> <span class="fu">c</span>(<span class="st">"duration"</span> <span class="ot">=</span> <span class="st">"#5EA8AC"</span>)) <span class="sc">+</span></span>
<span id="cb19-32"><a href="#cb19-32" aria-hidden="true" tabindex="-1"></a>  <span class="fu">coord_flip</span>() <span class="sc">+</span></span>
<span id="cb19-33"><a href="#cb19-33" aria-hidden="true" tabindex="-1"></a>  <span class="fu">theme_minimal</span>(<span class="at">base_size =</span> <span class="dv">13</span>, <span class="at">base_family =</span> <span class="st">"serif"</span>) <span class="sc">+</span></span>
<span id="cb19-34"><a href="#cb19-34" aria-hidden="true" tabindex="-1"></a>  <span class="fu">theme</span>(</span>
<span id="cb19-35"><a href="#cb19-35" aria-hidden="true" tabindex="-1"></a>    <span class="at">legend.position =</span> <span class="st">"none"</span>,</span>
<span id="cb19-36"><a href="#cb19-36" aria-hidden="true" tabindex="-1"></a>    <span class="at">legend.title =</span> <span class="fu">element_blank</span>()</span>
<span id="cb19-37"><a href="#cb19-37" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">+</span></span>
<span id="cb19-38"><a href="#cb19-38" aria-hidden="true" tabindex="-1"></a>  <span class="fu">labs</span>(</span>
<span id="cb19-39"><a href="#cb19-39" aria-hidden="true" tabindex="-1"></a>    <span class="at">title =</span> <span class="st">"Distribution of Instagram Usage per Participant"</span>,</span>
<span id="cb19-40"><a href="#cb19-40" aria-hidden="true" tabindex="-1"></a>    <span class="at">subtitle =</span> <span class="fu">paste0</span>(<span class="st">"Data based on a Sample of German Internet Users (N = "</span>, blue_n_participants, <span class="st">")"</span>),</span>
<span id="cb19-41"><a href="#cb19-41" aria-hidden="true" tabindex="-1"></a>    <span class="at">x =</span> <span class="st">"Participant"</span>,</span>
<span id="cb19-42"><a href="#cb19-42" aria-hidden="true" tabindex="-1"></a>    <span class="at">y =</span> <span class="st">"Average Duration in Hours"</span></span>
<span id="cb19-43"><a href="#cb19-43" aria-hidden="true" tabindex="-1"></a>  )</span>
<span id="cb19-44"><a href="#cb19-44" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb19-45"><a href="#cb19-45" aria-hidden="true" tabindex="-1"></a>plot3</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<div>
<figure class="figure">
<p><img src="readme_files/figure-html/unnamed-chunk-15-1.png" class="img-fluid figure-img" width="672" alt="Box plots of daily Instagram usage duration in hours for a random sample of participants"></p>
</figure>
</div>
</div>
</div>
<p>The large variance in media usage duration, both across and within participants, is clearly evident here. The same applies to the occurrence of extreme values. This skewed distribution is also reflected in the widely differing values for the median and the mean. Against this background, the distribution of the app tracking data should be taken into account when selecting the parameters to be calculated.</p>
</section>
<section id="mobile-behavior-over-time" class="level2" data-number="5.6">
<h2 data-number="5.6" class="anchored" data-anchor-id="mobile-behavior-over-time"><span class="header-section-number">5.6</span> Mobile Behavior over time</h2>
<p>One of the biggest advantages of mobile tracking data is the high temporal granularity of our data. Every event, such as opening an app, is assigned a very precise timestamp (usually even in milliseconds). This allows us to view recorded user behavior over different time periods.</p>
<p>Our blue test data set covers a total of one week. Let’s take a look at the usage of a few social media platforms, including Instagram, YouTube, WhatsApp and Facebook, during that week.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb20"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb20-1"><a href="#cb20-1" aria-hidden="true" tabindex="-1"></a>week <span class="ot">=</span> blue_data_clean <span class="sc">%&gt;%</span> </span>
<span id="cb20-2"><a href="#cb20-2" aria-hidden="true" tabindex="-1"></a>  <span class="co"># filter for Instagram, YouTube, WhatsApp and Facebook</span></span>
<span id="cb20-3"><a href="#cb20-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">filter</span>(app_name <span class="sc">%in%</span> <span class="fu">c</span>(<span class="st">"Instagram"</span>, <span class="st">"YouTube"</span>, <span class="st">"WhatsApp"</span>, <span class="st">"Facebook"</span>)) <span class="sc">%&gt;%</span> </span>
<span id="cb20-4"><a href="#cb20-4" aria-hidden="true" tabindex="-1"></a>  <span class="co"># group by panelist_id, date and app</span></span>
<span id="cb20-5"><a href="#cb20-5" aria-hidden="true" tabindex="-1"></a>  <span class="fu">group_by</span>(panelist_id, date, app_name) <span class="sc">%&gt;%</span> </span>
<span id="cb20-6"><a href="#cb20-6" aria-hidden="true" tabindex="-1"></a>  <span class="co"># calculate visits and time spent per app, participant and day</span></span>
<span id="cb20-7"><a href="#cb20-7" aria-hidden="true" tabindex="-1"></a>  <span class="fu">summarise</span>(</span>
<span id="cb20-8"><a href="#cb20-8" aria-hidden="true" tabindex="-1"></a>    <span class="at">visits =</span> <span class="fu">n</span>(),</span>
<span id="cb20-9"><a href="#cb20-9" aria-hidden="true" tabindex="-1"></a>    <span class="at">duration =</span> <span class="fu">sum</span>(duration, <span class="at">na.rm =</span> <span class="cn">TRUE</span>) <span class="sc">/</span> <span class="dv">60</span>, <span class="co"># in minutes</span></span>
<span id="cb20-10"><a href="#cb20-10" aria-hidden="true" tabindex="-1"></a>    <span class="at">.groups =</span> <span class="st">"drop"</span></span>
<span id="cb20-11"><a href="#cb20-11" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">%&gt;%</span> </span>
<span id="cb20-12"><a href="#cb20-12" aria-hidden="true" tabindex="-1"></a>  <span class="co"># calculate average visits and duration per day across the sample</span></span>
<span id="cb20-13"><a href="#cb20-13" aria-hidden="true" tabindex="-1"></a>  <span class="fu">group_by</span>(date, app_name) <span class="sc">%&gt;%</span> </span>
<span id="cb20-14"><a href="#cb20-14" aria-hidden="true" tabindex="-1"></a>  <span class="fu">summarise</span>(</span>
<span id="cb20-15"><a href="#cb20-15" aria-hidden="true" tabindex="-1"></a>    <span class="at">visits =</span> <span class="fu">mean</span>(visits, <span class="at">na.rm =</span> <span class="cn">TRUE</span>),</span>
<span id="cb20-16"><a href="#cb20-16" aria-hidden="true" tabindex="-1"></a>    <span class="at">duration =</span> <span class="fu">mean</span>(duration, <span class="at">na.rm =</span> <span class="cn">TRUE</span>)</span>
<span id="cb20-17"><a href="#cb20-17" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">%&gt;%</span> </span>
<span id="cb20-18"><a href="#cb20-18" aria-hidden="true" tabindex="-1"></a>  <span class="fu">pivot_longer</span>(<span class="at">cols =</span> <span class="fu">c</span>(visits, duration),</span>
<span id="cb20-19"><a href="#cb20-19" aria-hidden="true" tabindex="-1"></a>               <span class="at">names_to =</span> <span class="st">"metric"</span>,</span>
<span id="cb20-20"><a href="#cb20-20" aria-hidden="true" tabindex="-1"></a>               <span class="at">values_to =</span> <span class="st">"value"</span>) <span class="sc">%&gt;%</span> </span>
<span id="cb20-21"><a href="#cb20-21" aria-hidden="true" tabindex="-1"></a>  <span class="fu">mutate</span>(</span>
<span id="cb20-22"><a href="#cb20-22" aria-hidden="true" tabindex="-1"></a>    <span class="at">metric =</span> <span class="fu">factor</span>(metric, <span class="at">levels =</span> <span class="fu">c</span>(<span class="st">"visits"</span>, <span class="st">"duration"</span>))</span>
<span id="cb20-23"><a href="#cb20-23" aria-hidden="true" tabindex="-1"></a>  )</span>
<span id="cb20-24"><a href="#cb20-24" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb20-25"><a href="#cb20-25" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb20-26"><a href="#cb20-26" aria-hidden="true" tabindex="-1"></a>plot4 <span class="ot">=</span> <span class="fu">ggplot</span>(week, <span class="fu">aes</span>(<span class="at">x =</span> date, <span class="at">y =</span> value, <span class="at">color =</span> metric)) <span class="sc">+</span></span>
<span id="cb20-27"><a href="#cb20-27" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_line</span>(<span class="at">linewidth =</span> <span class="dv">1</span>) <span class="sc">+</span></span>
<span id="cb20-28"><a href="#cb20-28" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_point</span>(<span class="at">size =</span> <span class="dv">2</span>) <span class="sc">+</span></span>
<span id="cb20-29"><a href="#cb20-29" aria-hidden="true" tabindex="-1"></a>  <span class="fu">scale_color_manual</span>(</span>
<span id="cb20-30"><a href="#cb20-30" aria-hidden="true" tabindex="-1"></a>    <span class="at">values =</span> <span class="fu">c</span>(<span class="st">"visits"</span> <span class="ot">=</span> <span class="st">"#5E81AC"</span>, <span class="st">"duration"</span> <span class="ot">=</span> <span class="st">"#5EA8AC"</span>),</span>
<span id="cb20-31"><a href="#cb20-31" aria-hidden="true" tabindex="-1"></a>    <span class="at">labels =</span> <span class="fu">c</span>(<span class="st">"Visits"</span>, <span class="st">"Duration (minutes)"</span>)</span>
<span id="cb20-32"><a href="#cb20-32" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">+</span></span>
<span id="cb20-33"><a href="#cb20-33" aria-hidden="true" tabindex="-1"></a>  <span class="fu">scale_x_date</span>(<span class="at">date_breaks =</span> <span class="st">"1 day"</span>,</span>
<span id="cb20-34"><a href="#cb20-34" aria-hidden="true" tabindex="-1"></a>               <span class="at">date_labels =</span> <span class="st">"%d.%m"</span>) <span class="sc">+</span></span>
<span id="cb20-35"><a href="#cb20-35" aria-hidden="true" tabindex="-1"></a>  <span class="fu">facet_wrap</span>(<span class="sc">~</span> app_name, <span class="at">ncol =</span> <span class="dv">2</span>, <span class="at">scales =</span> <span class="st">"free_y"</span>) <span class="sc">+</span></span>
<span id="cb20-36"><a href="#cb20-36" aria-hidden="true" tabindex="-1"></a>  <span class="fu">theme_minimal</span>(<span class="at">base_size =</span> <span class="dv">13</span>, <span class="at">base_family =</span> <span class="st">"serif"</span>) <span class="sc">+</span></span>
<span id="cb20-37"><a href="#cb20-37" aria-hidden="true" tabindex="-1"></a>  <span class="fu">theme</span>(</span>
<span id="cb20-38"><a href="#cb20-38" aria-hidden="true" tabindex="-1"></a>    <span class="at">legend.title =</span> <span class="fu">element_blank</span>(),</span>
<span id="cb20-39"><a href="#cb20-39" aria-hidden="true" tabindex="-1"></a>    <span class="at">legend.position =</span> <span class="st">"top"</span>,</span>
<span id="cb20-40"><a href="#cb20-40" aria-hidden="true" tabindex="-1"></a>    <span class="at">plot.title =</span> <span class="fu">element_text</span>(<span class="at">face =</span> <span class="st">"bold"</span>),</span>
<span id="cb20-41"><a href="#cb20-41" aria-hidden="true" tabindex="-1"></a>    <span class="at">axis.text.x =</span> <span class="fu">element_text</span>(<span class="at">angle =</span> <span class="dv">45</span>, <span class="at">hjust =</span> <span class="dv">1</span>),</span>
<span id="cb20-42"><a href="#cb20-42" aria-hidden="true" tabindex="-1"></a>    <span class="at">strip.text =</span> <span class="fu">element_text</span>(<span class="at">face =</span> <span class="st">"bold"</span>)</span>
<span id="cb20-43"><a href="#cb20-43" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">+</span></span>
<span id="cb20-44"><a href="#cb20-44" aria-hidden="true" tabindex="-1"></a>  <span class="fu">labs</span>(</span>
<span id="cb20-45"><a href="#cb20-45" aria-hidden="true" tabindex="-1"></a>    <span class="at">title =</span> <span class="st">"Average Daily App Usage per Participant over a Week"</span>,</span>
<span id="cb20-46"><a href="#cb20-46" aria-hidden="true" tabindex="-1"></a>    <span class="at">subtitle =</span> <span class="fu">paste0</span>(<span class="st">"Data based on a Sample of German Internet Users (N = "</span>, blue_n_participants, <span class="st">")"</span>),</span>
<span id="cb20-47"><a href="#cb20-47" aria-hidden="true" tabindex="-1"></a>    <span class="at">x =</span> <span class="st">"Date"</span>,</span>
<span id="cb20-48"><a href="#cb20-48" aria-hidden="true" tabindex="-1"></a>    <span class="at">y =</span> <span class="st">"Value"</span></span>
<span id="cb20-49"><a href="#cb20-49" aria-hidden="true" tabindex="-1"></a>  )</span>
<span id="cb20-50"><a href="#cb20-50" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb20-51"><a href="#cb20-51" aria-hidden="true" tabindex="-1"></a>plot4</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<div>
<figure class="figure">
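<p><img src="readme_files/figure-html/unnamed-chunk-16-1.png" class="img-fluid figure-img" width="672" alt="Line charts of average daily visits and duration for Instagram, YouTube, WhatsApp and Facebook over one week"></p>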
</figure>
</div>
</div>
</div>
<p>Okay, that gives us a pretty good overview of digital media usage over the course of the week. Now, of course, we can also select other time periods and take a closer look at them. Let’s take a look at Instagram, YouTube, WhatsApp and Facebook usage over the course of a day.</p>
<p>In this case, we calculate the proportion of the hour spent on the respective app. For example, 30 minutes of Instagram use between 10 and 11 a.m. would mean that 50% of the current hour was spent on Instagram. We calculate this value for our participants and take the average of the proportion per app and hour.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb21"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb21-1"><a href="#cb21-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Let's select a random day in our sample</span></span>
<span id="cb21-2"><a href="#cb21-2" aria-hidden="true" tabindex="-1"></a>random_day <span class="ot">=</span> blue_data_clean <span class="sc">%&gt;%</span> </span>
<span id="cb21-3"><a href="#cb21-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">select</span>(date) <span class="sc">%&gt;%</span> </span>
<span id="cb21-4"><a href="#cb21-4" aria-hidden="true" tabindex="-1"></a>  <span class="fu">distinct</span>() <span class="sc">%&gt;%</span> </span>
<span id="cb21-5"><a href="#cb21-5" aria-hidden="true" tabindex="-1"></a>  <span class="fu">slice_sample</span>(<span class="at">n =</span> <span class="dv">1</span>) <span class="sc">%&gt;%</span> </span>
<span id="cb21-6"><a href="#cb21-6" aria-hidden="true" tabindex="-1"></a>  <span class="fu">pull</span>(date)</span>
<span id="cb21-7"><a href="#cb21-7" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb21-8"><a href="#cb21-8" aria-hidden="true" tabindex="-1"></a><span class="co"># Okay, we select relevant apps and date</span></span>
<span id="cb21-9"><a href="#cb21-9" aria-hidden="true" tabindex="-1"></a>day_data <span class="ot">=</span> blue_data_clean <span class="sc">%&gt;%</span></span>
<span id="cb21-10"><a href="#cb21-10" aria-hidden="true" tabindex="-1"></a>  <span class="fu">filter</span>(app_name <span class="sc">%in%</span> <span class="fu">c</span>(<span class="st">"Instagram"</span>, <span class="st">"YouTube"</span>, <span class="st">"WhatsApp"</span>, <span class="st">"Facebook"</span>),</span>
<span id="cb21-11"><a href="#cb21-11" aria-hidden="true" tabindex="-1"></a>         date <span class="sc">==</span> random_day) <span class="sc">%&gt;%</span></span>
<span id="cb21-12"><a href="#cb21-12" aria-hidden="true" tabindex="-1"></a>  <span class="co"># we make sure that we have the proper time format</span></span>
<span id="cb21-13"><a href="#cb21-13" aria-hidden="true" tabindex="-1"></a>  <span class="fu">mutate</span>(</span>
<span id="cb21-14"><a href="#cb21-14" aria-hidden="true" tabindex="-1"></a>    <span class="at">start_time =</span> <span class="fu">as.POSIXct</span>(start_time, <span class="at">tz =</span> <span class="st">"Europe/Berlin"</span>),</span>
<span id="cb21-15"><a href="#cb21-15" aria-hidden="true" tabindex="-1"></a>    <span class="at">end_time   =</span> <span class="fu">as.POSIXct</span>(end_time, <span class="at">tz =</span> <span class="st">"Europe/Berlin"</span>),</span>
<span id="cb21-16"><a href="#cb21-16" aria-hidden="true" tabindex="-1"></a>    <span class="at">start_hour =</span> <span class="fu">floor_date</span>(start_time, <span class="st">"hour"</span>),</span>
<span id="cb21-17"><a href="#cb21-17" aria-hidden="true" tabindex="-1"></a>    <span class="at">end_hour   =</span> <span class="fu">ceiling_date</span>(end_time, <span class="st">"hour"</span>) <span class="sc">-</span> <span class="fu">seconds</span>(<span class="dv">1</span>)</span>
<span id="cb21-18"><a href="#cb21-18" aria-hidden="true" tabindex="-1"></a>  )</span>
<span id="cb21-19"><a href="#cb21-19" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb21-20"><a href="#cb21-20" aria-hidden="true" tabindex="-1"></a><span class="co"># We need to calculate hourly data</span></span>
<span id="cb21-21"><a href="#cb21-21" aria-hidden="true" tabindex="-1"></a>hourly_data <span class="ot">=</span> day_data <span class="sc">%&gt;%</span></span>
<span id="cb21-22"><a href="#cb21-22" aria-hidden="true" tabindex="-1"></a>  <span class="fu">rowwise</span>() <span class="sc">%&gt;%</span></span>
<span id="cb21-23"><a href="#cb21-23" aria-hidden="true" tabindex="-1"></a>  <span class="co"># create every hour between start and end hour</span></span>
<span id="cb21-24"><a href="#cb21-24" aria-hidden="true" tabindex="-1"></a>  <span class="fu">mutate</span>(</span>
<span id="cb21-25"><a href="#cb21-25" aria-hidden="true" tabindex="-1"></a>    <span class="at">hour =</span> <span class="fu">list</span>(<span class="fu">seq</span>(start_hour, end_hour, <span class="at">by =</span> <span class="st">"hour"</span>))</span>
<span id="cb21-26"><a href="#cb21-26" aria-hidden="true" tabindex="-1"></a>    ) <span class="sc">%&gt;%</span></span>
<span id="cb21-27"><a href="#cb21-27" aria-hidden="true" tabindex="-1"></a>  <span class="fu">unnest</span>(hour) <span class="sc">%&gt;%</span></span>
<span id="cb21-28"><a href="#cb21-28" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ungroup</span>() <span class="sc">%&gt;%</span></span>
<span id="cb21-29"><a href="#cb21-29" aria-hidden="true" tabindex="-1"></a>  <span class="co"># share of hour used </span></span>
<span id="cb21-30"><a href="#cb21-30" aria-hidden="true" tabindex="-1"></a>  <span class="fu">mutate</span>(</span>
<span id="cb21-31"><a href="#cb21-31" aria-hidden="true" tabindex="-1"></a>    <span class="at">hour_end =</span> hour <span class="sc">+</span> <span class="fu">hours</span>(<span class="dv">1</span>),</span>
<span id="cb21-32"><a href="#cb21-32" aria-hidden="true" tabindex="-1"></a>    <span class="at">hour_share =</span> <span class="fu">as.numeric</span>(<span class="fu">pmin</span>(end_time, hour_end) <span class="sc">-</span> <span class="fu">pmax</span>(start_time, hour), <span class="at">units =</span> <span class="st">"mins"</span>) <span class="sc">/</span> <span class="dv">60</span></span>
<span id="cb21-33"><a href="#cb21-33" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">%&gt;%</span></span>
<span id="cb21-34"><a href="#cb21-34" aria-hidden="true" tabindex="-1"></a>  <span class="co"># group by hour and app</span></span>
<span id="cb21-35"><a href="#cb21-35" aria-hidden="true" tabindex="-1"></a>  <span class="fu">group_by</span>(hour, app_name) <span class="sc">%&gt;%</span></span>
<span id="cb21-36"><a href="#cb21-36" aria-hidden="true" tabindex="-1"></a>  <span class="co"># calculate mean, SD, and CIs</span></span>
<span id="cb21-37"><a href="#cb21-37" aria-hidden="true" tabindex="-1"></a>  <span class="fu">summarise</span>(</span>
<span id="cb21-38"><a href="#cb21-38" aria-hidden="true" tabindex="-1"></a>    <span class="at">mean_hour_share =</span> <span class="fu">mean</span>(hour_share, <span class="at">na.rm =</span> <span class="cn">TRUE</span>),</span>
<span id="cb21-39"><a href="#cb21-39" aria-hidden="true" tabindex="-1"></a>    <span class="co"># SD</span></span>
<span id="cb21-40"><a href="#cb21-40" aria-hidden="true" tabindex="-1"></a>    <span class="at">sd_hour_share =</span> <span class="fu">sd</span>(hour_share, <span class="at">na.rm =</span> <span class="cn">TRUE</span>),</span>
<span id="cb21-41"><a href="#cb21-41" aria-hidden="true" tabindex="-1"></a>    <span class="co"># sample size (n)</span></span>
<span id="cb21-42"><a href="#cb21-42" aria-hidden="true" tabindex="-1"></a>    <span class="at">n =</span> <span class="fu">n</span>(),</span>
<span id="cb21-43"><a href="#cb21-43" aria-hidden="true" tabindex="-1"></a>    <span class="at">critical_t =</span> <span class="fu">qt</span>(<span class="fl">0.975</span>, <span class="at">df =</span> n <span class="sc">-</span> <span class="dv">1</span>),</span>
<span id="cb21-44"><a href="#cb21-44" aria-hidden="true" tabindex="-1"></a>    <span class="at">se_hour_share =</span> sd_hour_share <span class="sc">/</span> <span class="fu">sqrt</span>(n),</span>
<span id="cb21-45"><a href="#cb21-45" aria-hidden="true" tabindex="-1"></a>    <span class="at">upper_ci =</span> mean_hour_share <span class="sc">+</span> critical_t <span class="sc">*</span> se_hour_share,</span>
<span id="cb21-46"><a href="#cb21-46" aria-hidden="true" tabindex="-1"></a>    <span class="at">lower_ci =</span> mean_hour_share <span class="sc">-</span> critical_t <span class="sc">*</span> se_hour_share,</span>
<span id="cb21-47"><a href="#cb21-47" aria-hidden="true" tabindex="-1"></a>    <span class="at">.groups =</span> <span class="st">"drop"</span></span>
<span id="cb21-48"><a href="#cb21-48" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">%&gt;%</span></span>
<span id="cb21-49"><a href="#cb21-49" aria-hidden="true" tabindex="-1"></a>  <span class="co"># Cleaning CIs (if n = 1 --&gt; no CI can be computed, use the mean instead)</span></span>
<span id="cb21-50"><a href="#cb21-50" aria-hidden="true" tabindex="-1"></a>  <span class="fu">mutate</span>(</span>
<span id="cb21-51"><a href="#cb21-51" aria-hidden="true" tabindex="-1"></a>    <span class="fu">across</span>(<span class="fu">c</span>(upper_ci, lower_ci), <span class="sc">~</span><span class="fu">ifelse</span>(n <span class="sc">&lt;=</span> <span class="dv">1</span>, mean_hour_share, .x))</span>
<span id="cb21-52"><a href="#cb21-52" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">%&gt;%</span></span>
<span id="cb21-53"><a href="#cb21-53" aria-hidden="true" tabindex="-1"></a>  <span class="co"># complete missing cases</span></span>
<span id="cb21-54"><a href="#cb21-54" aria-hidden="true" tabindex="-1"></a>  <span class="fu">complete</span>(</span>
<span id="cb21-55"><a href="#cb21-55" aria-hidden="true" tabindex="-1"></a>    <span class="at">hour =</span> <span class="fu">seq.POSIXt</span>(<span class="fu">as.POSIXct</span>(random_day, <span class="at">tz =</span> <span class="st">"Europe/Berlin"</span>),</span>
<span id="cb21-56"><a href="#cb21-56" aria-hidden="true" tabindex="-1"></a>                      <span class="fu">as.POSIXct</span>(random_day, <span class="at">tz =</span> <span class="st">"Europe/Berlin"</span>) <span class="sc">+</span> <span class="fu">hours</span>(<span class="dv">23</span>),</span>
<span id="cb21-57"><a href="#cb21-57" aria-hidden="true" tabindex="-1"></a>                      <span class="at">by =</span> <span class="st">"hour"</span>),</span>
<span id="cb21-58"><a href="#cb21-58" aria-hidden="true" tabindex="-1"></a>    app_name,</span>
<span id="cb21-59"><a href="#cb21-59" aria-hidden="true" tabindex="-1"></a>    <span class="co"># fill with 0</span></span>
<span id="cb21-60"><a href="#cb21-60" aria-hidden="true" tabindex="-1"></a>    <span class="at">fill =</span> <span class="fu">list</span>(<span class="at">mean_hour_share =</span> <span class="dv">0</span>, <span class="at">sd_hour_share =</span> <span class="cn">NA</span>, <span class="at">n =</span> <span class="dv">0</span>, <span class="at">se_hour_share =</span> <span class="cn">NA</span>, <span class="at">critical_t =</span> <span class="cn">NA</span>, <span class="at">lower_ci =</span> <span class="dv">0</span>, <span class="at">upper_ci =</span> <span class="dv">0</span>)</span>
<span id="cb21-61"><a href="#cb21-61" aria-hidden="true" tabindex="-1"></a>  ) </span>
<span id="cb21-62"><a href="#cb21-62" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb21-63"><a href="#cb21-63" aria-hidden="true" tabindex="-1"></a>plot5 <span class="ot">=</span> <span class="fu">ggplot</span>(hourly_data, <span class="fu">aes</span>(<span class="at">x =</span> hour, <span class="at">y =</span> mean_hour_share)) <span class="sc">+</span></span>
<span id="cb21-64"><a href="#cb21-64" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_ribbon</span>(</span>
<span id="cb21-65"><a href="#cb21-65" aria-hidden="true" tabindex="-1"></a>    <span class="fu">aes</span>(<span class="at">ymin =</span> lower_ci, <span class="at">ymax =</span> upper_ci),</span>
<span id="cb21-66"><a href="#cb21-66" aria-hidden="true" tabindex="-1"></a>    <span class="at">fill =</span> <span class="st">"#5EA8AC"</span>, </span>
<span id="cb21-67"><a href="#cb21-67" aria-hidden="true" tabindex="-1"></a>    <span class="at">alpha =</span> <span class="fl">0.25</span></span>
<span id="cb21-68"><a href="#cb21-68" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">+</span></span>
<span id="cb21-69"><a href="#cb21-69" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_line</span>(<span class="at">color =</span> <span class="st">"#5EA8AC"</span>, <span class="at">linewidth =</span> <span class="fl">1.2</span>) <span class="sc">+</span></span>
<span id="cb21-70"><a href="#cb21-70" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_point</span>(<span class="at">color =</span> <span class="st">"#5EA8AC"</span>, <span class="at">size =</span> <span class="dv">2</span>) <span class="sc">+</span></span>
<span id="cb21-71"><a href="#cb21-71" aria-hidden="true" tabindex="-1"></a>  <span class="fu">scale_y_continuous</span>(<span class="at">labels =</span> scales<span class="sc">::</span><span class="fu">percent_format</span>(<span class="at">accuracy =</span> <span class="dv">1</span>)) <span class="sc">+</span></span>
<span id="cb21-72"><a href="#cb21-72" aria-hidden="true" tabindex="-1"></a>  <span class="fu">scale_x_datetime</span>(<span class="at">date_breaks =</span> <span class="st">"2 hour"</span>,</span>
<span id="cb21-73"><a href="#cb21-73" aria-hidden="true" tabindex="-1"></a>                   <span class="at">date_labels =</span> <span class="st">"%H:%M"</span>) <span class="sc">+</span></span>
<span id="cb21-74"><a href="#cb21-74" aria-hidden="true" tabindex="-1"></a>  <span class="fu">facet_wrap</span>(<span class="sc">~</span> app_name, <span class="at">ncol =</span> <span class="dv">2</span>, <span class="at">scales =</span> <span class="st">"free_y"</span>) <span class="sc">+</span></span>
<span id="cb21-75"><a href="#cb21-75" aria-hidden="true" tabindex="-1"></a>  <span class="fu">theme_minimal</span>(<span class="at">base_size =</span> <span class="dv">13</span>, <span class="at">base_family =</span> <span class="st">"serif"</span>) <span class="sc">+</span></span>
<span id="cb21-76"><a href="#cb21-76" aria-hidden="true" tabindex="-1"></a>  <span class="fu">theme</span>(</span>
<span id="cb21-77"><a href="#cb21-77" aria-hidden="true" tabindex="-1"></a>    <span class="at">legend.position =</span> <span class="st">"none"</span>,</span>
<span id="cb21-78"><a href="#cb21-78" aria-hidden="true" tabindex="-1"></a>    <span class="at">plot.title =</span> <span class="fu">element_text</span>(<span class="at">face =</span> <span class="st">"bold"</span>),</span>
<span id="cb21-79"><a href="#cb21-79" aria-hidden="true" tabindex="-1"></a>    <span class="at">axis.text.x =</span> <span class="fu">element_text</span>(<span class="at">size =</span> <span class="dv">9</span>, <span class="at">angle =</span> <span class="dv">45</span>, <span class="at">hjust =</span> <span class="dv">1</span>)</span>
<span id="cb21-80"><a href="#cb21-80" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">+</span></span>
<span id="cb21-81"><a href="#cb21-81" aria-hidden="true" tabindex="-1"></a>  <span class="fu">labs</span>(</span>
<span id="cb21-82"><a href="#cb21-82" aria-hidden="true" tabindex="-1"></a>    <span class="at">title =</span> <span class="st">"App usage over the course of a day"</span>,</span>
<span id="cb21-83"><a href="#cb21-83" aria-hidden="true" tabindex="-1"></a>    <span class="at">subtitle =</span> <span class="fu">paste0</span>(<span class="st">"Data based on a Sample of German Internet Users (N = "</span>, blue_n_participants, <span class="st">")"</span>),</span>
<span id="cb21-84"><a href="#cb21-84" aria-hidden="true" tabindex="-1"></a>    <span class="at">x =</span> <span class="st">"Time"</span>,</span>
<span id="cb21-85"><a href="#cb21-85" aria-hidden="true" tabindex="-1"></a>    <span class="at">y =</span> <span class="st">"Mean share of hour used"</span></span>
<span id="cb21-86"><a href="#cb21-86" aria-hidden="true" tabindex="-1"></a>  )</span>
<span id="cb21-87"><a href="#cb21-87" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb21-88"><a href="#cb21-88" aria-hidden="true" tabindex="-1"></a>plot5</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<div>
<figure class="figure">
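<p><img src="readme_files/figure-html/unnamed-chunk-17-1.png" class="img-fluid figure-img" width="672" alt="Line charts of the mean share of each hour spent per app over the course of a day, one panel per app, with shaded confidence bands"></p>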
</figure>
</div>
</div>
</div>
<p>That looks good. However, we can go into even greater detail. Let’s say we are interested in usage behavior in the mobile situation in which Instagram is used. Specifically, how long does the person use their smartphone and which apps are used before and after? To do this, we first need to consider a few conceptual issues.</p>
<p>User behavior unfolds as sequences of events—actions that occur in a specific order over time.</p>
<p>In this section, we introduce three key concepts:</p>
<p><em>Event</em> – a single recorded action (e.g., app foregrounding).</p>
<p><em>Sequence</em> – a meaningful order of multiple events (e.g., unlocking phone → opening Instagram → switching to Messages).</p>
<p><em>Session</em> – as defined by Peng &amp; Zhu, a sequence of events with a defined duration that represents a coherent unit of mobile behavior <span class="citation" data-cites="Peng2020">(<a href="#ref-Peng2020" role="doc-biblioref">Peng &amp; Zhu, 2020</a>)</span>.</p>
<p>By identifying and analyzing sessions, we can capture the flow and structure of smartphone interaction, moving beyond isolated events to behavioral patterns. In this tutorial, we define a session as a sequence of app events for a given participant that is separated from the next sequence by at least 60 seconds of inactivity.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb22"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb22-1"><a href="#cb22-1" aria-hidden="true" tabindex="-1"></a>sessions <span class="ot">=</span> blue_data_clean <span class="sc">%&gt;%</span> </span>
<span id="cb22-2"><a href="#cb22-2" aria-hidden="true" tabindex="-1"></a>  <span class="co"># first, we need to identify sessions</span></span>
<span id="cb22-3"><a href="#cb22-3" aria-hidden="true" tabindex="-1"></a>  <span class="co"># a new session starts if </span></span>
<span id="cb22-4"><a href="#cb22-4" aria-hidden="true" tabindex="-1"></a>  <span class="co"># - gap &gt; 60 seconds or</span></span>
<span id="cb22-5"><a href="#cb22-5" aria-hidden="true" tabindex="-1"></a>  <span class="co"># - panelist_id does not equal next_panelist</span></span>
<span id="cb22-6"><a href="#cb22-6" aria-hidden="true" tabindex="-1"></a>  <span class="fu">mutate</span>(</span>
<span id="cb22-7"><a href="#cb22-7" aria-hidden="true" tabindex="-1"></a>    <span class="at">new_session =</span> <span class="fu">if_else</span>(</span>
<span id="cb22-8"><a href="#cb22-8" aria-hidden="true" tabindex="-1"></a>      <span class="fu">row_number</span>() <span class="sc">==</span> <span class="dv">1</span> <span class="sc">|</span> time_gap <span class="sc">&gt;</span> <span class="dv">60</span> <span class="sc">|</span> panelist_id <span class="sc">!=</span> next_panelist_id,</span>
<span id="cb22-9"><a href="#cb22-9" aria-hidden="true" tabindex="-1"></a>      <span class="dv">1</span>, <span class="dv">0</span></span>
<span id="cb22-10"><a href="#cb22-10" aria-hidden="true" tabindex="-1"></a>    ),</span>
<span id="cb22-11"><a href="#cb22-11" aria-hidden="true" tabindex="-1"></a>    <span class="at">session_id =</span> <span class="fu">paste0</span>(panelist_id, <span class="st">"_"</span>, <span class="fu">cumsum</span>(new_session))</span>
<span id="cb22-12"><a href="#cb22-12" aria-hidden="true" tabindex="-1"></a>  ) </span>
<span id="cb22-13"><a href="#cb22-13" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb22-14"><a href="#cb22-14" aria-hidden="true" tabindex="-1"></a><span class="co"># we create a df which contains all session_ids and the number of instagram visits </span></span>
<span id="cb22-15"><a href="#cb22-15" aria-hidden="true" tabindex="-1"></a>instagram_visits <span class="ot">=</span> sessions <span class="sc">%&gt;%</span> </span>
<span id="cb22-16"><a href="#cb22-16" aria-hidden="true" tabindex="-1"></a>  <span class="fu">group_by</span>(session_id) <span class="sc">%&gt;%</span> </span>
<span id="cb22-17"><a href="#cb22-17" aria-hidden="true" tabindex="-1"></a>  <span class="fu">summarise</span>(</span>
<span id="cb22-18"><a href="#cb22-18" aria-hidden="true" tabindex="-1"></a>    <span class="co"># calculate total visits per session</span></span>
<span id="cb22-19"><a href="#cb22-19" aria-hidden="true" tabindex="-1"></a>    <span class="at">total_visits =</span> <span class="fu">n</span>(),</span>
<span id="cb22-20"><a href="#cb22-20" aria-hidden="true" tabindex="-1"></a>    <span class="co"># calculate Instagram visits per session</span></span>
<span id="cb22-21"><a href="#cb22-21" aria-hidden="true" tabindex="-1"></a>    <span class="at">instagram_visits =</span> <span class="fu">sum</span>(app_name <span class="sc">==</span> <span class="st">"Instagram"</span>)</span>
<span id="cb22-22"><a href="#cb22-22" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">%&gt;%</span> </span>
<span id="cb22-23"><a href="#cb22-23" aria-hidden="true" tabindex="-1"></a>  <span class="co"># just keep sessions with at least one instagram visit</span></span>
<span id="cb22-24"><a href="#cb22-24" aria-hidden="true" tabindex="-1"></a>  <span class="fu">filter</span>(instagram_visits <span class="sc">&gt;</span> <span class="dv">0</span>)</span>
<span id="cb22-25"><a href="#cb22-25" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb22-26"><a href="#cb22-26" aria-hidden="true" tabindex="-1"></a><span class="co"># let's filter our sessions based on 'instagram_visits' to get the whole usage sequence</span></span>
<span id="cb22-27"><a href="#cb22-27" aria-hidden="true" tabindex="-1"></a>instagram_sessions <span class="ot">=</span> sessions <span class="sc">%&gt;%</span> </span>
<span id="cb22-28"><a href="#cb22-28" aria-hidden="true" tabindex="-1"></a>  <span class="fu">filter</span>(session_id <span class="sc">%in%</span> instagram_visits<span class="sc">$</span>session_id)</span>
<span id="cb22-29"><a href="#cb22-29" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb22-30"><a href="#cb22-30" aria-hidden="true" tabindex="-1"></a><span class="co"># okay, let's say we want to visualize one specific instagram session, to get a better idea about the context etc.</span></span>
<span id="cb22-31"><a href="#cb22-31" aria-hidden="true" tabindex="-1"></a><span class="co"># select a random session</span></span>
<span id="cb22-32"><a href="#cb22-32" aria-hidden="true" tabindex="-1"></a>random_session <span class="ot">=</span> instagram_sessions <span class="sc">%&gt;%</span> </span>
<span id="cb22-33"><a href="#cb22-33" aria-hidden="true" tabindex="-1"></a>  <span class="co"># We want something which is nice to visualize. Therefore we limit our sample for the random draw to sessions which have a certain duration</span></span>
<span id="cb22-34"><a href="#cb22-34" aria-hidden="true" tabindex="-1"></a>  <span class="fu">group_by</span>(session_id) <span class="sc">%&gt;%</span> </span>
<span id="cb22-35"><a href="#cb22-35" aria-hidden="true" tabindex="-1"></a>  <span class="fu">summarise</span>(</span>
<span id="cb22-36"><a href="#cb22-36" aria-hidden="true" tabindex="-1"></a>    <span class="at">session_duration =</span> <span class="fu">sum</span>(duration, <span class="at">na.rm =</span> <span class="cn">TRUE</span>)</span>
<span id="cb22-37"><a href="#cb22-37" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">%&gt;%</span> </span>
<span id="cb22-38"><a href="#cb22-38" aria-hidden="true" tabindex="-1"></a>  <span class="fu">filter</span>(session_duration <span class="sc">&gt;</span> <span class="dv">180</span> <span class="sc">&amp;</span> session_duration <span class="sc">&lt;</span> <span class="dv">360</span>) <span class="sc">%&gt;%</span> </span>
<span id="cb22-39"><a href="#cb22-39" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ungroup</span>() <span class="sc">%&gt;%</span> </span>
<span id="cb22-40"><a href="#cb22-40" aria-hidden="true" tabindex="-1"></a>  <span class="fu">slice_sample</span>(<span class="at">n =</span> <span class="dv">1</span>) <span class="sc">%&gt;%</span> </span>
<span id="cb22-41"><a href="#cb22-41" aria-hidden="true" tabindex="-1"></a>  <span class="fu">pull</span>(session_id)</span>
<span id="cb22-42"><a href="#cb22-42" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb22-43"><a href="#cb22-43" aria-hidden="true" tabindex="-1"></a>instagram_visual <span class="ot">=</span> instagram_sessions <span class="sc">%&gt;%</span> </span>
<span id="cb22-44"><a href="#cb22-44" aria-hidden="true" tabindex="-1"></a>  <span class="fu">filter</span>(session_id <span class="sc">==</span> random_session) <span class="sc">%&gt;%</span></span>
<span id="cb22-45"><a href="#cb22-45" aria-hidden="true" tabindex="-1"></a>  <span class="fu">select</span>(app_name, start_time, end_time, duration, session_id)</span>
<span id="cb22-46"><a href="#cb22-46" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb22-47"><a href="#cb22-47" aria-hidden="true" tabindex="-1"></a><span class="co"># I want to fill the temporal gaps between apps, therefore I need to calculate the gaps in between</span></span>
<span id="cb22-48"><a href="#cb22-48" aria-hidden="true" tabindex="-1"></a>gaps <span class="ot">=</span> instagram_visual <span class="sc">%&gt;%</span></span>
<span id="cb22-49"><a href="#cb22-49" aria-hidden="true" tabindex="-1"></a>  <span class="fu">mutate</span>(</span>
<span id="cb22-50"><a href="#cb22-50" aria-hidden="true" tabindex="-1"></a>    <span class="at">next_start =</span> <span class="fu">lead</span>(start_time),</span>
<span id="cb22-51"><a href="#cb22-51" aria-hidden="true" tabindex="-1"></a>    <span class="at">gap_start  =</span> end_time,</span>
<span id="cb22-52"><a href="#cb22-52" aria-hidden="true" tabindex="-1"></a>    <span class="at">gap_end    =</span> next_start</span>
<span id="cb22-53"><a href="#cb22-53" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">%&gt;%</span></span>
<span id="cb22-54"><a href="#cb22-54" aria-hidden="true" tabindex="-1"></a>  <span class="fu">filter</span>(<span class="sc">!</span><span class="fu">is.na</span>(next_start) <span class="sc">&amp;</span> gap_end <span class="sc">&gt;</span> gap_start) <span class="sc">%&gt;%</span></span>
<span id="cb22-55"><a href="#cb22-55" aria-hidden="true" tabindex="-1"></a>  <span class="fu">mutate</span>(</span>
<span id="cb22-56"><a href="#cb22-56" aria-hidden="true" tabindex="-1"></a>    <span class="at">app_name =</span> <span class="st">"GAP"</span></span>
<span id="cb22-57"><a href="#cb22-57" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">%&gt;%</span></span>
<span id="cb22-58"><a href="#cb22-58" aria-hidden="true" tabindex="-1"></a>  <span class="fu">select</span>(app_name, gap_start, gap_end, session_id) <span class="sc">%&gt;%</span></span>
<span id="cb22-59"><a href="#cb22-59" aria-hidden="true" tabindex="-1"></a>  <span class="fu">rename</span>(</span>
<span id="cb22-60"><a href="#cb22-60" aria-hidden="true" tabindex="-1"></a>    <span class="at">start_time =</span> gap_start,</span>
<span id="cb22-61"><a href="#cb22-61" aria-hidden="true" tabindex="-1"></a>    <span class="at">end_time   =</span> gap_end</span>
<span id="cb22-62"><a href="#cb22-62" aria-hidden="true" tabindex="-1"></a>  )</span>
<span id="cb22-63"><a href="#cb22-63" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb22-64"><a href="#cb22-64" aria-hidden="true" tabindex="-1"></a><span class="co"># add the gap data to 'instagram_visual'</span></span>
<span id="cb22-65"><a href="#cb22-65" aria-hidden="true" tabindex="-1"></a>instagram_visual <span class="ot">=</span> instagram_visual <span class="sc">%&gt;%</span> </span>
<span id="cb22-66"><a href="#cb22-66" aria-hidden="true" tabindex="-1"></a>  <span class="fu">bind_rows</span>(gaps) <span class="sc">%&gt;%</span> </span>
<span id="cb22-67"><a href="#cb22-67" aria-hidden="true" tabindex="-1"></a>  <span class="fu">mutate</span>(</span>
<span id="cb22-68"><a href="#cb22-68" aria-hidden="true" tabindex="-1"></a>    <span class="at">app_name =</span> <span class="fu">as.character</span>(app_name)</span>
<span id="cb22-69"><a href="#cb22-69" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">%&gt;%</span> </span>
<span id="cb22-70"><a href="#cb22-70" aria-hidden="true" tabindex="-1"></a>  <span class="fu">mutate</span>(<span class="at">bar_y =</span> <span class="dv">1</span>) </span>
<span id="cb22-71"><a href="#cb22-71" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb22-72"><a href="#cb22-72" aria-hidden="true" tabindex="-1"></a><span class="co"># get dynamic colors for each app, except for GAP which should be grey</span></span>
<span id="cb22-73"><a href="#cb22-73" aria-hidden="true" tabindex="-1"></a>app_levels <span class="ot">=</span> <span class="fu">unique</span>(instagram_visual<span class="sc">$</span>app_name)</span>
<span id="cb22-74"><a href="#cb22-74" aria-hidden="true" tabindex="-1"></a>apps_without_gap <span class="ot">=</span> <span class="fu">setdiff</span>(app_levels, <span class="st">"GAP"</span>)</span>
<span id="cb22-75"><a href="#cb22-75" aria-hidden="true" tabindex="-1"></a>gap_color <span class="ot">=</span> <span class="fu">c</span>(<span class="st">"GAP"</span> <span class="ot">=</span> <span class="st">"#D8DEE9"</span>)</span>
<span id="cb22-76"><a href="#cb22-76" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb22-77"><a href="#cb22-77" aria-hidden="true" tabindex="-1"></a>app_colors <span class="ot">=</span> <span class="fu">setNames</span>(</span>
<span id="cb22-78"><a href="#cb22-78" aria-hidden="true" tabindex="-1"></a>  grDevices<span class="sc">::</span><span class="fu">hcl.colors</span>(<span class="fu">length</span>(apps_without_gap), <span class="at">palette =</span> <span class="st">"Dynamic"</span>),</span>
<span id="cb22-79"><a href="#cb22-79" aria-hidden="true" tabindex="-1"></a>  apps_without_gap</span>
<span id="cb22-80"><a href="#cb22-80" aria-hidden="true" tabindex="-1"></a>)</span>
<span id="cb22-81"><a href="#cb22-81" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb22-82"><a href="#cb22-82" aria-hidden="true" tabindex="-1"></a>color_map <span class="ot">=</span> <span class="fu">c</span>(gap_color, app_colors)</span>
<span id="cb22-83"><a href="#cb22-83" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb22-84"><a href="#cb22-84" aria-hidden="true" tabindex="-1"></a>plot6 <span class="ot">=</span> <span class="fu">ggplot</span>(instagram_visual) <span class="sc">+</span></span>
<span id="cb22-85"><a href="#cb22-85" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_segment</span>(</span>
<span id="cb22-86"><a href="#cb22-86" aria-hidden="true" tabindex="-1"></a>    <span class="fu">aes</span>(</span>
<span id="cb22-87"><a href="#cb22-87" aria-hidden="true" tabindex="-1"></a>      <span class="at">y =</span> bar_y,</span>
<span id="cb22-88"><a href="#cb22-88" aria-hidden="true" tabindex="-1"></a>      <span class="at">yend =</span> bar_y,</span>
<span id="cb22-89"><a href="#cb22-89" aria-hidden="true" tabindex="-1"></a>      <span class="at">x =</span> start_time,</span>
<span id="cb22-90"><a href="#cb22-90" aria-hidden="true" tabindex="-1"></a>      <span class="at">xend =</span> end_time,</span>
<span id="cb22-91"><a href="#cb22-91" aria-hidden="true" tabindex="-1"></a>      <span class="at">color =</span> app_name</span>
<span id="cb22-92"><a href="#cb22-92" aria-hidden="true" tabindex="-1"></a>    ),</span>
<span id="cb22-93"><a href="#cb22-93" aria-hidden="true" tabindex="-1"></a>    <span class="at">linewidth =</span> <span class="dv">20</span></span>
<span id="cb22-94"><a href="#cb22-94" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">+</span></span>
<span id="cb22-95"><a href="#cb22-95" aria-hidden="true" tabindex="-1"></a>  <span class="fu">scale_color_manual</span>(<span class="at">values =</span> color_map) <span class="sc">+</span></span>
<span id="cb22-96"><a href="#cb22-96" aria-hidden="true" tabindex="-1"></a>  <span class="fu">scale_x_datetime</span>(<span class="at">date_breaks =</span> <span class="st">"20 secs"</span>,</span>
<span id="cb22-97"><a href="#cb22-97" aria-hidden="true" tabindex="-1"></a>                   <span class="at">date_labels =</span> <span class="st">"%H:%M:%S"</span>) <span class="sc">+</span></span>
<span id="cb22-98"><a href="#cb22-98" aria-hidden="true" tabindex="-1"></a>  <span class="fu">labs</span>(</span>
<span id="cb22-99"><a href="#cb22-99" aria-hidden="true" tabindex="-1"></a>    <span class="at">x =</span> <span class="st">"Time"</span>,</span>
<span id="cb22-100"><a href="#cb22-100" aria-hidden="true" tabindex="-1"></a>    <span class="at">y =</span> <span class="cn">NULL</span>,</span>
<span id="cb22-101"><a href="#cb22-101" aria-hidden="true" tabindex="-1"></a>    <span class="at">title =</span> <span class="st">"Instagram Usage Sequence of a Random User"</span>,</span>
<span id="cb22-102"><a href="#cb22-102" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">+</span></span>
<span id="cb22-103"><a href="#cb22-103" aria-hidden="true" tabindex="-1"></a>  <span class="fu">theme_minimal</span>(<span class="at">base_size =</span> <span class="dv">13</span>, <span class="at">base_family =</span> <span class="st">"serif"</span>) <span class="sc">+</span></span>
<span id="cb22-104"><a href="#cb22-104" aria-hidden="true" tabindex="-1"></a>  <span class="fu">theme</span>(</span>
<span id="cb22-105"><a href="#cb22-105" aria-hidden="true" tabindex="-1"></a>    <span class="at">legend.title =</span> <span class="fu">element_blank</span>(),</span>
<span id="cb22-106"><a href="#cb22-106" aria-hidden="true" tabindex="-1"></a>    <span class="at">legend.position =</span> <span class="st">"top"</span>,</span>
<span id="cb22-107"><a href="#cb22-107" aria-hidden="true" tabindex="-1"></a>    <span class="at">plot.title =</span> <span class="fu">element_text</span>(<span class="at">face =</span> <span class="st">"bold"</span>),</span>
<span id="cb22-108"><a href="#cb22-108" aria-hidden="true" tabindex="-1"></a>    <span class="at">axis.text.x =</span> <span class="fu">element_text</span>(<span class="at">angle =</span> <span class="dv">45</span>, <span class="at">hjust =</span> <span class="dv">1</span>),</span>
<span id="cb22-109"><a href="#cb22-109" aria-hidden="true" tabindex="-1"></a>    <span class="at">axis.text.y  =</span> <span class="fu">element_blank</span>(),</span>
<span id="cb22-110"><a href="#cb22-110" aria-hidden="true" tabindex="-1"></a>    <span class="at">axis.title.y =</span> <span class="fu">element_blank</span>(),</span>
<span id="cb22-111"><a href="#cb22-111" aria-hidden="true" tabindex="-1"></a>    <span class="at">axis.ticks.y =</span> <span class="fu">element_blank</span>()</span>
<span id="cb22-112"><a href="#cb22-112" aria-hidden="true" tabindex="-1"></a>  )<span class="sc">+</span> </span>
<span id="cb22-113"><a href="#cb22-113" aria-hidden="true" tabindex="-1"></a>   <span class="fu">theme</span>(</span>
<span id="cb22-114"><a href="#cb22-114" aria-hidden="true" tabindex="-1"></a>    <span class="at">legend.position =</span> <span class="st">"right"</span>,</span>
<span id="cb22-115"><a href="#cb22-115" aria-hidden="true" tabindex="-1"></a>    <span class="at">legend.title =</span> <span class="fu">element_blank</span>()</span>
<span id="cb22-116"><a href="#cb22-116" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">+</span></span>
<span id="cb22-117"><a href="#cb22-117" aria-hidden="true" tabindex="-1"></a>  <span class="fu">guides</span>(<span class="at">color =</span> <span class="fu">guide_legend</span>(<span class="at">override.aes =</span> <span class="fu">list</span>(<span class="at">linewidth =</span> <span class="dv">3</span>)))</span>
<span id="cb22-118"><a href="#cb22-118" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb22-119"><a href="#cb22-119" aria-hidden="true" tabindex="-1"></a>plot6</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<div>
<figure class="figure">
<p><img src="readme_files/figure-html/unnamed-chunk-18-1.png" class="img-fluid figure-img" alt="Plot of the sequence of apps visited within the randomly selected session" width="672"></p>
</figure>
</div>
</div>
</div>
<p>Here we see the sequence of visited apps within the randomly selected session.</p>
<p>We have now learned about some basic concepts and analyses of app tracking data using a relatively well-prepared data set.</p>
<p>If you have such a data set, you can close your laptop at this point and be happy. If you want to continue, I would suggest taking a break now and then we’ll look at the red data set.</p>
</section>
</section>
<section id="the-red-data-set" class="level1" data-number="6">
<h1 data-number="6"><span class="header-section-number">6</span> The red data set</h1>
<div class="quarto-figure quarto-figure-center">
<figure class="figure">
<p><img src="images/image_3_red_pill_v2.png" class="img-fluid figure-img" alt="Illustration of a red pill"></p>
<figcaption>Image generated by Midjourney</figcaption>
</figure>
</div>
<p>The red data set is also a one-week sub-sample (N = 100) of a data set, which was originally collected for a larger sample (N = 600) and a period of four months in 2025 in Germany.</p>
<p>We have already learned about some concepts for analyzing app logging data and applied them to a relatively well-prepared “blue” data set.</p>
<p>However, not all app logging data sets look like this. Accordingly, I invite you to take the red pill and take a deeper look into the more or less messy reality.</p>
<p>First, let’s take a look at our data structure.</p>
<p><em>panelist_id</em> – unique identifier for each user or device</p>
<p><em>date</em> – the date of the recorded event</p>
<p><em>seen_timestamp</em> – precise time of the event (in our case a Unix timestamp in milliseconds)</p>
<p><em>event_type</em> – type of user interaction (e.g., “foreground,” “background,” “notification”)</p>
<p><em>app_name</em> – readable name of the application</p>
<p><em>full_package_name</em> – complete identifier used by the Android system (e.g., com.instagram.android)</p>
<p><em>package_name</em> – shortened version of the app’s identifier</p>
<p>Understanding this schema helps ensure that all subsequent preprocessing and analysis steps are properly aligned with the data’s meaning.</p>
<p>Let’s take a closer look at the event_types. Here, we can refer to the article by Parry &amp; Toth <span class="citation" data-cites="Parry2025">(<a href="#ref-Parry2025" role="doc-biblioref">Parry &amp; Toth, 2025</a>)</span>, the official <a href="https://developer.android.com/reference/android/app/usage/UsageEvents.Event#constants_1">Android Developers Documentation</a> or <a href="https://cs.android.com/?hl=de">Android Code Search</a> (search for “UsageEvents”), which breaks down the meaning of each type.</p>
<table class="caption-top table">
<colgroup>
<col style="width: 4%">
<col style="width: 11%">
<col style="width: 84%">
</colgroup>
<thead>
<tr class="header">
<th>Event Type</th>
<th>Name</th>
<th>Explanation</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td>0</td>
<td>NONE</td>
<td>A device-level event like DEVICE_SHUTDOWN does not have a package name, but some user code always expects a non-null package name for every event.</td>
</tr>
<tr class="even">
<td>1</td>
<td>Activity resumed</td>
<td>An activity (associated with a package and class) moved to the foreground. The older name of this constant, MOVE_TO_FOREGROUND, was deprecated in API level 29 in favor of ACTIVITY_RESUMED.</td>
</tr>
<tr class="odd">
<td>2</td>
<td>Activity paused</td>
<td>An activity moved to the background.</td>
</tr>
<tr class="even">
<td>3</td>
<td>End of day</td>
<td>This is a technical note from the system at the end of the day. It indicates that the app was actively open on the screen at that time (usually midnight). The system automatically ended the recording for the day here to start a new statistics period.</td>
</tr>
<tr class="odd">
<td>4</td>
<td>Continue previous day</td>
<td>An event type denoting that a component was in the foreground the previous day. This is effectively treated as an ACTIVITY_RESUMED.</td>
</tr>
<tr class="even">
<td>5</td>
<td>Configuration change</td>
<td>The device configuration has changed.</td>
</tr>
<tr class="odd">
<td>6</td>
<td>System Interaction</td>
<td>The system interacted in some way with the respective app.</td>
</tr>
<tr class="even">
<td>7</td>
<td>User Interaction</td>
<td>A user interacted in some way with the respective app.</td>
</tr>
<tr class="odd">
<td>8</td>
<td>Shortcut invocation</td>
<td>A shortcut created by the user (e.g., via the home screen or app shortcuts) was executed. You have created a shortcut for “WhatsApp Chat with Miriam” in your favorite apps bar.</td>
</tr>
<tr class="even">
<td>9</td>
<td>Chooser Activity</td>
<td>This event means that the user has selected a specific app in the phone’s native share menu to share a file, link, or information.</td>
</tr>
<tr class="odd">
<td>10</td>
<td>Notification seen</td>
<td>The user viewed the notification.</td>
</tr>
<tr class="even">
<td>11</td>
<td>Standby bucket changed</td>
<td>The app’s standby bucket changed. Android assigns apps to standby buckets depending on how recently and how frequently they are used.</td>
</tr>
<tr class="odd">
<td>12</td>
<td>Interruptive notification</td>
<td>An app posted an interruptive notification, which can include visual and audible interruptions, e.g.&nbsp;Push-Notifications of WhatsApp.</td>
</tr>
<tr class="even">
<td>13</td>
<td>Slice pinned priv</td>
<td>The Home Screen app or voice assistant has saved or bookmarked a small, interactive element of an app (a “slice”) for quick access.</td>
</tr>
<tr class="odd">
<td>14</td>
<td>Slice pinned</td>
<td>An app has saved or bookmarked a small, interactive element of an app (a “slice”) for quick access.</td>
</tr>
<tr class="even">
<td>15</td>
<td>Screen interactive</td>
<td>The screen went into an interactive state (i.e., turned on for full user interaction, not ambient display or other non‑interactive state)</td>
</tr>
<tr class="odd">
<td>16</td>
<td>Screen non‑interactive</td>
<td>The screen went into a non‑interactive state (i.e., completely turned off or turned on only in a non‑interactive state)</td>
</tr>
<tr class="even">
<td>17</td>
<td>Keyguard shown</td>
<td>The screen’s keyguard was shown</td>
</tr>
<tr class="odd">
<td>18</td>
<td>Keyguard hidden</td>
<td>The screen’s keyguard was hidden (i.e., the user unlocked the device)</td>
</tr>
<tr class="even">
<td>19</td>
<td>Foreground service start</td>
<td>An app starts a so-called foreground service. This is a background service that is so important that Android must display a permanent notification to the user. Example: “Spotify is currently playing music”</td>
</tr>
<tr class="odd">
<td>20</td>
<td>Foreground service stop</td>
<td>The running foreground service is stopped. The app no longer needs the persistent activity.</td>
</tr>
<tr class="even">
<td>21</td>
<td>Continuing foreground service</td>
<td>An event type denoting that a foreground service is at started state when the stats rolled-over at the end of a time interval.</td>
</tr>
<tr class="odd">
<td>22</td>
<td>Rollover foreground service</td>
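<td>An event type denoting that a foreground service is at started state at the beginning of a time interval. This is effectively treated as a foreground service start.</td>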
</tr>
<tr class="even">
<td>23</td>
<td>Activity stopped</td>
<td>An activity becomes invisible on the UI.</td>
</tr>
<tr class="odd">
<td>24</td>
<td>Activity destroyed</td>
<td>An activity object is destroyed.</td>
</tr>
<tr class="even">
<td>25</td>
<td>Flush to disk</td>
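<td>An event type denoting that the usage statistics were flushed (written) to disk. Before the flush, all usage stats are updated to the latest timestamp so that the most up-to-date stats are persisted.</td>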
</tr>
<tr class="odd">
<td>26</td>
<td>Device shutdown</td>
<td>The Android runtime underwent a shutdown process.</td>
</tr>
<tr class="even">
<td>27</td>
<td>Device startup</td>
<td>The Android runtime launched.</td>
</tr>
<tr class="odd">
<td>28</td>
<td>User unlocked</td>
<td>An event type denoting that a user has been unlocked for the first time. This event mainly indicates when the user’s credential encrypted storage was first accessible.</td>
</tr>
<tr class="even">
<td>29</td>
<td>User stopped</td>
<td>An event type denoting that a user has been stopped. This typically happens when the system is being turned off or when users are being switched.</td>
</tr>
<tr class="odd">
<td>30</td>
<td>Locus ID set</td>
<td><p>This event type is an internal mechanism that is primarily used for functions such as smart suggestions or task continuation (app continuity).</p>
<p>In short: when you click on a specific channel in a chat, the app sets a new LocusId. If you later access it via Google search or the “Recently Used” menu, the system knows exactly which screen in the app to open thanks to this ID.</p></td>
</tr>
<tr class="even">
<td>31</td>
<td>App component used</td>
<td>An event type denoting that a component in the package has been used.</td>
</tr>
</tbody>
</table>
<p>In order to get started, we first need our data.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb23"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb23-1"><a href="#cb23-1" aria-hidden="true" tabindex="-1"></a>red_data <span class="ot">=</span> <span class="fu">readRDS</span>(<span class="st">"data/red_data.rds"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>Let’s check what our data set looks like.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb24"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb24-1"><a href="#cb24-1" aria-hidden="true" tabindex="-1"></a><span class="fu">glimpse</span>(red_data)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code>Rows: 1,154,510
Columns: 7
$ panelist_id       &lt;chr&gt; "Participant_25", "Participant_25", "Participant_25"…
$ date              &lt;date&gt; 2025-09-02, 2025-09-02, 2025-09-02, 2025-09-02, 202…
$ seen_timestamp    &lt;dbl&gt; 1756771713992, 1756771718014, 1756771723511, 1756771…
$ event_type        &lt;int&gt; 15, 16, 15, 1, 18, 16, 2, 23, 17, 15, 16, 15, 16, 15…
$ app_name          &lt;chr&gt; "Android-System", "Android-System", "Android-System"…
$ full_package_name &lt;chr&gt; "android/null", "android/null", "android/null", "com…
$ package_name      &lt;chr&gt; "android", "android", "android", "com.whatsapp", "an…</code></pre>
</div>
</div>
<p>Then let’s take a quick look at the event types in our data.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb26"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb26-1"><a href="#cb26-1" aria-hidden="true" tabindex="-1"></a>event_types <span class="ot">=</span> red_data <span class="sc">%&gt;%</span> </span>
<span id="cb26-2"><a href="#cb26-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">group_by</span>(event_type) <span class="sc">%&gt;%</span> </span>
<span id="cb26-3"><a href="#cb26-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">summarise</span>(</span>
<span id="cb26-4"><a href="#cb26-4" aria-hidden="true" tabindex="-1"></a>    <span class="at">n =</span> <span class="fu">n</span>()</span>
<span id="cb26-5"><a href="#cb26-5" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">%&gt;%</span> </span>
<span id="cb26-6"><a href="#cb26-6" aria-hidden="true" tabindex="-1"></a>  <span class="fu">arrange</span>(<span class="fu">desc</span>(n)) </span>
<span id="cb26-7"><a href="#cb26-7" aria-hidden="true" tabindex="-1"></a><span class="fu">print</span>(event_types, <span class="at">n =</span> <span class="cn">Inf</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 24 × 2
   event_type      n
        &lt;int&gt;  &lt;int&gt;
 1          1 265852
 2          2 259400
 3         23 250128
 4         12  97268
 5         19  49802
 6         20  49470
 7         10  45779
 8         15  31582
 9         16  31535
10          7  23421
11         18  18134
12         17  18125
13          5  10272
14         14   1122
15          0   1040
16          8    528
17          9    478
18         26    193
19         28    142
20         27    135
21        100     50
22        101     50
23         32      3
24         13      1</code></pre>
</div>
</div>
<p>If we take a closer look, we see that there are some event types here that are not listed in the official Android documentation, e.g., event_type = 100. This can happen when manufacturers use a customized version of Android. For us, this means specifically that we do not know what event_type 100 stands for. I was also unable to find any reliable information on this. Welcome to the messy reality! For the purposes of this tutorial, we will ignore these unknown event types since they are rare in our data set. In applied research, however, such decisions should always be documented, and sensitivity analyses are recommended if these types of events are more frequent.</p>
<p>Nevertheless, we will first convert the event_types into a more readable form, at least those for which we have information available.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb28"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb28-1"><a href="#cb28-1" aria-hidden="true" tabindex="-1"></a>red_coded <span class="ot">=</span> red_data <span class="sc">%&gt;%</span> </span>
<span id="cb28-2"><a href="#cb28-2" aria-hidden="true" tabindex="-1"></a>  <span class="co"># We start by adding a more readable form of the event_type column </span></span>
<span id="cb28-3"><a href="#cb28-3" aria-hidden="true" tabindex="-1"></a>  <span class="co"># Note: This is not strictly necessary, but for the sake of clarity in this tutorial, we will take this extra step.</span></span>
<span id="cb28-4"><a href="#cb28-4" aria-hidden="true" tabindex="-1"></a>  <span class="fu">mutate</span>(</span>
<span id="cb28-5"><a href="#cb28-5" aria-hidden="true" tabindex="-1"></a>    <span class="at">event_type_read =</span> <span class="fu">case_when</span>(</span>
<span id="cb28-6"><a href="#cb28-6" aria-hidden="true" tabindex="-1"></a>      event_type <span class="sc">==</span> <span class="dv">0</span>  <span class="sc">~</span> <span class="st">"None"</span>,</span>
<span id="cb28-7"><a href="#cb28-7" aria-hidden="true" tabindex="-1"></a>      event_type <span class="sc">==</span> <span class="dv">1</span>  <span class="sc">~</span> <span class="st">"Activity resumed"</span>,</span>
<span id="cb28-8"><a href="#cb28-8" aria-hidden="true" tabindex="-1"></a>      event_type <span class="sc">==</span> <span class="dv">2</span>  <span class="sc">~</span> <span class="st">"Activity paused"</span>,</span>
<span id="cb28-9"><a href="#cb28-9" aria-hidden="true" tabindex="-1"></a>      event_type <span class="sc">==</span> <span class="dv">3</span>  <span class="sc">~</span> <span class="st">"End of day"</span>,</span>
<span id="cb28-10"><a href="#cb28-10" aria-hidden="true" tabindex="-1"></a>      event_type <span class="sc">==</span> <span class="dv">4</span>  <span class="sc">~</span> <span class="st">"Continue previous day"</span>,</span>
<span id="cb28-11"><a href="#cb28-11" aria-hidden="true" tabindex="-1"></a>      event_type <span class="sc">==</span> <span class="dv">5</span>  <span class="sc">~</span> <span class="st">"Configuration change"</span>,</span>
<span id="cb28-12"><a href="#cb28-12" aria-hidden="true" tabindex="-1"></a>      event_type <span class="sc">==</span> <span class="dv">6</span>  <span class="sc">~</span> <span class="st">"System Interaction"</span>,</span>
<span id="cb28-13"><a href="#cb28-13" aria-hidden="true" tabindex="-1"></a>      event_type <span class="sc">==</span> <span class="dv">7</span>  <span class="sc">~</span> <span class="st">"User Interaction"</span>,</span>
<span id="cb28-14"><a href="#cb28-14" aria-hidden="true" tabindex="-1"></a>      event_type <span class="sc">==</span> <span class="dv">8</span>  <span class="sc">~</span> <span class="st">"Shortcut invocation"</span>,</span>
<span id="cb28-15"><a href="#cb28-15" aria-hidden="true" tabindex="-1"></a>      event_type <span class="sc">==</span> <span class="dv">9</span>  <span class="sc">~</span> <span class="st">"Chooser Activity (Share)"</span>,</span>
<span id="cb28-16"><a href="#cb28-16" aria-hidden="true" tabindex="-1"></a>      event_type <span class="sc">==</span> <span class="dv">10</span> <span class="sc">~</span> <span class="st">"Notification seen"</span>,</span>
<span id="cb28-17"><a href="#cb28-17" aria-hidden="true" tabindex="-1"></a>      event_type <span class="sc">==</span> <span class="dv">11</span> <span class="sc">~</span> <span class="st">"Standby bucket changed"</span>,</span>
<span id="cb28-18"><a href="#cb28-18" aria-hidden="true" tabindex="-1"></a>      event_type <span class="sc">==</span> <span class="dv">12</span> <span class="sc">~</span> <span class="st">"Interruptive notification"</span>,</span>
<span id="cb28-19"><a href="#cb28-19" aria-hidden="true" tabindex="-1"></a>      event_type <span class="sc">==</span> <span class="dv">13</span> <span class="sc">~</span> <span class="st">"Slice pinned priv"</span>,</span>
<span id="cb28-20"><a href="#cb28-20" aria-hidden="true" tabindex="-1"></a>      event_type <span class="sc">==</span> <span class="dv">14</span> <span class="sc">~</span> <span class="st">"Slice pinned"</span>,</span>
<span id="cb28-21"><a href="#cb28-21" aria-hidden="true" tabindex="-1"></a>      event_type <span class="sc">==</span> <span class="dv">15</span> <span class="sc">~</span> <span class="st">"Screen turned on (interactive)"</span>,</span>
<span id="cb28-22"><a href="#cb28-22" aria-hidden="true" tabindex="-1"></a>      event_type <span class="sc">==</span> <span class="dv">16</span> <span class="sc">~</span> <span class="st">"Screen turned off (non-interactive)"</span>,</span>
<span id="cb28-23"><a href="#cb28-23" aria-hidden="true" tabindex="-1"></a>      event_type <span class="sc">==</span> <span class="dv">17</span> <span class="sc">~</span> <span class="st">"Keyguard shown"</span>,</span>
<span id="cb28-24"><a href="#cb28-24" aria-hidden="true" tabindex="-1"></a>      event_type <span class="sc">==</span> <span class="dv">18</span> <span class="sc">~</span> <span class="st">"Keyguard hidden (device unlocked)"</span>,</span>
<span id="cb28-25"><a href="#cb28-25" aria-hidden="true" tabindex="-1"></a>      event_type <span class="sc">==</span> <span class="dv">19</span> <span class="sc">~</span> <span class="st">"Foreground service started"</span>,</span>
<span id="cb28-26"><a href="#cb28-26" aria-hidden="true" tabindex="-1"></a>      event_type <span class="sc">==</span> <span class="dv">20</span> <span class="sc">~</span> <span class="st">"Foreground service stopped"</span>,</span>
<span id="cb28-27"><a href="#cb28-27" aria-hidden="true" tabindex="-1"></a>      event_type <span class="sc">==</span> <span class="dv">21</span> <span class="sc">~</span> <span class="st">"Continuning foreground service"</span>,</span>
<span id="cb28-28"><a href="#cb28-28" aria-hidden="true" tabindex="-1"></a>      event_type <span class="sc">==</span> <span class="dv">22</span> <span class="sc">~</span> <span class="st">"Rollover foreground service"</span>,</span>
<span id="cb28-29"><a href="#cb28-29" aria-hidden="true" tabindex="-1"></a>      event_type <span class="sc">==</span> <span class="dv">23</span> <span class="sc">~</span> <span class="st">"Activity stopped"</span>,</span>
<span id="cb28-30"><a href="#cb28-30" aria-hidden="true" tabindex="-1"></a>      event_type <span class="sc">==</span> <span class="dv">24</span> <span class="sc">~</span> <span class="st">"Activity destroyed"</span>,</span>
<span id="cb28-31"><a href="#cb28-31" aria-hidden="true" tabindex="-1"></a>      event_type <span class="sc">==</span> <span class="dv">25</span> <span class="sc">~</span> <span class="st">"Flush to disk"</span>,</span>
<span id="cb28-32"><a href="#cb28-32" aria-hidden="true" tabindex="-1"></a>      event_type <span class="sc">==</span> <span class="dv">26</span> <span class="sc">~</span> <span class="st">"Device shutdown"</span>,</span>
<span id="cb28-33"><a href="#cb28-33" aria-hidden="true" tabindex="-1"></a>      event_type <span class="sc">==</span> <span class="dv">27</span> <span class="sc">~</span> <span class="st">"Device startup"</span>,</span>
<span id="cb28-34"><a href="#cb28-34" aria-hidden="true" tabindex="-1"></a>      event_type <span class="sc">==</span> <span class="dv">28</span> <span class="sc">~</span> <span class="st">"User unlocked"</span>,</span>
<span id="cb28-35"><a href="#cb28-35" aria-hidden="true" tabindex="-1"></a>      event_type <span class="sc">==</span> <span class="dv">29</span> <span class="sc">~</span> <span class="st">"User stopped"</span>,</span>
<span id="cb28-36"><a href="#cb28-36" aria-hidden="true" tabindex="-1"></a>      event_type <span class="sc">==</span> <span class="dv">30</span> <span class="sc">~</span> <span class="st">"Locus ID set"</span>,</span>
<span id="cb28-37"><a href="#cb28-37" aria-hidden="true" tabindex="-1"></a>      event_type <span class="sc">==</span> <span class="dv">31</span> <span class="sc">~</span> <span class="st">"App component used"</span>,</span>
<span id="cb28-38"><a href="#cb28-38" aria-hidden="true" tabindex="-1"></a>      <span class="at">.default =</span> <span class="cn">NA</span></span>
<span id="cb28-39"><a href="#cb28-39" aria-hidden="true" tabindex="-1"></a>    ),</span>
<span id="cb28-40"><a href="#cb28-40" aria-hidden="true" tabindex="-1"></a>    <span class="at">event =</span> <span class="fu">case_when</span>(</span>
<span id="cb28-41"><a href="#cb28-41" aria-hidden="true" tabindex="-1"></a>      <span class="co"># Start / open / activate</span></span>
<span id="cb28-42"><a href="#cb28-42" aria-hidden="true" tabindex="-1"></a>      event_type <span class="sc">%in%</span> <span class="fu">c</span>(<span class="dv">1</span>, <span class="dv">4</span>, <span class="dv">11</span>, <span class="dv">14</span>, <span class="dv">15</span>, <span class="dv">18</span>, <span class="dv">19</span>, <span class="dv">22</span>, <span class="dv">27</span>) <span class="sc">~</span> <span class="st">"Start"</span>,</span>
<span id="cb28-43"><a href="#cb28-43" aria-hidden="true" tabindex="-1"></a>      <span class="co"># Stop / pause / close / deactivate</span></span>
<span id="cb28-44"><a href="#cb28-44" aria-hidden="true" tabindex="-1"></a>      event_type <span class="sc">%in%</span> <span class="fu">c</span>(<span class="dv">2</span>, <span class="dv">3</span>, <span class="dv">10</span>, <span class="dv">13</span>, <span class="dv">16</span>, <span class="dv">17</span>, <span class="dv">20</span>, <span class="dv">23</span>, <span class="dv">24</span>, <span class="dv">25</span>, <span class="dv">26</span>) <span class="sc">~</span> <span class="st">"Stop"</span>,</span>
<span id="cb28-45"><a href="#cb28-45" aria-hidden="true" tabindex="-1"></a>      <span class="co"># System / configuration / meta data</span></span>
<span id="cb28-46"><a href="#cb28-46" aria-hidden="true" tabindex="-1"></a>      event_type <span class="sc">%in%</span> <span class="fu">c</span>(<span class="dv">5</span>, <span class="dv">6</span>, <span class="dv">7</span>, <span class="dv">9</span>, <span class="dv">8</span>, <span class="dv">12</span>, <span class="dv">21</span>, <span class="dv">28</span>, <span class="dv">29</span>, <span class="dv">30</span>, <span class="dv">31</span>) <span class="sc">~</span> <span class="st">"Meta/Config"</span>,</span>
<span id="cb28-47"><a href="#cb28-47" aria-hidden="true" tabindex="-1"></a>      <span class="at">.default =</span> <span class="cn">NA_character_</span></span>
<span id="cb28-48"><a href="#cb28-48" aria-hidden="true" tabindex="-1"></a>    ),</span>
<span id="cb28-49"><a href="#cb28-49" aria-hidden="true" tabindex="-1"></a>    <span class="at">datetime =</span> <span class="fu">as_datetime</span>(seen_timestamp <span class="sc">/</span> <span class="dv">1000</span>, <span class="at">tz =</span> <span class="st">"Europe/Berlin"</span>)</span>
<span id="cb28-50"><a href="#cb28-50" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">%&gt;%</span> </span>
<span id="cb28-51"><a href="#cb28-51" aria-hidden="true" tabindex="-1"></a>  <span class="co"># let's reorder our columns real quick</span></span>
<span id="cb28-52"><a href="#cb28-52" aria-hidden="true" tabindex="-1"></a>  <span class="fu">select</span>(panelist_id, date, datetime, seen_timestamp, event_type, event_type_read, event, app_name, full_package_name, package_name) <span class="sc">%&gt;%</span> </span>
<span id="cb28-53"><a href="#cb28-53" aria-hidden="true" tabindex="-1"></a>  <span class="co"># remove background apps </span></span>
<span id="cb28-54"><a href="#cb28-54" aria-hidden="true" tabindex="-1"></a>  <span class="fu">filter</span>(<span class="sc">!</span>package_name <span class="sc">%in%</span> background_apps<span class="sc">$</span>pcn) <span class="sc">%&gt;%</span> </span>
<span id="cb28-55"><a href="#cb28-55" aria-hidden="true" tabindex="-1"></a>  <span class="co"># group by panelist_id</span></span>
<span id="cb28-56"><a href="#cb28-56" aria-hidden="true" tabindex="-1"></a>  <span class="fu">group_by</span>(panelist_id) <span class="sc">%&gt;%</span> </span>
<span id="cb28-57"><a href="#cb28-57" aria-hidden="true" tabindex="-1"></a>  <span class="co"># sort rows in temporal order by participant</span></span>
<span id="cb28-58"><a href="#cb28-58" aria-hidden="true" tabindex="-1"></a>  <span class="fu">arrange</span>(seen_timestamp, <span class="at">.by_group =</span> <span class="cn">TRUE</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>Let’s have a quick look at our data.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb29"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb29-1"><a href="#cb29-1" aria-hidden="true" tabindex="-1"></a><span class="fu">glimpse</span>(red_coded)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code>Rows: 907,647
Columns: 10
Groups: panelist_id [100]
$ panelist_id       &lt;chr&gt; "Participant_1", "Participant_1", "Participant_1", "…
$ date              &lt;date&gt; 2025-09-02, 2025-09-02, 2025-09-02, 2025-09-02, 202…
$ datetime          &lt;dttm&gt; 2025-09-02 08:34:10, 2025-09-02 09:03:05, 2025-09-0…
$ seen_timestamp    &lt;dbl&gt; 1756794850597, 1756796585973, 1756796585977, 1756796…
$ event_type        &lt;int&gt; 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, …
$ event_type_read   &lt;chr&gt; "Interruptive notification", "Interruptive notificat…
$ event             &lt;chr&gt; "Meta/Config", "Meta/Config", "Meta/Config", "Meta/C…
$ app_name          &lt;chr&gt; "Fotos", "GMX Mail", "GMX Mail", "AliExpress", "GMX …
$ full_package_name &lt;chr&gt; "com.google.android.apps.photos/null", "de.gmx.mobil…
$ package_name      &lt;chr&gt; "com.google.android.apps.photos", "de.gmx.mobile.and…</code></pre>
</div>
</div>
<p>Our initial goal is to achieve a data structure like the one in the blue data set. To do this, we will initially focus only on the start and stop of apps.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb31"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb31-1"><a href="#cb31-1" aria-hidden="true" tabindex="-1"></a>red_start_stop <span class="ot">=</span> red_coded <span class="sc">%&gt;%</span></span>
<span id="cb31-2"><a href="#cb31-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">filter</span>(app_name <span class="sc">!=</span> <span class="st">"Android-System"</span>) <span class="sc">%&gt;%</span></span>
<span id="cb31-3"><a href="#cb31-3" aria-hidden="true" tabindex="-1"></a>  <span class="co"># we are not interested in the meta/config event_types, therefore we filter for start and stop</span></span>
<span id="cb31-4"><a href="#cb31-4" aria-hidden="true" tabindex="-1"></a>  <span class="fu">filter</span>(event <span class="sc">%in%</span> <span class="fu">c</span>(<span class="st">"Start"</span>, <span class="st">"Stop"</span>)) <span class="sc">%&gt;%</span></span>
<span id="cb31-5"><a href="#cb31-5" aria-hidden="true" tabindex="-1"></a>  <span class="co"># we are using the same logic as before</span></span>
<span id="cb31-6"><a href="#cb31-6" aria-hidden="true" tabindex="-1"></a>  <span class="fu">mutate</span>(</span>
<span id="cb31-7"><a href="#cb31-7" aria-hidden="true" tabindex="-1"></a>    <span class="at">next_event =</span> <span class="fu">lead</span>(event, <span class="at">n =</span> <span class="dv">1</span>L),</span>
<span id="cb31-8"><a href="#cb31-8" aria-hidden="true" tabindex="-1"></a>    <span class="at">next_timestamp =</span> <span class="fu">lead</span>(seen_timestamp, <span class="at">n =</span> <span class="dv">1</span>L),</span>
<span id="cb31-9"><a href="#cb31-9" aria-hidden="true" tabindex="-1"></a>    <span class="at">next_app =</span> <span class="fu">lead</span>(app_name, <span class="at">n =</span> <span class="dv">1</span>L),</span>
<span id="cb31-10"><a href="#cb31-10" aria-hidden="true" tabindex="-1"></a>    <span class="at">next_panelist =</span> <span class="fu">lead</span>(panelist_id, <span class="at">n =</span> <span class="dv">1</span>L)</span>
<span id="cb31-11"><a href="#cb31-11" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">%&gt;%</span></span>
<span id="cb31-12"><a href="#cb31-12" aria-hidden="true" tabindex="-1"></a>  <span class="fu">filter</span>(</span>
<span id="cb31-13"><a href="#cb31-13" aria-hidden="true" tabindex="-1"></a>    event <span class="sc">==</span> <span class="st">"Start"</span> <span class="sc">&amp;</span> next_event <span class="sc">==</span> <span class="st">"Stop"</span> <span class="sc">&amp;</span> panelist_id <span class="sc">==</span> next_panelist</span>
<span id="cb31-14"><a href="#cb31-14" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">%&gt;%</span></span>
<span id="cb31-15"><a href="#cb31-15" aria-hidden="true" tabindex="-1"></a>  <span class="co"># let's rename some variable for more convenience</span></span>
<span id="cb31-16"><a href="#cb31-16" aria-hidden="true" tabindex="-1"></a>  <span class="fu">rename</span>(</span>
<span id="cb31-17"><a href="#cb31-17" aria-hidden="true" tabindex="-1"></a>    <span class="at">start_timestamp =</span> seen_timestamp,</span>
<span id="cb31-18"><a href="#cb31-18" aria-hidden="true" tabindex="-1"></a>    <span class="at">stop_timestamp =</span> next_timestamp</span>
<span id="cb31-19"><a href="#cb31-19" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">%&gt;%</span></span>
<span id="cb31-20"><a href="#cb31-20" aria-hidden="true" tabindex="-1"></a>  <span class="co"># calculate duration in seconds</span></span>
<span id="cb31-21"><a href="#cb31-21" aria-hidden="true" tabindex="-1"></a>  <span class="fu">mutate</span>(</span>
<span id="cb31-22"><a href="#cb31-22" aria-hidden="true" tabindex="-1"></a>    <span class="at">duration =</span> (stop_timestamp <span class="sc">-</span> start_timestamp) <span class="sc">/</span> <span class="dv">1000</span>, <span class="co"># in seconds</span></span>
<span id="cb31-23"><a href="#cb31-23" aria-hidden="true" tabindex="-1"></a>    <span class="at">event =</span> <span class="st">"App"</span></span>
<span id="cb31-24"><a href="#cb31-24" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">%&gt;%</span> </span>
<span id="cb31-25"><a href="#cb31-25" aria-hidden="true" tabindex="-1"></a>  <span class="co"># select only relevant variables</span></span>
<span id="cb31-26"><a href="#cb31-26" aria-hidden="true" tabindex="-1"></a>  <span class="fu">select</span>(</span>
<span id="cb31-27"><a href="#cb31-27" aria-hidden="true" tabindex="-1"></a>    panelist_id,</span>
<span id="cb31-28"><a href="#cb31-28" aria-hidden="true" tabindex="-1"></a>    date,</span>
<span id="cb31-29"><a href="#cb31-29" aria-hidden="true" tabindex="-1"></a>    datetime,</span>
<span id="cb31-30"><a href="#cb31-30" aria-hidden="true" tabindex="-1"></a>    start_timestamp,</span>
<span id="cb31-31"><a href="#cb31-31" aria-hidden="true" tabindex="-1"></a>    stop_timestamp,</span>
<span id="cb31-32"><a href="#cb31-32" aria-hidden="true" tabindex="-1"></a>    duration,</span>
<span id="cb31-33"><a href="#cb31-33" aria-hidden="true" tabindex="-1"></a>    app_name,</span>
<span id="cb31-34"><a href="#cb31-34" aria-hidden="true" tabindex="-1"></a>    event,</span>
<span id="cb31-35"><a href="#cb31-35" aria-hidden="true" tabindex="-1"></a>    package_name</span>
<span id="cb31-36"><a href="#cb31-36" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">%&gt;%</span></span>
<span id="cb31-37"><a href="#cb31-37" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ungroup</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<div class="cell">
<div class="sourceCode cell-code" id="cb32"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb32-1"><a href="#cb32-1" aria-hidden="true" tabindex="-1"></a><span class="fu">glimpse</span>(red_start_stop)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code>Rows: 227,471
Columns: 9
$ panelist_id     &lt;chr&gt; "Participant_1", "Participant_1", "Participant_1", "Pa…
$ date            &lt;date&gt; 2025-09-03, 2025-09-03, 2025-09-03, 2025-09-03, 2025-…
$ datetime        &lt;dttm&gt; 2025-09-03 12:03:43, 2025-09-03 12:03:43, 2025-09-03 …
$ start_timestamp &lt;dbl&gt; 1756893823221, 1756893823325, 1756893823409, 175689382…
$ stop_timestamp  &lt;dbl&gt; 1756893823312, 1756893823408, 1756893823841, 175689383…
$ duration        &lt;dbl&gt; 0.091, 0.083, 0.432, 1.798, 0.816, 0.277, 0.233, 65.92…
$ app_name        &lt;chr&gt; "WhatsApp", "WhatsApp", "WhatsApp", "Telefon", "Telefo…
$ event           &lt;chr&gt; "App", "App", "App", "App", "App", "App", "App", "App"…
$ package_name    &lt;chr&gt; "com.whatsapp", "com.whatsapp", "com.whatsapp", "com.s…</code></pre>
</div>
</div>
<p>The usage duration of apps is not the only thing we can analyse with Android app logging data. Let’s say we are also interested in push notifications. Therefore, we want to include the information on notifications, shares and user interactions in our data set. We will use it later for some analyses.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb34"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb34-1"><a href="#cb34-1" aria-hidden="true" tabindex="-1"></a>app_meta <span class="ot">=</span> red_coded <span class="sc">%&gt;%</span> </span>
<span id="cb34-2"><a href="#cb34-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">filter</span>(</span>
<span id="cb34-3"><a href="#cb34-3" aria-hidden="true" tabindex="-1"></a>    event_type <span class="sc">%in%</span> <span class="fu">c</span>(<span class="dv">7</span>, <span class="dv">9</span>, <span class="dv">10</span>, <span class="dv">12</span>)</span>
<span id="cb34-4"><a href="#cb34-4" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">%&gt;%</span></span>
<span id="cb34-5"><a href="#cb34-5" aria-hidden="true" tabindex="-1"></a>  <span class="fu">mutate</span>(</span>
<span id="cb34-6"><a href="#cb34-6" aria-hidden="true" tabindex="-1"></a>    <span class="at">start_time =</span> <span class="fu">as.POSIXct</span>(seen_timestamp <span class="sc">/</span> <span class="dv">1000</span>, <span class="at">format =</span> <span class="st">"%Y-%m-%d %H:%M:%S"</span>, <span class="at">tz =</span> <span class="st">"Europe/Berlin"</span>),</span>
<span id="cb34-7"><a href="#cb34-7" aria-hidden="true" tabindex="-1"></a>    <span class="at">end_time =</span> <span class="fu">as.POSIXct</span>(seen_timestamp <span class="sc">/</span> <span class="dv">1000</span>, <span class="at">format =</span> <span class="st">"%Y-%m-%d %H:%M:%S"</span>, <span class="at">tz =</span> <span class="st">"Europe/Berlin"</span>),</span>
<span id="cb34-8"><a href="#cb34-8" aria-hidden="true" tabindex="-1"></a>    <span class="at">duration =</span> <span class="dv">0</span>,</span>
<span id="cb34-9"><a href="#cb34-9" aria-hidden="true" tabindex="-1"></a>    <span class="at">rows_merged =</span> <span class="cn">NA_character_</span>,</span>
<span id="cb34-10"><a href="#cb34-10" aria-hidden="true" tabindex="-1"></a>    <span class="at">event =</span> <span class="fu">case_when</span>(</span>
<span id="cb34-11"><a href="#cb34-11" aria-hidden="true" tabindex="-1"></a>      event_type <span class="sc">==</span> <span class="dv">7</span> <span class="sc">~</span> <span class="st">"User interaction"</span>,</span>
<span id="cb34-12"><a href="#cb34-12" aria-hidden="true" tabindex="-1"></a>      event_type <span class="sc">==</span> <span class="dv">9</span> <span class="sc">~</span> <span class="st">"Share"</span>,</span>
<span id="cb34-13"><a href="#cb34-13" aria-hidden="true" tabindex="-1"></a>      event_type <span class="sc">==</span> <span class="dv">10</span> <span class="sc">~</span> <span class="st">"Notification seen"</span>,</span>
<span id="cb34-14"><a href="#cb34-14" aria-hidden="true" tabindex="-1"></a>      event_type <span class="sc">==</span> <span class="dv">12</span> <span class="sc">~</span> <span class="st">"Interruptive notification"</span>,</span>
<span id="cb34-15"><a href="#cb34-15" aria-hidden="true" tabindex="-1"></a>      <span class="at">.default =</span> <span class="cn">NA_character_</span>)</span>
<span id="cb34-16"><a href="#cb34-16" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">%&gt;%</span></span>
<span id="cb34-17"><a href="#cb34-17" aria-hidden="true" tabindex="-1"></a>  <span class="fu">select</span>(</span>
<span id="cb34-18"><a href="#cb34-18" aria-hidden="true" tabindex="-1"></a>    panelist_id,</span>
<span id="cb34-19"><a href="#cb34-19" aria-hidden="true" tabindex="-1"></a>    date,</span>
<span id="cb34-20"><a href="#cb34-20" aria-hidden="true" tabindex="-1"></a>    datetime,</span>
<span id="cb34-21"><a href="#cb34-21" aria-hidden="true" tabindex="-1"></a>    start_time,</span>
<span id="cb34-22"><a href="#cb34-22" aria-hidden="true" tabindex="-1"></a>    end_time,</span>
<span id="cb34-23"><a href="#cb34-23" aria-hidden="true" tabindex="-1"></a>    duration,</span>
<span id="cb34-24"><a href="#cb34-24" aria-hidden="true" tabindex="-1"></a>    app_name,</span>
<span id="cb34-25"><a href="#cb34-25" aria-hidden="true" tabindex="-1"></a>    package_name,</span>
<span id="cb34-26"><a href="#cb34-26" aria-hidden="true" tabindex="-1"></a>    rows_merged,</span>
<span id="cb34-27"><a href="#cb34-27" aria-hidden="true" tabindex="-1"></a>    event</span>
<span id="cb34-28"><a href="#cb34-28" aria-hidden="true" tabindex="-1"></a>    )</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>Additionally, we are interested in the overall mobile screen time of our participants. The easiest way to obtain this information is to focus on the times when the screen turns on or off, as well as when the device starts up or shuts down. We prepare this data to add it to our final dataset later.</p>
<p><strong>Note:</strong> Depending on your preferences for data management, you can either keep separate data frames or include all data in one data frame with a filter variable to easily select the necessary rows. In this tutorial, we will use a single data frame with the filter variable “event.”</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb35"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb35-1"><a href="#cb35-1" aria-hidden="true" tabindex="-1"></a>screen_start_stop <span class="ot">=</span> red_coded <span class="sc">%&gt;%</span> </span>
<span id="cb35-2"><a href="#cb35-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ungroup</span>() <span class="sc">%&gt;%</span> </span>
<span id="cb35-3"><a href="#cb35-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">filter</span>(app_name <span class="sc">==</span> <span class="st">"Android-System"</span>) <span class="sc">%&gt;%</span> </span>
<span id="cb35-4"><a href="#cb35-4" aria-hidden="true" tabindex="-1"></a>  <span class="co"># we are not interested in the meta/config event_types, therefore we filter for start and stop </span></span>
<span id="cb35-5"><a href="#cb35-5" aria-hidden="true" tabindex="-1"></a>  <span class="fu">filter</span>(event <span class="sc">%in%</span> <span class="fu">c</span>(<span class="st">"Start"</span>, <span class="st">"Stop"</span>)) <span class="sc">%&gt;%</span> </span>
<span id="cb35-6"><a href="#cb35-6" aria-hidden="true" tabindex="-1"></a>  <span class="co"># group by panelist_id and app_name</span></span>
<span id="cb35-7"><a href="#cb35-7" aria-hidden="true" tabindex="-1"></a>  <span class="fu">group_by</span>(panelist_id, app_name) <span class="sc">%&gt;%</span> </span>
<span id="cb35-8"><a href="#cb35-8" aria-hidden="true" tabindex="-1"></a>  <span class="co"># sort rows in temporal order by participant</span></span>
<span id="cb35-9"><a href="#cb35-9" aria-hidden="true" tabindex="-1"></a>  <span class="fu">arrange</span>(seen_timestamp, <span class="at">.by_group =</span> <span class="cn">TRUE</span>) <span class="sc">%&gt;%</span></span>
<span id="cb35-10"><a href="#cb35-10" aria-hidden="true" tabindex="-1"></a>  <span class="co"># we are using the same logic as before</span></span>
<span id="cb35-11"><a href="#cb35-11" aria-hidden="true" tabindex="-1"></a>  <span class="fu">mutate</span>(</span>
<span id="cb35-12"><a href="#cb35-12" aria-hidden="true" tabindex="-1"></a>    <span class="at">next_event =</span> <span class="fu">lead</span>(event, <span class="at">n =</span> <span class="dv">1</span>L),</span>
<span id="cb35-13"><a href="#cb35-13" aria-hidden="true" tabindex="-1"></a>    <span class="at">next_timestamp =</span> <span class="fu">lead</span>(seen_timestamp, <span class="at">n =</span> <span class="dv">1</span>L),</span>
<span id="cb35-14"><a href="#cb35-14" aria-hidden="true" tabindex="-1"></a>    <span class="at">next_app =</span> <span class="fu">lead</span>(app_name, <span class="at">n =</span> <span class="dv">1</span>L),</span>
<span id="cb35-15"><a href="#cb35-15" aria-hidden="true" tabindex="-1"></a>    <span class="at">next_panelist =</span> <span class="fu">lead</span>(panelist_id, <span class="at">n =</span> <span class="dv">1</span>L)</span>
<span id="cb35-16"><a href="#cb35-16" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">%&gt;%</span> </span>
<span id="cb35-17"><a href="#cb35-17" aria-hidden="true" tabindex="-1"></a>  <span class="fu">filter</span>(</span>
<span id="cb35-18"><a href="#cb35-18" aria-hidden="true" tabindex="-1"></a>    event <span class="sc">==</span> <span class="st">"Start"</span> <span class="sc">&amp;</span> next_event <span class="sc">==</span> <span class="st">"Stop"</span> <span class="sc">&amp;</span> panelist_id <span class="sc">==</span> next_panelist</span>
<span id="cb35-19"><a href="#cb35-19" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">%&gt;%</span> </span>
<span id="cb35-20"><a href="#cb35-20" aria-hidden="true" tabindex="-1"></a>  <span class="co"># let's rename some variable for more convenience</span></span>
<span id="cb35-21"><a href="#cb35-21" aria-hidden="true" tabindex="-1"></a>  <span class="fu">rename</span>(</span>
<span id="cb35-22"><a href="#cb35-22" aria-hidden="true" tabindex="-1"></a>    <span class="at">start_timestamp =</span> seen_timestamp,</span>
<span id="cb35-23"><a href="#cb35-23" aria-hidden="true" tabindex="-1"></a>    <span class="at">stop_timestamp =</span> next_timestamp</span>
<span id="cb35-24"><a href="#cb35-24" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">%&gt;%</span></span>
<span id="cb35-25"><a href="#cb35-25" aria-hidden="true" tabindex="-1"></a>  <span class="co"># calculate duration in seconds</span></span>
<span id="cb35-26"><a href="#cb35-26" aria-hidden="true" tabindex="-1"></a>  <span class="fu">mutate</span>(</span>
<span id="cb35-27"><a href="#cb35-27" aria-hidden="true" tabindex="-1"></a>    <span class="at">duration =</span> (<span class="fu">as.numeric</span>(stop_timestamp) <span class="sc">-</span> <span class="fu">as.numeric</span>(start_timestamp)) <span class="sc">/</span> <span class="dv">1000</span>, <span class="co"># in seconds</span></span>
<span id="cb35-28"><a href="#cb35-28" aria-hidden="true" tabindex="-1"></a>    <span class="at">start_time =</span> <span class="fu">as.POSIXct</span>(start_timestamp <span class="sc">/</span> <span class="dv">1000</span>, <span class="at">format =</span> <span class="st">"%Y-%m-%d %H:%M:%S"</span>, <span class="at">tz =</span> <span class="st">"Europe/Berlin"</span>),</span>
<span id="cb35-29"><a href="#cb35-29" aria-hidden="true" tabindex="-1"></a>    <span class="at">end_time =</span> <span class="fu">as.POSIXct</span>(stop_timestamp <span class="sc">/</span> <span class="dv">1000</span>, <span class="at">format =</span> <span class="st">"%Y-%m-%d %H:%M:%S"</span>, <span class="at">tz =</span> <span class="st">"Europe/Berlin"</span>),</span>
<span id="cb35-30"><a href="#cb35-30" aria-hidden="true" tabindex="-1"></a>    <span class="at">rows_merged =</span> <span class="cn">NA_character_</span>,</span>
<span id="cb35-31"><a href="#cb35-31" aria-hidden="true" tabindex="-1"></a>    <span class="co"># our filter variable</span></span>
<span id="cb35-32"><a href="#cb35-32" aria-hidden="true" tabindex="-1"></a>    <span class="at">event =</span> <span class="st">"Screen"</span></span>
<span id="cb35-33"><a href="#cb35-33" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">%&gt;%</span></span>
<span id="cb35-34"><a href="#cb35-34" aria-hidden="true" tabindex="-1"></a>  <span class="co"># select only relevant variables</span></span>
<span id="cb35-35"><a href="#cb35-35" aria-hidden="true" tabindex="-1"></a>  <span class="fu">select</span>(</span>
<span id="cb35-36"><a href="#cb35-36" aria-hidden="true" tabindex="-1"></a>    panelist_id,</span>
<span id="cb35-37"><a href="#cb35-37" aria-hidden="true" tabindex="-1"></a>    date,</span>
<span id="cb35-38"><a href="#cb35-38" aria-hidden="true" tabindex="-1"></a>    datetime,</span>
<span id="cb35-39"><a href="#cb35-39" aria-hidden="true" tabindex="-1"></a>    start_time,</span>
<span id="cb35-40"><a href="#cb35-40" aria-hidden="true" tabindex="-1"></a>    end_time,</span>
<span id="cb35-41"><a href="#cb35-41" aria-hidden="true" tabindex="-1"></a>    duration,</span>
<span id="cb35-42"><a href="#cb35-42" aria-hidden="true" tabindex="-1"></a>    app_name,</span>
<span id="cb35-43"><a href="#cb35-43" aria-hidden="true" tabindex="-1"></a>    package_name,</span>
<span id="cb35-44"><a href="#cb35-44" aria-hidden="true" tabindex="-1"></a>    rows_merged,</span>
<span id="cb35-45"><a href="#cb35-45" aria-hidden="true" tabindex="-1"></a>    event</span>
<span id="cb35-46"><a href="#cb35-46" aria-hidden="true" tabindex="-1"></a>  )</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>We have now reached an interim stage by recoding information about the start and stop of the respective apps for our own use. Now we perform the same preprocessing steps as for the blue data set.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb36"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb36-1"><a href="#cb36-1" aria-hidden="true" tabindex="-1"></a>red_data_clean <span class="ot">=</span> red_start_stop <span class="sc">%&gt;%</span></span>
<span id="cb36-2"><a href="#cb36-2" aria-hidden="true" tabindex="-1"></a>  <span class="co"># convert start_time and end_time to proper datetime format, make sure to choose the correct timezone (tz)</span></span>
<span id="cb36-3"><a href="#cb36-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">mutate</span>(</span>
<span id="cb36-4"><a href="#cb36-4" aria-hidden="true" tabindex="-1"></a>    <span class="at">start_time =</span> <span class="fu">as.POSIXct</span>(start_timestamp <span class="sc">/</span> <span class="dv">1000</span>, <span class="at">format =</span> <span class="st">"%Y-%m-%d %H:%M:%S"</span>, <span class="at">tz =</span> <span class="st">"Europe/Berlin"</span>),</span>
<span id="cb36-5"><a href="#cb36-5" aria-hidden="true" tabindex="-1"></a>    <span class="at">end_time   =</span> <span class="fu">as.POSIXct</span>(stop_timestamp <span class="sc">/</span> <span class="dv">1000</span>,   <span class="at">format =</span> <span class="st">"%Y-%m-%d %H:%M:%S"</span>, <span class="at">tz =</span> <span class="st">"Europe/Berlin"</span>)</span>
<span id="cb36-6"><a href="#cb36-6" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">%&gt;%</span></span>
<span id="cb36-7"><a href="#cb36-7" aria-hidden="true" tabindex="-1"></a>  <span class="co"># we need to sort our rows in the correct temporal order</span></span>
<span id="cb36-8"><a href="#cb36-8" aria-hidden="true" tabindex="-1"></a>  <span class="co"># make sure that you group by participant to avoid sorting across all participants</span></span>
<span id="cb36-9"><a href="#cb36-9" aria-hidden="true" tabindex="-1"></a>  <span class="fu">group_by</span>(panelist_id) <span class="sc">%&gt;%</span></span>
<span id="cb36-10"><a href="#cb36-10" aria-hidden="true" tabindex="-1"></a>  <span class="co"># make sure that you take the grouping into account, when you sort your data</span></span>
<span id="cb36-11"><a href="#cb36-11" aria-hidden="true" tabindex="-1"></a>  <span class="fu">arrange</span>(start_time, <span class="at">.by_group =</span> <span class="cn">TRUE</span>) <span class="sc">%&gt;%</span></span>
<span id="cb36-12"><a href="#cb36-12" aria-hidden="true" tabindex="-1"></a>  <span class="co"># it is important that you use lag() or lead() based on your sorting (descending or ascending)</span></span>
<span id="cb36-13"><a href="#cb36-13" aria-hidden="true" tabindex="-1"></a>  <span class="fu">mutate</span>(</span>
<span id="cb36-14"><a href="#cb36-14" aria-hidden="true" tabindex="-1"></a>    <span class="co"># we use the 'next_panelist_id' variable to ensure that participants are not mixed up by accident.</span></span>
<span id="cb36-15"><a href="#cb36-15" aria-hidden="true" tabindex="-1"></a>    <span class="at">next_panelist_id =</span> <span class="fu">lead</span>(panelist_id, <span class="at">n =</span> <span class="dv">1</span>L),</span>
<span id="cb36-16"><a href="#cb36-16" aria-hidden="true" tabindex="-1"></a>    <span class="co"># we use the 'next_app_name' to identify potential consecutive app visits</span></span>
<span id="cb36-17"><a href="#cb36-17" aria-hidden="true" tabindex="-1"></a>    <span class="at">next_app_name =</span> <span class="fu">lead</span>(app_name, <span class="at">n =</span> <span class="dv">1</span>L),</span>
<span id="cb36-18"><a href="#cb36-18" aria-hidden="true" tabindex="-1"></a>    <span class="co"># we need the 'next_start_time' to calculate the time_gap between events</span></span>
<span id="cb36-19"><a href="#cb36-19" aria-hidden="true" tabindex="-1"></a>    <span class="at">next_start_time =</span> <span class="fu">lead</span>(start_time, <span class="at">n =</span> <span class="dv">1</span>L),</span>
<span id="cb36-20"><a href="#cb36-20" aria-hidden="true" tabindex="-1"></a>    <span class="co"># we calculate the time_gap in seconds</span></span>
<span id="cb36-21"><a href="#cb36-21" aria-hidden="true" tabindex="-1"></a>    <span class="at">time_gap =</span> <span class="fu">as.numeric</span>(next_start_time <span class="sc">-</span> end_time, <span class="at">units =</span> <span class="st">"secs"</span>),</span>
<span id="cb36-22"><a href="#cb36-22" aria-hidden="true" tabindex="-1"></a>    <span class="co"># we use 'previous_start_time' to replace 'start_time' with the new correct timestamp if we merge consecutive app visits</span></span>
<span id="cb36-23"><a href="#cb36-23" aria-hidden="true" tabindex="-1"></a>    <span class="at">previous_start_time =</span> <span class="fu">lag</span>(start_time, <span class="at">n =</span> <span class="dv">1</span>L)</span>
<span id="cb36-24"><a href="#cb36-24" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">%&gt;%</span></span>
<span id="cb36-25"><a href="#cb36-25" aria-hidden="true" tabindex="-1"></a>  <span class="fu">mutate</span>(</span>
<span id="cb36-26"><a href="#cb36-26" aria-hidden="true" tabindex="-1"></a>    <span class="co"># we use a filter variable to identify consecutive app visits, which we can remove later</span></span>
<span id="cb36-27"><a href="#cb36-27" aria-hidden="true" tabindex="-1"></a>    <span class="co"># conditions are:</span></span>
<span id="cb36-28"><a href="#cb36-28" aria-hidden="true" tabindex="-1"></a>    <span class="co"># - panelist_id and next_panelist_id are identical</span></span>
<span id="cb36-29"><a href="#cb36-29" aria-hidden="true" tabindex="-1"></a>    <span class="co"># - app_name and next_app_name are identical</span></span>
<span id="cb36-30"><a href="#cb36-30" aria-hidden="true" tabindex="-1"></a>    <span class="co"># - time_gap of at most 1 sec</span></span>
<span id="cb36-31"><a href="#cb36-31" aria-hidden="true" tabindex="-1"></a>    <span class="at">filter_var =</span> <span class="fu">if_else</span>((panelist_id <span class="sc">==</span> next_panelist_id <span class="sc">&amp;</span> app_name <span class="sc">==</span> next_app_name <span class="sc">&amp;</span> time_gap <span class="sc">&lt;=</span> <span class="dv">1</span>), <span class="st">"remove"</span>, <span class="st">"keep"</span>),</span>
<span id="cb36-32"><a href="#cb36-32" aria-hidden="true" tabindex="-1"></a>    <span class="co"># I like to keep a column indicating that I changed something, therefore I create 'rows_merged'</span></span>
<span id="cb36-33"><a href="#cb36-33" aria-hidden="true" tabindex="-1"></a>    <span class="at">rows_merged =</span> <span class="fu">if_else</span>((<span class="fu">lag</span>(filter_var)) <span class="sc">==</span> <span class="st">"remove"</span>, <span class="st">"Yes"</span>, <span class="st">"No"</span>),</span>
<span id="cb36-34"><a href="#cb36-34" aria-hidden="true" tabindex="-1"></a>    <span class="co"># replace 'start_time' with 'previous_start_time' for relevant rows</span></span>
<span id="cb36-35"><a href="#cb36-35" aria-hidden="true" tabindex="-1"></a>    <span class="at">start_time =</span> <span class="fu">if_else</span>(filter_var <span class="sc">==</span> <span class="st">"keep"</span> <span class="sc">&amp;</span> rows_merged <span class="sc">==</span> <span class="st">"Yes"</span>, <span class="fu">as_datetime</span>(previous_start_time), <span class="fu">as_datetime</span>(start_time)),</span>
<span id="cb36-36"><a href="#cb36-36" aria-hidden="true" tabindex="-1"></a>    <span class="co"># replace 'duration' with the updated duration for relevant rows</span></span>
<span id="cb36-37"><a href="#cb36-37" aria-hidden="true" tabindex="-1"></a>    <span class="at">duration =</span> <span class="fu">if_else</span>(filter_var <span class="sc">==</span> <span class="st">"keep"</span> <span class="sc">&amp;</span> rows_merged <span class="sc">==</span> <span class="st">"Yes"</span>, <span class="fu">as.numeric</span>(end_time <span class="sc">-</span> start_time, <span class="at">units =</span> <span class="st">"secs"</span>), duration),</span>
<span id="cb36-38"><a href="#cb36-38" aria-hidden="true" tabindex="-1"></a>    <span class="at">datetime =</span> <span class="fu">if_else</span>(filter_var <span class="sc">==</span> <span class="st">"keep"</span> <span class="sc">&amp;</span> rows_merged <span class="sc">==</span> <span class="st">"Yes"</span>, <span class="fu">as_datetime</span>(previous_start_time), <span class="fu">as_datetime</span>(datetime)),</span>
<span id="cb36-39"><a href="#cb36-39" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">%&gt;%</span></span>
<span id="cb36-40"><a href="#cb36-40" aria-hidden="true" tabindex="-1"></a>  <span class="co"># remove consecutive visits</span></span>
<span id="cb36-41"><a href="#cb36-41" aria-hidden="true" tabindex="-1"></a>  <span class="fu">filter</span>(filter_var <span class="sc">==</span> <span class="st">"keep"</span>) <span class="sc">%&gt;%</span></span>
<span id="cb36-42"><a href="#cb36-42" aria-hidden="true" tabindex="-1"></a>  <span class="co"># I suggest replacing sensitive apps in your data set to strengthen anonymisation</span></span>
<span id="cb36-43"><a href="#cb36-43" aria-hidden="true" tabindex="-1"></a>  <span class="co"># replace 'app_names' based on our blacklisted apps</span></span>
<span id="cb36-44"><a href="#cb36-44" aria-hidden="true" tabindex="-1"></a>  <span class="fu">left_join</span>(blacklisted_apps, <span class="at">by =</span> <span class="st">"app_name"</span>) <span class="sc">%&gt;%</span></span>
<span id="cb36-45"><a href="#cb36-45" aria-hidden="true" tabindex="-1"></a>  <span class="fu">mutate</span>(</span>
<span id="cb36-46"><a href="#cb36-46" aria-hidden="true" tabindex="-1"></a>    <span class="at">app_name =</span> <span class="fu">if_else</span>(<span class="sc">!</span><span class="fu">is.na</span>(blacklisted_app), blacklisted_app, app_name),</span>
<span id="cb36-47"><a href="#cb36-47" aria-hidden="true" tabindex="-1"></a>    <span class="at">package_name =</span> <span class="fu">if_else</span>(<span class="sc">!</span><span class="fu">is.na</span>(blacklisted_app), <span class="st">"blacklisted_package"</span>, package_name)</span>
<span id="cb36-48"><a href="#cb36-48" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">%&gt;%</span></span>
<span id="cb36-49"><a href="#cb36-49" aria-hidden="true" tabindex="-1"></a>  <span class="co"># add meta rows</span></span>
<span id="cb36-50"><a href="#cb36-50" aria-hidden="true" tabindex="-1"></a>  <span class="fu">bind_rows</span>(app_meta) <span class="sc">%&gt;%</span></span>
<span id="cb36-51"><a href="#cb36-51" aria-hidden="true" tabindex="-1"></a>  <span class="co"># add screen rows</span></span>
<span id="cb36-52"><a href="#cb36-52" aria-hidden="true" tabindex="-1"></a>  <span class="fu">bind_rows</span>(screen_start_stop) <span class="sc">%&gt;%</span> </span>
<span id="cb36-53"><a href="#cb36-53" aria-hidden="true" tabindex="-1"></a>  <span class="fu">group_by</span>(panelist_id) <span class="sc">%&gt;%</span> </span>
<span id="cb36-54"><a href="#cb36-54" aria-hidden="true" tabindex="-1"></a>  <span class="fu">arrange</span>(start_time, <span class="at">.by_group =</span> <span class="cn">TRUE</span>) <span class="sc">%&gt;%</span> </span>
<span id="cb36-55"><a href="#cb36-55" aria-hidden="true" tabindex="-1"></a>  <span class="fu">ungroup</span>() <span class="sc">%&gt;%</span> </span>
<span id="cb36-56"><a href="#cb36-56" aria-hidden="true" tabindex="-1"></a>  <span class="fu">mutate</span>(</span>
<span id="cb36-57"><a href="#cb36-57" aria-hidden="true" tabindex="-1"></a>    <span class="at">app_name =</span> app_name <span class="sc">%&gt;%</span></span>
<span id="cb36-58"><a href="#cb36-58" aria-hidden="true" tabindex="-1"></a>      <span class="fu">str_to_lower</span>(<span class="at">locale =</span> <span class="st">"C"</span>) <span class="sc">%&gt;%</span></span>
<span id="cb36-59"><a href="#cb36-59" aria-hidden="true" tabindex="-1"></a>      <span class="fu">str_replace_all</span>(<span class="st">"[^</span><span class="sc">\\</span><span class="st">x00-</span><span class="sc">\\</span><span class="st">x7F]"</span>, <span class="st">" "</span>) <span class="sc">%&gt;%</span>  <span class="co"># replaces non-ASCII characters with a space</span></span>
<span id="cb36-60"><a href="#cb36-60" aria-hidden="true" tabindex="-1"></a>      <span class="fu">str_replace_all</span>(<span class="st">"[^a-z0-9</span><span class="sc">\\</span><span class="st">s]"</span>, <span class="st">" "</span>) <span class="sc">%&gt;%</span>   <span class="co"># replaces special characters (anything except a-z, 0-9, whitespace) with a space</span></span>
<span id="cb36-61"><a href="#cb36-61" aria-hidden="true" tabindex="-1"></a>      <span class="fu">str_squish</span>()</span>
<span id="cb36-62"><a href="#cb36-62" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">%&gt;%</span></span>
<span id="cb36-63"><a href="#cb36-63" aria-hidden="true" tabindex="-1"></a>  <span class="fu">select</span>(</span>
<span id="cb36-64"><a href="#cb36-64" aria-hidden="true" tabindex="-1"></a>    panelist_id,</span>
<span id="cb36-65"><a href="#cb36-65" aria-hidden="true" tabindex="-1"></a>    date,</span>
<span id="cb36-66"><a href="#cb36-66" aria-hidden="true" tabindex="-1"></a>    datetime,</span>
<span id="cb36-67"><a href="#cb36-67" aria-hidden="true" tabindex="-1"></a>    start_time,</span>
<span id="cb36-68"><a href="#cb36-68" aria-hidden="true" tabindex="-1"></a>    end_time,</span>
<span id="cb36-69"><a href="#cb36-69" aria-hidden="true" tabindex="-1"></a>    duration,</span>
<span id="cb36-70"><a href="#cb36-70" aria-hidden="true" tabindex="-1"></a>    app_name,</span>
<span id="cb36-71"><a href="#cb36-71" aria-hidden="true" tabindex="-1"></a>    package_name,</span>
<span id="cb36-72"><a href="#cb36-72" aria-hidden="true" tabindex="-1"></a>    event,</span>
<span id="cb36-73"><a href="#cb36-73" aria-hidden="true" tabindex="-1"></a>    rows_merged</span>
<span id="cb36-74"><a href="#cb36-74" aria-hidden="true" tabindex="-1"></a>    )</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>Now we have achieved a nice, tidy data set.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb37"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb37-1"><a href="#cb37-1" aria-hidden="true" tabindex="-1"></a><span class="fu">glimpse</span>(red_data_clean)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code>Rows: 348,700
Columns: 10
$ panelist_id  &lt;chr&gt; "Participant_1", "Participant_1", "Participant_1", "Parti…
$ date         &lt;date&gt; 2025-09-02, 2025-09-02, 2025-09-02, 2025-09-02, 2025-09-…
$ datetime     &lt;dttm&gt; 2025-09-02 08:34:10, 2025-09-02 09:03:05, 2025-09-02 09:…
$ start_time   &lt;dttm&gt; 2025-09-02 08:34:10, 2025-09-02 09:03:05, 2025-09-02 09:…
$ end_time     &lt;dttm&gt; 2025-09-02 08:34:10, 2025-09-02 09:03:05, 2025-09-02 09:…
$ duration     &lt;dbl&gt; 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ app_name     &lt;chr&gt; "fotos", "gmx mail", "gmx mail", "aliexpress", "gmx mail"…
$ package_name &lt;chr&gt; "com.google.android.apps.photos", "de.gmx.mobile.android.…
$ event        &lt;chr&gt; "Interruptive notification", "Interruptive notification",…
$ rows_merged  &lt;chr&gt; NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…</code></pre>
</div>
</div>
<section id="calculating-visits-1" class="level2" data-number="6.1">
<h2 data-number="6.1" class="anchored" data-anchor-id="calculating-visits-1"><span class="header-section-number">6.1</span> Calculating Visits</h2>
<p>Then we’ll use this data set to determine the most frequently used apps.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb39"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb39-1"><a href="#cb39-1" aria-hidden="true" tabindex="-1"></a>most_visited_apps <span class="ot">=</span> red_data_clean <span class="sc">%&gt;%</span></span>
<span id="cb39-2"><a href="#cb39-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">filter</span>(<span class="sc">!</span>app_name <span class="sc">==</span> <span class="st">"Android-System"</span>) <span class="sc">%&gt;%</span></span>
<span id="cb39-3"><a href="#cb39-3" aria-hidden="true" tabindex="-1"></a>  <span class="co"># group by application</span></span>
<span id="cb39-4"><a href="#cb39-4" aria-hidden="true" tabindex="-1"></a>  <span class="fu">group_by</span>(app_name) <span class="sc">%&gt;%</span></span>
<span id="cb39-5"><a href="#cb39-5" aria-hidden="true" tabindex="-1"></a>  <span class="co"># summarise the total number of visits for each application</span></span>
<span id="cb39-6"><a href="#cb39-6" aria-hidden="true" tabindex="-1"></a>  <span class="fu">summarise</span>(</span>
<span id="cb39-7"><a href="#cb39-7" aria-hidden="true" tabindex="-1"></a>    <span class="at">visit =</span> <span class="fu">n</span>()</span>
<span id="cb39-8"><a href="#cb39-8" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">%&gt;%</span></span>
<span id="cb39-9"><a href="#cb39-9" aria-hidden="true" tabindex="-1"></a>  <span class="co"># sort in descending order</span></span>
<span id="cb39-10"><a href="#cb39-10" aria-hidden="true" tabindex="-1"></a>  <span class="fu">arrange</span>(<span class="fu">desc</span>(visit)) <span class="sc">%&gt;%</span></span>
<span id="cb39-11"><a href="#cb39-11" aria-hidden="true" tabindex="-1"></a>  <span class="co"># select the top 10 rows</span></span>
<span id="cb39-12"><a href="#cb39-12" aria-hidden="true" tabindex="-1"></a>  <span class="fu">head</span>(<span class="at">n =</span> <span class="dv">10</span>)</span>
<span id="cb39-13"><a href="#cb39-13" aria-hidden="true" tabindex="-1"></a>most_visited_apps</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 10 × 2
   app_name       visit
   &lt;chr&gt;          &lt;int&gt;
 1 whatsapp       62036
 2 android system 36192
 3 gmail          17685
 4 chrome          9860
 5 web de mail     9270
 6 facebook        9255
 7 instagram       9008
 8 snapchat        5975
 9 blacklisted 43  5885
10 gmx mail        5577</code></pre>
</div>
</div>
<p>A table is nice but a plot is better. Let’s visualize our findings using ggplot.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb41"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb41-1"><a href="#cb41-1" aria-hidden="true" tabindex="-1"></a>red_n_participants <span class="ot">=</span> red_data_clean <span class="sc">%&gt;%</span></span>
<span id="cb41-2"><a href="#cb41-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">summarise</span>(</span>
<span id="cb41-3"><a href="#cb41-3" aria-hidden="true" tabindex="-1"></a>    <span class="at">n_panelist =</span> <span class="fu">n_distinct</span>(panelist_id)</span>
<span id="cb41-4"><a href="#cb41-4" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">%&gt;%</span></span>
<span id="cb41-5"><a href="#cb41-5" aria-hidden="true" tabindex="-1"></a>  <span class="fu">pull</span>(n_panelist)</span>
<span id="cb41-6"><a href="#cb41-6" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb41-7"><a href="#cb41-7" aria-hidden="true" tabindex="-1"></a>plot7 <span class="ot">=</span> <span class="fu">ggplot</span>(most_visited_apps, <span class="fu">aes</span>(<span class="at">x =</span> <span class="fu">reorder</span>(app_name, visit), <span class="at">y =</span> visit)) <span class="sc">+</span></span>
<span id="cb41-8"><a href="#cb41-8" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_col</span>(<span class="at">width =</span> <span class="fl">0.6</span>, <span class="at">fill =</span> <span class="st">"#BF616A"</span>) <span class="sc">+</span></span>
<span id="cb41-9"><a href="#cb41-9" aria-hidden="true" tabindex="-1"></a>  <span class="fu">coord_flip</span>() <span class="sc">+</span></span>
<span id="cb41-10"><a href="#cb41-10" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_text</span>(<span class="fu">aes</span>(<span class="at">label =</span> visit), <span class="at">hjust =</span> <span class="sc">-</span><span class="fl">0.2</span>, <span class="at">size =</span> <span class="dv">3</span>, <span class="at">family =</span> <span class="st">"serif"</span>) <span class="sc">+</span></span>
<span id="cb41-11"><a href="#cb41-11" aria-hidden="true" tabindex="-1"></a>  <span class="fu">scale_y_continuous</span>(<span class="at">expand =</span> <span class="fu">expansion</span>(<span class="at">mult =</span> <span class="fu">c</span>(<span class="dv">0</span>, <span class="fl">0.15</span>))) <span class="sc">+</span></span>
<span id="cb41-12"><a href="#cb41-12" aria-hidden="true" tabindex="-1"></a>  <span class="fu">theme_minimal</span>(<span class="at">base_size =</span> <span class="dv">13</span>, <span class="at">base_family =</span> <span class="st">"serif"</span>) <span class="sc">+</span></span>
<span id="cb41-13"><a href="#cb41-13" aria-hidden="true" tabindex="-1"></a>  <span class="fu">theme</span>(</span>
<span id="cb41-14"><a href="#cb41-14" aria-hidden="true" tabindex="-1"></a>    <span class="at">legend.position =</span> <span class="st">"none"</span>,</span>
<span id="cb41-15"><a href="#cb41-15" aria-hidden="true" tabindex="-1"></a>    <span class="at">plot.title =</span> <span class="fu">element_text</span>(<span class="at">face =</span> <span class="st">"bold"</span>, <span class="at">size =</span> <span class="dv">14</span>, <span class="at">family =</span> <span class="st">"serif"</span>),</span>
<span id="cb41-16"><a href="#cb41-16" aria-hidden="true" tabindex="-1"></a>    <span class="at">axis.text.y =</span> <span class="fu">element_text</span>(<span class="at">family =</span> <span class="st">"serif"</span>),</span>
<span id="cb41-17"><a href="#cb41-17" aria-hidden="true" tabindex="-1"></a>    <span class="at">axis.text.x =</span> <span class="fu">element_text</span>(<span class="at">family =</span> <span class="st">"serif"</span>)</span>
<span id="cb41-18"><a href="#cb41-18" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">+</span></span>
<span id="cb41-19"><a href="#cb41-19" aria-hidden="true" tabindex="-1"></a>  <span class="fu">labs</span>(</span>
<span id="cb41-20"><a href="#cb41-20" aria-hidden="true" tabindex="-1"></a>    <span class="at">title =</span> <span class="st">"Top 10 most used apps by number of visits"</span>,</span>
<span id="cb41-21"><a href="#cb41-21" aria-hidden="true" tabindex="-1"></a>    <span class="at">subtitle =</span> <span class="fu">paste0</span>(<span class="st">"Data based on a Sample of German Internet Users (N = "</span>, red_n_participants, <span class="st">")"</span>),</span>
<span id="cb41-22"><a href="#cb41-22" aria-hidden="true" tabindex="-1"></a>    <span class="at">x =</span> <span class="st">"App Name"</span>,</span>
<span id="cb41-23"><a href="#cb41-23" aria-hidden="true" tabindex="-1"></a>    <span class="at">y =</span> <span class="st">"Visits"</span></span>
<span id="cb41-24"><a href="#cb41-24" aria-hidden="true" tabindex="-1"></a>  )</span>
<span id="cb41-25"><a href="#cb41-25" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb41-26"><a href="#cb41-26" aria-hidden="true" tabindex="-1"></a>plot7</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<div>
<figure class="figure">
<p><img src="readme_files/figure-html/unnamed-chunk-31-1.png" alt="Horizontal bar chart of the top 10 most used apps by number of visits" class="img-fluid figure-img" width="672"></p>
</figure>
</div>
</div>
</div>
</section>
<section id="calculating-the-number-of-shares-per-app" class="level2" data-number="6.2">
<h2 data-number="6.2" class="anchored" data-anchor-id="calculating-the-number-of-shares-per-app"><span class="header-section-number">6.2</span> Calculating the number of shares per app</h2>
<p>We have already created several plots. But now I would like to briefly draw your attention to the advantages of the unprocessed red data set. Here we have some more information than in the blue one, for example about shares and notifications.</p>
<p>First, let’s look at shares. By shares, we mean sharing content from one app to another app (e.g.,&nbsp;you send a friend a recipe from your browser via WhatsApp). This opens the Android Share window, and we count the number of times it is opened here. Let’s take a quick look at that.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb42"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb42-1"><a href="#cb42-1" aria-hidden="true" tabindex="-1"></a>shares <span class="ot">=</span> red_data_clean <span class="sc">%&gt;%</span> </span>
<span id="cb42-2"><a href="#cb42-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">filter</span>(event <span class="sc">==</span> <span class="st">"Share"</span>) <span class="sc">%&gt;%</span> </span>
<span id="cb42-3"><a href="#cb42-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">group_by</span>(app_name) <span class="sc">%&gt;%</span> </span>
<span id="cb42-4"><a href="#cb42-4" aria-hidden="true" tabindex="-1"></a>  <span class="fu">summarise</span>(</span>
<span id="cb42-5"><a href="#cb42-5" aria-hidden="true" tabindex="-1"></a>    <span class="at">n_shares =</span> <span class="fu">n</span>()</span>
<span id="cb42-6"><a href="#cb42-6" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">%&gt;%</span></span>
<span id="cb42-7"><a href="#cb42-7" aria-hidden="true" tabindex="-1"></a>  <span class="fu">arrange</span>(<span class="fu">desc</span>(n_shares)) <span class="sc">%&gt;%</span></span>
<span id="cb42-8"><a href="#cb42-8" aria-hidden="true" tabindex="-1"></a>  <span class="fu">head</span>(<span class="at">n =</span> <span class="dv">5</span>) <span class="sc">%&gt;%</span></span>
<span id="cb42-9"><a href="#cb42-9" aria-hidden="true" tabindex="-1"></a>  <span class="fu">mutate</span>(</span>
<span id="cb42-10"><a href="#cb42-10" aria-hidden="true" tabindex="-1"></a>    <span class="at">app_name =</span> <span class="fu">str_replace</span>(app_name, <span class="st">"Fotos�&amp; Videos"</span>, <span class="st">"Fotos &amp; Videos"</span>)</span>
<span id="cb42-11"><a href="#cb42-11" aria-hidden="true" tabindex="-1"></a>  )</span>
<span id="cb42-12"><a href="#cb42-12" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb42-13"><a href="#cb42-13" aria-hidden="true" tabindex="-1"></a>plot8 <span class="ot">=</span> <span class="fu">ggplot</span>(shares, <span class="fu">aes</span>(<span class="at">x =</span> <span class="fu">reorder</span>(app_name, n_shares), <span class="at">y =</span> n_shares)) <span class="sc">+</span></span>
<span id="cb42-14"><a href="#cb42-14" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_col</span>(<span class="at">width =</span> <span class="fl">0.6</span>, <span class="at">fill =</span> <span class="st">"#BF616A"</span>) <span class="sc">+</span></span>
<span id="cb42-15"><a href="#cb42-15" aria-hidden="true" tabindex="-1"></a>  <span class="fu">coord_flip</span>() <span class="sc">+</span></span>
<span id="cb42-16"><a href="#cb42-16" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_text</span>(<span class="fu">aes</span>(<span class="at">label =</span> n_shares), <span class="at">hjust =</span> <span class="sc">-</span><span class="fl">0.2</span>, <span class="at">size =</span> <span class="dv">3</span>, <span class="at">family =</span> <span class="st">"serif"</span>) <span class="sc">+</span></span>
<span id="cb42-17"><a href="#cb42-17" aria-hidden="true" tabindex="-1"></a>  <span class="fu">scale_y_continuous</span>(<span class="at">expand =</span> <span class="fu">expansion</span>(<span class="at">mult =</span> <span class="fu">c</span>(<span class="dv">0</span>, <span class="fl">0.15</span>))) <span class="sc">+</span></span>
<span id="cb42-18"><a href="#cb42-18" aria-hidden="true" tabindex="-1"></a>  <span class="fu">theme_minimal</span>(<span class="at">base_size =</span> <span class="dv">13</span>, <span class="at">base_family =</span> <span class="st">"serif"</span>) <span class="sc">+</span></span>
<span id="cb42-19"><a href="#cb42-19" aria-hidden="true" tabindex="-1"></a>  <span class="fu">theme</span>(</span>
<span id="cb42-20"><a href="#cb42-20" aria-hidden="true" tabindex="-1"></a>    <span class="at">legend.position =</span> <span class="st">"none"</span>,</span>
<span id="cb42-21"><a href="#cb42-21" aria-hidden="true" tabindex="-1"></a>    <span class="at">plot.title =</span> <span class="fu">element_text</span>(<span class="at">face =</span> <span class="st">"bold"</span>, <span class="at">size =</span> <span class="dv">14</span>, <span class="at">family =</span> <span class="st">"serif"</span>),</span>
<span id="cb42-22"><a href="#cb42-22" aria-hidden="true" tabindex="-1"></a>    <span class="at">axis.text.y =</span> <span class="fu">element_text</span>(<span class="at">family =</span> <span class="st">"serif"</span>),</span>
<span id="cb42-23"><a href="#cb42-23" aria-hidden="true" tabindex="-1"></a>    <span class="at">axis.text.x =</span> <span class="fu">element_text</span>(<span class="at">family =</span> <span class="st">"serif"</span>)</span>
<span id="cb42-24"><a href="#cb42-24" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">+</span></span>
<span id="cb42-25"><a href="#cb42-25" aria-hidden="true" tabindex="-1"></a>  <span class="fu">labs</span>(</span>
<span id="cb42-26"><a href="#cb42-26" aria-hidden="true" tabindex="-1"></a>    <span class="at">title =</span> <span class="st">"Top 5 apps with most shares"</span>,</span>
<span id="cb42-27"><a href="#cb42-27" aria-hidden="true" tabindex="-1"></a>    <span class="at">subtitle =</span> <span class="fu">paste0</span>(<span class="st">"Data based on a Sample of German Internet Users (N = "</span>, red_n_participants, <span class="st">")"</span>),</span>
<span id="cb42-28"><a href="#cb42-28" aria-hidden="true" tabindex="-1"></a>    <span class="at">x =</span> <span class="st">"App Name"</span>,</span>
<span id="cb42-29"><a href="#cb42-29" aria-hidden="true" tabindex="-1"></a>    <span class="at">y =</span> <span class="st">"Shares"</span></span>
<span id="cb42-30"><a href="#cb42-30" aria-hidden="true" tabindex="-1"></a>  )</span>
<span id="cb42-31"><a href="#cb42-31" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb42-32"><a href="#cb42-32" aria-hidden="true" tabindex="-1"></a>plot8</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<div>
<figure class="figure">
<p><img src="readme_files/figure-html/unnamed-chunk-32-1.png" alt="Horizontal bar chart of the top 5 apps with the most shares" class="img-fluid figure-img" width="672"></p>
</figure>
</div>
</div>
</div>
</section>
<section id="calculating-the-number-of-notifications-per-app" class="level2" data-number="6.3">
<h2 data-number="6.3" class="anchored" data-anchor-id="calculating-the-number-of-notifications-per-app"><span class="header-section-number">6.3</span> Calculating the number of notifications per app</h2>
<p>And then we’ll do the same thing again for notifications.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb43"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb43-1"><a href="#cb43-1" aria-hidden="true" tabindex="-1"></a>notifications <span class="ot">=</span> red_data_clean <span class="sc">%&gt;%</span> </span>
<span id="cb43-2"><a href="#cb43-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">filter</span>(app_name <span class="sc">!=</span> <span class="st">"android system"</span>) <span class="sc">%&gt;%</span> </span>
<span id="cb43-3"><a href="#cb43-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">filter</span>(event <span class="sc">==</span> <span class="st">"Interruptive notification"</span>) <span class="sc">%&gt;%</span> </span>
<span id="cb43-4"><a href="#cb43-4" aria-hidden="true" tabindex="-1"></a>  <span class="fu">group_by</span>(app_name) <span class="sc">%&gt;%</span> </span>
<span id="cb43-5"><a href="#cb43-5" aria-hidden="true" tabindex="-1"></a>  <span class="fu">summarise</span>(</span>
<span id="cb43-6"><a href="#cb43-6" aria-hidden="true" tabindex="-1"></a>    <span class="at">n_notifications =</span> <span class="fu">n</span>()</span>
<span id="cb43-7"><a href="#cb43-7" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">%&gt;%</span></span>
<span id="cb43-8"><a href="#cb43-8" aria-hidden="true" tabindex="-1"></a>  <span class="fu">arrange</span>(<span class="fu">desc</span>(n_notifications)) <span class="sc">%&gt;%</span></span>
<span id="cb43-9"><a href="#cb43-9" aria-hidden="true" tabindex="-1"></a>  <span class="fu">head</span>(<span class="at">n =</span> <span class="dv">5</span>)</span>
<span id="cb43-10"><a href="#cb43-10" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb43-11"><a href="#cb43-11" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb43-12"><a href="#cb43-12" aria-hidden="true" tabindex="-1"></a>plot9 <span class="ot">=</span> <span class="fu">ggplot</span>(notifications, <span class="fu">aes</span>(<span class="at">x =</span> <span class="fu">reorder</span>(app_name, n_notifications), <span class="at">y =</span> n_notifications)) <span class="sc">+</span></span>
<span id="cb43-13"><a href="#cb43-13" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_col</span>(<span class="at">width =</span> <span class="fl">0.6</span>, <span class="at">fill =</span> <span class="st">"#BF616A"</span>) <span class="sc">+</span></span>
<span id="cb43-14"><a href="#cb43-14" aria-hidden="true" tabindex="-1"></a>  <span class="fu">coord_flip</span>() <span class="sc">+</span></span>
<span id="cb43-15"><a href="#cb43-15" aria-hidden="true" tabindex="-1"></a>  <span class="fu">geom_text</span>(<span class="fu">aes</span>(<span class="at">label =</span> n_notifications), <span class="at">hjust =</span> <span class="sc">-</span><span class="fl">0.2</span>, <span class="at">size =</span> <span class="dv">3</span>, <span class="at">family =</span> <span class="st">"serif"</span>) <span class="sc">+</span></span>
<span id="cb43-16"><a href="#cb43-16" aria-hidden="true" tabindex="-1"></a>  <span class="fu">scale_y_continuous</span>(<span class="at">expand =</span> <span class="fu">expansion</span>(<span class="at">mult =</span> <span class="fu">c</span>(<span class="dv">0</span>, <span class="fl">0.15</span>))) <span class="sc">+</span></span>
<span id="cb43-17"><a href="#cb43-17" aria-hidden="true" tabindex="-1"></a>  <span class="fu">theme_minimal</span>(<span class="at">base_size =</span> <span class="dv">13</span>, <span class="at">base_family =</span> <span class="st">"serif"</span>) <span class="sc">+</span></span>
<span id="cb43-18"><a href="#cb43-18" aria-hidden="true" tabindex="-1"></a>  <span class="fu">theme</span>(</span>
<span id="cb43-19"><a href="#cb43-19" aria-hidden="true" tabindex="-1"></a>    <span class="at">legend.position =</span> <span class="st">"none"</span>,</span>
<span id="cb43-20"><a href="#cb43-20" aria-hidden="true" tabindex="-1"></a>    <span class="at">plot.title =</span> <span class="fu">element_text</span>(<span class="at">face =</span> <span class="st">"bold"</span>, <span class="at">size =</span> <span class="dv">14</span>, <span class="at">family =</span> <span class="st">"serif"</span>),</span>
<span id="cb43-21"><a href="#cb43-21" aria-hidden="true" tabindex="-1"></a>    <span class="at">axis.text.y =</span> <span class="fu">element_text</span>(<span class="at">family =</span> <span class="st">"serif"</span>),</span>
<span id="cb43-22"><a href="#cb43-22" aria-hidden="true" tabindex="-1"></a>    <span class="at">axis.text.x =</span> <span class="fu">element_text</span>(<span class="at">family =</span> <span class="st">"serif"</span>)</span>
<span id="cb43-23"><a href="#cb43-23" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">+</span></span>
<span id="cb43-24"><a href="#cb43-24" aria-hidden="true" tabindex="-1"></a>  <span class="fu">labs</span>(</span>
<span id="cb43-25"><a href="#cb43-25" aria-hidden="true" tabindex="-1"></a>    <span class="at">title =</span> <span class="st">"Top 5 apps with most notifications"</span>,</span>
<span id="cb43-26"><a href="#cb43-26" aria-hidden="true" tabindex="-1"></a>    <span class="at">subtitle =</span> <span class="fu">paste0</span>(<span class="st">"Data based on a Sample of German Internet Users (N = "</span>, red_n_participants, <span class="st">")"</span>),</span>
<span id="cb43-27"><a href="#cb43-27" aria-hidden="true" tabindex="-1"></a>    <span class="at">x =</span> <span class="st">"App Name"</span>,</span>
<span id="cb43-28"><a href="#cb43-28" aria-hidden="true" tabindex="-1"></a>    <span class="at">y =</span> <span class="st">"Notifications"</span></span>
<span id="cb43-29"><a href="#cb43-29" aria-hidden="true" tabindex="-1"></a>  )</span>
<span id="cb43-30"><a href="#cb43-30" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb43-31"><a href="#cb43-31" aria-hidden="true" tabindex="-1"></a>plot9</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<div>
<figure class="figure">
<p><img src="readme_files/figure-html/unnamed-chunk-33-1.png" alt="Horizontal bar chart of the top 5 apps with the most notifications" class="img-fluid figure-img" width="672"></p>
</figure>
</div>
</div>
</div>
<p>It’s not surprising that WhatsApp sends the highest number of notifications.</p>
<p>Interestingly, the camera displays a relatively large number of notifications. This may seem counterintuitive at first, but it is often because the camera sends notifications when, for example, Google Lens is being used or a video is being recorded (e.g., “recording in progress” notification).</p>
<p>We have now looked at several core aspects of Android app log data. Depending on the research question, you can extract and use more exciting information from the data. For this tutorial, however, this is sufficient and it brings us to the end.</p>
<div class="quarto-figure quarto-figure-center">
<figure class="figure">
<p><img src="images/image_4_person_underground_v2.png" alt="" class="img-fluid figure-img"></p>
<figcaption>Image generated by Midjourney</figcaption>
</figure>
</div>
</section>
</section>
<section id="conclusion" class="level1" data-number="7">
<h1 data-number="7"><span class="header-section-number">7</span> Conclusion</h1>
<p>If you’ve made it this far, I hope we’ve been able to give you a good introduction to the processing and analysis of Android log data. Among other things, we have covered data processing, which includes removing duplicates and consecutive apps, removing background processes, and using blacklists and Android event types. You have also gained an overview of the most common analysis dimensions for app tracking data, which include app, person, and time levels. Good luck!</p>
</section>
<section id="references" class="level1 unnumbered" data-number="8">


</section>

<div id="quarto-appendix" class="default"><section class="quarto-appendix-contents" role="doc-bibliography" id="quarto-bibliography"><h2 class="anchored quarto-appendix-heading">8 References</h2><div id="refs" class="references csl-bib-body hanging-indent" data-entry-spacing="0" data-line-spacing="2" role="list">
<div id="ref-Klingelhoefer2025" class="csl-entry" role="listitem">
Klingelhoefer, J., Gilbert, A., Adrian, C., &amp; Meier, A. (2025). Possible futures all at once: Time frame and time lag in short-term longitudinal media effects research on well-being. <em>Journal of Communication</em>, <em>jqaf037</em>.
</div>
<div id="ref-Parry2025" class="csl-entry" role="listitem">
Parry, D., &amp; Toth, R. (2025). Extracting meaningful measures of smartphone usage from Android event log data: A methodological primer. <em>Computational Communication Research</em>, <em>7</em>(1), 1.
</div>
<div id="ref-Peng2020" class="csl-entry" role="listitem">
Peng, T.-Q., &amp; Zhu, J. (2020). Mobile phone use as sequential processes: From discrete behaviors to sessions of behaviors and trajectories of sessions. <em>Journal of Computer-Mediated Communication</em>, <em>25</em>(2), 129–146.
</div>
<div id="ref-Toth2025" class="csl-entry" role="listitem">
Toth, R., Parry, D., &amp; Emmer, M. (2025). From screen time to daily rhythms: A mixed methods study of smartphone use among German adults. <em>Journal of Quantitative Description: Digital Media</em>, <em>5</em>(1), 1–63.
</div>
<div id="ref-Zerrer2024" class="csl-entry" role="listitem">
Zerrer, P. (2024). <em>Political action and news use of the Fridays for Future movement in Germany</em> [PhD thesis, University of Bremen]. <a href="https://doi.org/10.26092/elib/3604">https://doi.org/10.26092/elib/3604</a>
</div>
</div></section></div></main>
<!-- /main column -->
<script id="quarto-html-after-body" type="application/javascript">
  window.document.addEventListener("DOMContentLoaded", function (event) {
    const icon = "";
    // Decorate every `.anchored` element with an AnchorJS link placed to its
    // right; the link glyph is the `icon` string declared just above.
    const anchorJS = new window.AnchorJS();
    anchorJS.options = { placement: 'right', icon };
    anchorJS.add('.anchored');
    // True when any of the element's CSS classes begins with
    // `code-annotation-`.
    const isCodeAnnotation = (el) =>
      [...el.classList].some((cssClass) => cssClass.startsWith('code-annotation-'));
    // ClipboardJS `success` handler: flashes a "Copied!" state on the copy
    // button (plus a Bootstrap tooltip when Bootstrap is loaded) and puts the
    // button back the way it was one second later.
    const onCopySuccess = function(e) {
      const copyButton = e.trigger;
      const copiedLabel = "Copied!";

      // Drop focus so the button does not stay highlighted after the click.
      copyButton.blur();
      copyButton.classList.add('code-copy-button-checked');

      // Remember the title shown before the flash so it can be restored.
      const previousTitle = copyButton.getAttribute("title");
      copyButton.setAttribute("title", copiedLabel);

      let copiedTooltip;
      if (window.bootstrap) {
        // The data-bs-* attributes are set before the tooltip is constructed
        // and removed again when the flash ends.
        copyButton.setAttribute("data-bs-toggle", "tooltip");
        copyButton.setAttribute("data-bs-placement", "left");
        copyButton.setAttribute("data-bs-title", copiedLabel);
        copiedTooltip = new bootstrap.Tooltip(copyButton, {
          trigger: "manual",
          customClass: "code-copy-button-tooltip",
          offset: [0, -8]
        });
        copiedTooltip.show();
      }

      const endFlash = () => {
        if (copiedTooltip) {
          copiedTooltip.hide();
          copyButton.removeAttribute("data-bs-title");
          copyButton.removeAttribute("data-bs-toggle");
          copyButton.removeAttribute("data-bs-placement");
        }
        copyButton.setAttribute("title", previousTitle);
        copyButton.classList.remove('code-copy-button-checked');
      };
      setTimeout(endFlash, 1000);

      // Clear the selection left over from the copy.
      e.clearSelection();
    }
    // Returns the text to put on the clipboard for a copy button: the text of
    // the element immediately preceding the button, with every child that
    // `isCodeAnnotation` recognises stripped out. The work happens on a deep
    // clone, so the element on the page is left untouched.
    const getTextToCopy = function(trigger) {
      const codeEl = trigger.previousElementSibling.cloneNode(true);
      // `children` is a live collection: removing a node while iterating it
      // directly shifts the remaining items and skips the sibling that follows
      // each removed node. Iterate over a snapshot so none is missed.
      for (const childEl of Array.from(codeEl.children)) {
        if (isCodeAnnotation(childEl)) {
          childEl.remove();
        }
      }
      return codeEl.innerText;
    }
    // Wire ClipboardJS to every copy button outside the embedded-source modal.
    const clipboard = new window.ClipboardJS(
      '.code-copy-button:not([data-in-quarto-modal])',
      { text: getTextToCopy }
    );
    clipboard.on('success', onCopySuccess);

    // Copy buttons flagged as living inside the embedded-source modal get a
    // second instance that uses the modal element as its `container`. Nothing
    // is set up when the page has no such modal.
    const embeddedSourceModalEl = window.document.getElementById('quarto-embedded-source-code-modal');
    if (embeddedSourceModalEl) {
      new window.ClipboardJS('.code-copy-button[data-in-quarto-modal]', {
        text: getTextToCopy,
        container: embeddedSourceModalEl
      }).on('success', onCopySuccess);
    }
      // Patterns used to decide whether a link stays within this site.
      var localhostRegex = new RegExp(/^(?:http|https):\/\/localhost\:?[0-9]*\//);
      var mailtoRegex = new RegExp(/^mailto:/);
      // The current host must be matched literally. Regex metacharacters in it
      // (the dots of a domain name, the brackets of an IPv6 literal) are escaped
      // first so they cannot act as wildcards or as a character class.
      var filterRegex = new RegExp('/' + window.location.host.replace(/[.*+?^${}()|[\]\\]/g, '\\$&') + '/');
      // True for hrefs that contain "/<current host>/", for localhost URLs and
      // for mailto: links; everything else is treated as external.
      var isInternal = (href) => {
        return filterRegex.test(href) || localhostRegex.test(href) || mailtoRegex.test(href);
      }
      // Walk every non-navigation link. For links that point outside the site,
      // restore the href saved in `data-original-href` when one is present,
      // undoing a rewrite that quarto-nav.js may have applied.
      var links = window.document.querySelectorAll('a[href]:not(.nav-link):not(.navbar-brand):not(.toc-action):not(.sidebar-link):not(.sidebar-item-toggle):not(.pagination-link):not(.no-external):not([aria-hidden]):not(.dropdown-item):not(.quarto-navigation-tool):not(.about-link)');
      links.forEach((link) => {
        if (isInternal(link.href)) {
          return;
        }
        if (link.dataset.originalHref !== undefined) {
          link.href = link.dataset.originalHref;
        }
      });
    // Attaches a Quarto-themed tippy popup to `el`.
    //   contentFn      - optional; forwarded to tippy as `content`
    //   onTriggerFn    - optional; forwarded to tippy as `onTrigger`
    //   onUntriggerFn  - optional; forwarded to tippy as `onUntrigger`
    // An optional argument is only added to the options when it is truthy.
    function tippyHover(el, contentFn, onTriggerFn, onUntriggerFn) {
      const tippyOptions = {
        allowHTML: true,
        maxWidth: 500,
        delay: 100,
        arrow: false,
        // mount the popup inside the parent of the element it is attached to
        appendTo: (reference) => reference.parentElement,
        interactive: true,
        interactiveBorder: 10,
        theme: 'quarto',
        placement: 'bottom-start',
      };
      const optionalHooks = [
        ['content', contentFn],
        ['onTrigger', onTriggerFn],
        ['onUntrigger', onUntriggerFn],
      ];
      for (const [optionName, hook] of optionalHooks) {
        if (hook) {
          tippyOptions[optionName] = hook;
        }
      }
      window.tippy(el, tippyOptions);
    }
    // Footnote references: hovering one shows the HTML of the note it points
    // to, or an empty popup when no element with that id exists.
    const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]');
    noterefs.forEach((ref) => {
      const footnoteHtml = () => {
        // prefer the data attribute, fall back to the plain href
        const rawTarget = ref.getAttribute('data-footnote-href') || ref.getAttribute('href');
        let fragment = rawTarget;
        try {
          // an absolute URL is reduced to its hash part
          fragment = new URL(rawTarget).hash;
        } catch {
          // not parseable as an absolute URL: keep the raw value
        }
        const noteId = fragment.replace(/^#\/?/, "");
        const noteEl = window.document.getElementById(noteId);
        return noteEl ? noteEl.innerHTML : "";
      };
      tippyHover(ref, footnoteHtml);
    });
    const xrefs = window.document.querySelectorAll('a.quarto-xref');
    // Builds the popup HTML for a cross-reference target.
    //   id   - id of the target element, or null when there is no id
    //   note - the target element; it is modified in place (classes and the
    //          AnchorJS link are removed), which is why callers hand in a clone
    //          or a node from a separately parsed document
    // Returns an HTML string.
    const processXRef = (id, note) => {
      // Run Quarto's math typesetting on a node when that hook exists.
      const typesetIfAvailable = (node) => {
        if (window.Quarto?.typesetMath) {
          window.Quarto.typesetMath(node);
        }
      };

      // Recursively strip the column container classes from a subtree.
      const stripColumnClz = (el) => {
        el.classList.remove("page-full", "page-columns");
        if (el.children) {
          Array.from(el.children).forEach((child) => stripColumnClz(child));
        }
      };
      stripColumnClz(note);

      const isSectionTarget = id === null || id.startsWith('sec-');
      if (!isSectionTarget) {
        // Drop the AnchorJS link, if any, so it does not show up in the popup.
        const anchorLink = note.querySelector('a.anchorjs-link');
        if (anchorLink) {
          anchorLink.remove();
        }
        typesetIfAvailable(note);
        // Callouts keep their own wrapper element; everything else is unwrapped.
        return note.classList.contains("callout") ? note.outerHTML : note.innerHTML;
      }

      // Sections are special-cased: only their first couple of elements are shown.
      const container = document.createElement("div");
      const hasEnoughChildren = note.children && note.children.length > 2;
      if (!hasEnoughChildren) {
        typesetIfAvailable(note);
        return note.innerHTML;
      }

      // Keep the first child plus the first later child that is not an empty <p>.
      container.appendChild(note.children[0].cloneNode(true));
      const firstBodyChild = Array.from(note.children)
        .slice(1)
        .find((child) => !(child.tagName === "P" && child.innerText === ""));
      if (firstBodyChild) {
        container.appendChild(firstBodyChild.cloneNode(true));
      }
      typesetIfAvailable(container);
      return container.innerHTML;
    }
    // Attach a hover preview to every cross-reference link in `xrefs`.
    // For each link the preview content is resolved lazily inside the callback
    // handed to `tippyHover` (defined elsewhere in this file; the callback
    // receives the tooltip instance - presumably when the tooltip is triggered,
    // confirm against tippyHover):
    //   1. href with a fragment whose id exists in this document -> render a clone of that element
    //   2. href with a fragment not found locally -> fetch the target page and look the id up there
    //   3. href without a fragment -> fetch the page and preview its `main.content`
    for (var i=0; i<xrefs.length; i++) {
      const xref = xrefs[i];
      tippyHover(xref, undefined, function(instance) {
        // Keep the tooltip disabled until its content has been resolved
        instance.disable();
        let url = xref.getAttribute('href');
        let hash = undefined; 
        if (url.startsWith('#')) {
          hash = url;
        } else {
          // Resolve against the document base URL: `new URL(url)` alone throws
          // for relative hrefs such as "chapter.html#sec-x", which silently
          // dropped the fragment and sent such links down the "no hash" path.
          // `fetch` below resolves relative URLs against the same base.
          try { hash = new URL(url, window.document.baseURI).hash; } catch {}
        }
        if (hash) {
          // Drop the leading "#" (and an optional "/") to obtain the element id
          const id = hash.replace(/^#\/?/, "");
          const note = window.document.getElementById(id);
          if (note !== null) {
            try {
              // Work on a clone so the live element is never modified
              const html = processXRef(id, note.cloneNode(true));
              instance.setContent(html);
            } finally {
              instance.enable();
              instance.show();
            }
          } else {
            // See if we can fetch this: load the page without its fragment
            // and look the id up in the parsed document
            fetch(url.split('#')[0])
            .then(res => res.text())
            .then(html => {
              const parser = new DOMParser();
              const htmlDoc = parser.parseFromString(html, "text/html");
              const note = htmlDoc.getElementById(id);
              if (note !== null) {
                const html = processXRef(id, note);
                instance.setContent(html);
              } 
            }).finally(() => {
              // Re-enable and show the tooltip whether or not content was found
              instance.enable();
              instance.show();
            });
          }
        } else {
          // See if we can fetch a full url (with no hash to target)
          // This is a special case and we should probably do some content thinning / targeting
          fetch(url)
          .then(res => res.text())
          .then(html => {
            const parser = new DOMParser();
            const htmlDoc = parser.parseFromString(html, "text/html");
            const note = htmlDoc.querySelector('main.content');
            if (note !== null) {
              // This should only happen for chapter cross references
              // (since there is no id in the URL)
              // remove the first header
              if (note.children.length > 0 && note.children[0].tagName === "HEADER") {
                note.children[0].remove();
              }
              const html = processXRef(null, note);
              instance.setContent(html);
            } 
          }).finally(() => {
            // Re-enable and show the tooltip whether or not content was found
            instance.enable();
            instance.show();
          });
        }
      }, function(instance) {
        // Nothing to do in the second callback
      });
    }
        // Annotation element whose code lines are currently highlighted
        // (undefined while nothing is selected)
        let selectedAnnoteEl;
        // Build the CSS selector for the <span> that carries the given
        // `data-code-cell` / `data-code-annotation` attribute pair.
        const selectorForAnnotation = ( cell, annotation) => {
          const attrMatch = (attrName, attrValue) => '[' + attrName + '="' + attrValue + '"]';
          return 'span' + attrMatch('data-code-cell', cell) + attrMatch('data-code-annotation', annotation);
        };
        /**
         * Highlight the code lines that belong to an annotation element.
         *
         * Reads `data-target-cell` / `data-target-annotation` from `annoteEl`,
         * finds the matching annotation <span>, and positions (creating them if
         * necessary) two absolutely positioned overlay <div>s: one over the code
         * lines and one in the cell's `.code-annotation-gutter`.
         *
         * `annoteEl` is always recorded as the current selection. When the
         * expected markup is missing (no annotation span, no `data-code-lines`,
         * unknown line id, no gutter) the affected highlight is skipped instead
         * of throwing a TypeError.
         *
         * @param {Element} annoteEl annotation element carrying the data-target-* attributes
         */
        const selectCodeLines = (annoteEl) => {
          const targetCell = annoteEl.getAttribute("data-target-cell");
          const targetAnnotation = annoteEl.getAttribute("data-target-annotation");
          // Record the selection up front so toggling and resize handling stay
          // consistent even if nothing can be highlighted below
          selectedAnnoteEl = annoteEl;

          const annoteSpan = window.document.querySelector(selectorForAnnotation(targetCell, targetAnnotation));
          const codeLines = annoteSpan ? annoteSpan.getAttribute("data-code-lines") : null;
          if (!codeLines) {
            // No annotation span or no line list: nothing to highlight
            return;
          }
          // Line element ids have the form "<cell id>-<line>"
          const lineIds = codeLines.split(",").map((line) => {
            return targetCell + "-" + line;
          });

          // Measure from the first annotated line
          const firstEl = window.document.getElementById(lineIds[0]);
          if (firstEl === null) {
            return;
          }
          const top = firstEl.offsetTop;
          let height = firstEl.offsetHeight;
          // The overlay is appended two levels above the line element
          const parent = firstEl.parentElement?.parentElement ?? null;
          if (lineIds.length > 1) {
            // Stretch the highlight down to the bottom of the last annotated line
            const lastEl = window.document.getElementById(lineIds[lineIds.length - 1]);
            if (lastEl !== null) {
              const bottom = lastEl.offsetTop + lastEl.offsetHeight;
              height = bottom - top;
            }
          }
          if (parent === null) {
            return;
          }

          // cook up a div (if necessary) and position it 
          let div = window.document.getElementById("code-annotation-line-highlight");
          if (div === null) {
            div = window.document.createElement("div");
            div.setAttribute("id", "code-annotation-line-highlight");
            div.style.position = 'absolute';
            parent.appendChild(div);
          }
          // 2px of padding above and below the measured lines
          div.style.top = top - 2 + "px";
          div.style.height = height + 4 + "px";
          div.style.left = 0;

          // Same treatment for the marker in the annotation gutter
          let gutterDiv = window.document.getElementById("code-annotation-line-highlight-gutter");
          if (gutterDiv === null) {
            const codeCell = window.document.getElementById(targetCell);
            const gutter = codeCell ? codeCell.querySelector('.code-annotation-gutter') : null;
            if (gutter !== null) {
              gutterDiv = window.document.createElement("div");
              gutterDiv.setAttribute("id", "code-annotation-line-highlight-gutter");
              gutterDiv.style.position = 'absolute';
              gutter.appendChild(gutterDiv);
            }
          }
          if (gutterDiv !== null) {
            gutterDiv.style.top = top - 2 + "px";
            gutterDiv.style.height = height + 4 + "px";
          }
        };
        // Remove both highlight overlays (code lines and gutter) if they exist
        // and clear the remembered selection.
        const unselectCodeLines = () => {
          for (const highlightId of ["code-annotation-line-highlight", "code-annotation-line-highlight-gutter"]) {
            const highlightEl = window.document.getElementById(highlightId);
            if (highlightEl) {
              highlightEl.remove();
            }
          }
          selectedAnnoteEl = undefined;
        };
          // Handle positioning of the toggle
      // On window resize (throttled with a 10ms delay) re-run the highlight
      // positioning for the currently selected annotation, if there is one.
      window.addEventListener("resize", throttle(function repositionHighlight() {
        // NOTE(review): `elRect` is not declared in the visible part of this
        // script - presumably a cached rectangle defined elsewhere; confirm.
        elRect = undefined;
        if (!selectedAnnoteEl) {
          return;
        }
        selectCodeLines(selectedAnnoteEl);
      }, 10));
      /**
       * Wrap `fn` so that bursts of calls are collapsed.
       *
       * The first call runs `fn` immediately. Every later call (re)starts a
       * timer of `ms` milliseconds; when that timer finally fires, `fn` runs
       * with the most recent arguments and the wrapper resets, so the next
       * call is again executed immediately.
       *
       * @param {Function} fn callback to invoke
       * @param {number} ms delay in milliseconds for the trailing call
       * @returns {Function} the wrapped function
       */
      function throttle(fn, ms) {
        let engaged = false;
        let pending;
        return (...args) => {
          if (!engaged) {
            // Leading call: run right away, then start collapsing
            fn.apply(this, args);
            engaged = true;
            return;
          }
          // Follow-up call: replace any scheduled trailing call with a new one
          if (pending) {
            clearTimeout(pending);
          }
          pending = setTimeout(() => {
            fn.apply(this, args);
            engaged = false;
            pending = false;
          }, ms);
        };
      }
        // Attach click handler to the DT: clicking an annotation entry toggles
        // the highlight of its code lines and the 'code-annotation-active' class.
        const annoteDls = window.document.querySelectorAll('dt[data-target-cell]');
        for (const annoteDlNode of annoteDls) {
          annoteDlNode.addEventListener('click', (event) => {
            // Use the <dt> the listener is bound to, not event.target: the
            // target may be a descendant of the <dt> that lacks the
            // data-target-* attributes and never equals selectedAnnoteEl.
            const clickedEl = event.currentTarget;
            if (clickedEl !== selectedAnnoteEl) {
              // Switch the selection: clear the previous highlight and marker first
              unselectCodeLines();
              const activeEl = window.document.querySelector('dt[data-target-cell].code-annotation-active');
              if (activeEl) {
                activeEl.classList.remove('code-annotation-active');
              }
              selectCodeLines(clickedEl);
              clickedEl.classList.add('code-annotation-active');
            } else {
              // Unselect the line
              unselectCodeLines();
              clickedEl.classList.remove('code-annotation-active');
            }
          });
        }
    // Walk up from `el` until a node is found whose parent element has a
    // `data-cites` value. Returns that node together with the cite keys
    // (the attribute split on single spaces), or undefined when the top of
    // the tree is reached without finding one.
    const findCites = (el) => {
      let node = el;
      while (node.parentElement) {
        const rawCites = node.parentElement.dataset.cites;
        if (rawCites) {
          return { el: node, cites: rawCites.split(' ') };
        }
        node = node.parentElement;
      }
      return undefined;
    };
    // For every bibliography reference link, attach a hover popup that lists
    // the matching bibliography entries (elements with id "ref-<cite key>").
    var bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]');
    for (const ref of bibliorefs) {
      const citeInfo = findCites(ref);
      if (!citeInfo) {
        continue;
      }
      tippyHover(citeInfo.el, () => {
        // Assemble one entry container per cite key, then hand back the markup
        const popup = window.document.createElement('div');
        for (const cite of citeInfo.cites) {
          const citeDiv = window.document.createElement('div');
          citeDiv.classList.add('hanging-indent', 'csl-entry');
          const biblioDiv = window.document.getElementById('ref-' + cite);
          if (biblioDiv) {
            citeDiv.innerHTML = biblioDiv.innerHTML;
          }
          popup.appendChild(citeDiv);
        }
        return popup.innerHTML;
      });
    }
  });
  </script>
</div> <!-- /content -->




<script src="readme_files/libs/quarto-html/zenscroll-min.js"></script>
</body></html>

About

Tutorial for the GESIS Methods Hub

Resources

License

Stars

Watchers

Forks

Releases

No releases published

Packages

 
 
 

Contributors