%% bdaca.bib
%% This BibTeX bibliography file was created using BibDesk.
%% http://bibdesk.sourceforge.net/
%% Created for Damian Trilling at 2015-03-16 14:34:16 +0100
%% Saved with string encoding Unicode (UTF-8)
@article{Trilling2016,
  author  = {Trilling, Damian},
  title   = {Doing Computational Social Science with {P}ython: {A}n Introduction. {V}ersion 1.3},
  journal = {SSRN},
  year    = {2019},
  url     = {http://papers.ssrn.com/abstract=2737682},
  file    = {:Users/damian/Dropbox/uva/literatuur-mendeley/Trilling{\_}2016.pdf:pdf}
}
@incollection{Trilling2017a,
  author    = {Trilling, Damian},
  title     = {{Big Data}, Analysis of},
  booktitle = {The International Encyclopedia of Communication Research Methods},
  editor    = {Matthes, J{\"{o}}rg and Davis, Christine S. and Potter, Robert F.},
  publisher = {Wiley},
  year      = {2017},
  doi       = {10.1002/9781118901731.iecrm0014},
  isbn      = {9781118901731}
}
@article{scikit-learn,
  author  = {Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V.
             and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P.
             and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and
             Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.},
  title   = {Scikit-learn: Machine Learning in {P}ython},
  journal = {Journal of Machine Learning Research},
  year    = {2011},
  volume  = {12},
  pages   = {2825--2830}
}
@inproceedings{Maas2011,
  author    = {Maas, Andrew L. and Daly, Raymond E. and Pham, Peter T. and Huang, Dan and Ng, Andrew Y. and Potts, Christopher},
  title     = {Learning Word Vectors for Sentiment Analysis},
  booktitle = {Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies},
  publisher = {Association for Computational Linguistics},
  address   = {Portland, Oregon, USA},
  year      = {2011},
  pages     = {142--150},
  url       = {http://www.aclweb.org/anthology/P11-1015}
}
@inproceedings{Rehurek2010,
  author    = {{\v R}eh{\r u}{\v r}ek, Radim and Sojka, Petr},
  title     = {Software Framework for Topic Modelling with Large Corpora},
  booktitle = {Proceedings of the {LREC} 2010 Workshop on New Challenges for {NLP} Frameworks},
  year      = {2010},
  pages     = {45--50},
  publisher = {ELRA},
  address   = {Valletta, Malta},
  url       = {http://is.muni.cz/publication/884893/en},
  language  = {English}
}
@article{Thelwall2012,
  author        = {Thelwall, Mike and Buckley, Kevan and Paltoglou, Georgios},
  title         = {Sentiment strength detection for the social web},
  journal       = {Journal of the American Society for Information Science and Technology},
  year          = {2012},
  volume        = {63},
  number        = {1},
  pages         = {163--173},
  doi           = {10.1002/asi.21662},
  issn          = {1532-2890},
  date-added    = {2015-03-16 13:31:47 +0000},
  date-modified = {2015-03-16 13:32:09 +0000},
  bdsk-url-1    = {http://dx.doi.org/10.1002/asi.21662}
}
@inproceedings{Ellison2013,
  author        = {Ellison, Nicole B and Gray, Rebecca and Vitak, Jessica and Lampe, Cliff and Fiore, Andrew T},
  title         = {Calling all friends: Exploring requests for help on {Facebook}},
  booktitle     = {Proceedings of the 7th annual International Conference on Weblogs and Social Media ({ICWSM})},
  year          = {2013},
  url           = {http://www-personal.umich.edu/~enicole/Ellison\_etal\_ICWSM2013.pdf},
  abstract      = {Past research suggests Facebook use is linked to perceptions of social capital, a concept that taps into the resources people gain from interactions with their social network. In this study, we examine a sample of public Facebook status updates (N=20,000) for instances in which users request a response from their network. These attempts to mobilize resources offer insight into the mechanisms through which Facebook is used for social capital conversion. After identifying mobilization requests (N=856), we categorize them by cost (i.e., effort needed to satisfy the request) and type (e.g., opinion, information, social coordination) in order to describe the prevalence of these requests and the extent to which they require effort on the part of the potential responders. Finally, we examine characteristics of these users and the linguistic characteristics of status updates that contain mobilization requests.},
  file          = {:Users/dami/Dropbox/uva/literatuur-mendeley-import/Ellison\_etal\_ICWSM2013.pdf:pdf},
  date-added    = {2015-02-20 10:17:05 +0100},
  date-modified = {2015-03-10 14:49:47 +0000},
  bdsk-url-1    = {http://www-personal.umich.edu/~enicole/Ellison_etal_ICWSM2013.pdf}
}
@inproceedings{Castillo2014,
  author        = {Castillo, Carlos and El-Haddad, Mohammed and Pfeffer, J{\"u}rgen and Stempeck, Matt},
  title         = {Characterizing the Life Cycle of Online News Stories Using Social Media Reactions},
  booktitle     = {Proceedings of the 17th {ACM} conference on Computer supported cooperative work \& social computing},
  publisher     = {ACM},
  address       = {Baltimore, MD},
  year          = {2014},
  doi           = {10.1145/2531602.2531623},
  abstract      = {This paper presents a study of the life cycle of news articles posted online. We describe the interplay between website visitation patterns and social media reactions to news content. We show that we can use this hybrid observation method to characterize distinct classes of articles. We also find that social media reactions can help predict future visitation patterns early and accurately. We validate our methods using qualitative analysis as well as quantitative analysis on data from a large international news network, for a set of articles generating more than 3,000,000 visits and 200,000 social media reactions. We show that it is possible to model accurately the overall traffic articles will ultimately receive by observing the first ten to twenty minutes of social media reactions. Achieving the same prediction accuracy with visits alone would require to wait for three hours of data. We also describe significant improvements on the accuracy of the early prediction of shelf-life for news stories.},
  file          = {:Users/dami/Documents/Mendeley Desktop/Castillo et al.\_2013.pdf:pdf},
  date-added    = {2015-02-20 10:17:05 +0100},
  date-modified = {2015-03-10 14:53:02 +0000},
  bdsk-url-1    = {http://arxiv.org/abs/1304.3010},
  bdsk-url-2    = {http://dx.doi.org/10.1145/2531602.2531623}
}
@article{Conover2012,
  author        = {Conover, Michael D and Gon{\c{c}}alves, Bruno and Flammini, Alessandro and Menczer, Filippo},
  title         = {Partisan asymmetries in online political activity},
  journal       = {EPJ Data Science},
  year          = {2012},
  volume        = {1},
  number        = {6},
  pages         = {1--19},
  doi           = {10.1140/epjds6},
  issn          = {2193-1127},
  file          = {:Users/dami/Documents/Mendeley Desktop/Conover et al.\_2012.pdf:pdf},
  date-added    = {2015-02-20 10:17:05 +0100},
  date-modified = {2015-03-10 14:49:56 +0000},
  bdsk-url-1    = {http://www.epjdatascience.com/content/1/1/6},
  bdsk-url-2    = {http://dx.doi.org/10.1140/epjds6}
}
@article{Ausserhofer2013,
  author        = {Ausserhofer, Julian and Maireder, Axel},
  title         = {National Politics on {Twitter}: Structures and Topics of a Networked Public Sphere},
  journal       = {Information, Communication \& Society},
  year          = {2013},
  volume        = {16},
  number        = {3},
  pages         = {291--314},
  doi           = {10.1080/1369118X.2012.756050},
  issn          = {1369-118X},
  file          = {:Users/dami/Documents/Mendeley Desktop/Ausserhofer, Maireder\_2013.pdf:pdf},
  date-added    = {2015-02-19 22:42:46 +0100},
  date-modified = {2015-02-19 23:06:26 +0100},
  bdsk-url-1    = {http://dx.doi.org/10.1080/1369118X.2012.756050}
}
@article{Trilling2015,
  author          = {Trilling, Damian},
  title           = {Two Different Debates? {I}nvestigating the Relationship Between a Political Debate on {TV} and Simultaneous Comments on {Twitter}},
  journal         = {Social Science Computer Review},
  year            = {2015},
  volume          = {33},
  number          = {3},
  pages           = {259--276},
  doi             = {10.1177/0894439314537886},
  issn            = {0894-4393},
  abstract        = {While watching television, more and more citizens comment the program live on social media. This is especially interesting in the case of political debates, as viewers' comments might not only allow us to tap into public opinion, but they can also be an influential factor of their own and contribute to public discourse. This article analyzes how the TV debate between the candidates for chancellor during the German election campaign 2013 was discussed on Twitter. To do so, the transcript of the debate is linked to a set of N = 120,557 tweets containing the hashtag {\#}tvduell. The results indicate that the candidates were only to a minor degree successful in getting their topics to the Twitter debate. An optimistic reading of the results suggests that Twitter serves as a complement to draw attention to topics neglected in the official debate. A more pessimistic reading would point to the fact that the discourse on Twitter seems to be dominated by sarcastic or funny rather than by substantial content.},
  keywords        = {election campaign,german elections,political debate,second screen,tvduell,twitter},
  file            = {:Users/damian/Dropbox/uva/literatuur-mendeley/Trilling{\_}2015.pdf:pdf},
  mendeley-groups = {onderwerpen/2ndscreen,papers/damianpetro}
}
@misc{Madnani,
  author        = {Madnani, N.},
  title         = {Getting started on Natural Language Processing with {Python}},
  howpublished  = {http://desilinguist.org/pdf/crossroads.pdf},
  year          = {n.d.},
  date-added    = {2015-02-19 21:36:01 +0100},
  date-modified = {2015-02-19 23:10:26 +0100}
}
@article{Bruns2013,
  author        = {Bruns, Axel and Stieglitz, Stefan},
  title         = {Towards more systematic {Twitter} analysis: metrics for tweeting activities},
  journal       = {International Journal of Social Research Methodology},
  year          = {2013},
  volume        = {16},
  number        = {2},
  pages         = {91--108},
  doi           = {10.1080/13645579.2012.756095},
  issn          = {1364-5579},
  file          = {:Users/dami/Documents/Mendeley Desktop/Bruns, Stieglitz\_2013.pdf:pdf},
  date-added    = {2015-02-19 21:15:30 +0100},
  date-modified = {2015-02-19 23:06:02 +0100},
  bdsk-url-1    = {http://dx.doi.org/10.1080/13645579.2012.756095}
}
@inproceedings{Huang2007,
  author        = {Huang, Yen-Pei and Goh, Tiong and Liew, Chern Li},
  title         = {Hunting Suicide Notes in {Web 2.0} -- Preliminary Findings},
  booktitle     = {Ninth {IEEE} International Symposium on Multimedia Workshops ({ISMW} 2007)},
  publisher     = {IEEE},
  year          = {2007},
  pages         = {517--521},
  doi           = {10.1109/ISM.Workshops.2007.92},
  isbn          = {0-7695-3084-2},
  file          = {:Users/dami/Documents/Mendeley Desktop/Huang, Goh, Liew\_2007.pdf:pdf},
  date-added    = {2015-02-19 21:15:30 +0100},
  date-modified = {2015-02-19 23:07:15 +0100},
  bdsk-url-1    = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=4476021},
  bdsk-url-2    = {http://dx.doi.org/10.1109/ISM.Workshops.2007.92}
}
@article{Mostafa2013,
  author        = {Mostafa, Mohamed M.},
  title         = {More than words: Social networks' text mining for consumer brand sentiments},
  journal       = {Expert Systems with Applications},
  year          = {2013},
  volume        = {40},
  number        = {10},
  pages         = {4241--4251},
  doi           = {10.1016/j.eswa.2013.01.019},
  issn          = {0957-4174},
  file          = {:Users/dami/Library/Application Support/Mendeley Desktop/Downloaded/Mostafa - 2013 - More than words Social networks' text mining for consumer brand sentiments.pdf:pdf},
  date-added    = {2015-02-19 21:15:30 +0100},
  date-modified = {2015-02-19 23:08:57 +0100},
  bdsk-url-1    = {http://linkinghub.elsevier.com/retrieve/pii/S0957417413000328},
  bdsk-url-2    = {http://dx.doi.org/10.1016/j.eswa.2013.01.019}
}
@article{Pestian2012,
  author        = {Pestian, John and Matykiewicz, Pawel and Linn-Gust, Michelle and South, Brett and Uzuner, Ozlem and Wiebe, Jan and Cohen, K. Bretonnel and Hurdle, John and Brew, Christopher},
  title         = {Sentiment Analysis of Suicide Notes: A Shared Task},
  journal       = {Biomedical Informatics Insights},
  year          = {2012},
  volume        = {5},
  pages         = {3--16},
  doi           = {10.4137/BII.S9042},
  issn          = {1178-2226},
  keywords      = {computational linguistics,natural language processing,sentiment analysis,shared task,suicide,suicide notes},
  file          = {:Users/dami/Library/Application Support/Mendeley Desktop/Downloaded/Pestian et al. - 2012 - Sentiment Analysis of Suicide Notes A Shared Task.pdf:pdf},
  date-added    = {2015-02-19 21:15:30 +0100},
  date-modified = {2015-03-10 14:51:11 +0000},
  bdsk-url-1    = {http://dx.doi.org/10.4137/BII.S9042}
}
@inproceedings{Morstatter2013,
  author        = {Morstatter, Fred and Pfeffer, J{\"u}rgen and Liu, Huan and Carley, Kathleen M},
  title         = {Is the Sample Good Enough? Comparing Data from {Twitter's Streaming API} with {Twitter's Firehose}},
  booktitle     = {International {AAAI} Conference on Weblogs and Social Media {(ICWSM)}},
  address       = {Boston, MA},
  year          = {2013},
  month         = jul,
  url           = {http://www.public.asu.edu/~fmorstat/paperpdfs/icwsm2013.pdf},
  file          = {:Users/dami/Documents/Mendeley Desktop/Morstatter et al.\_2013.pdf:pdf},
  date-added    = {2015-02-19 20:54:58 +0100},
  date-modified = {2015-03-10 14:50:58 +0000},
  bdsk-url-1    = {http://www.public.asu.edu/~fmorstat/paperpdfs/icwsm2013.pdf}
}
@article{Mahrt2013,
  author        = {Mahrt, Merja and Scharkow, Michael},
  title         = {The Value of {Big Data} in Digital Media Research},
  journal       = {Journal of Broadcasting \& Electronic Media},
  year          = {2013},
  volume        = {57},
  number        = {1},
  pages         = {20--33},
  doi           = {10.1080/08838151.2012.761700},
  issn          = {0883-8151},
  file          = {:Users/dami/Library/Application Support/Mendeley Desktop/Downloaded/Mahrt, Scharkow - 2013 - The Value of Big Data in Digital Media Research.pdf:pdf},
  date-added    = {2015-02-19 20:30:14 +0100},
  date-modified = {2015-02-19 23:07:45 +0100},
  bdsk-url-1    = {http://www.tandfonline.com/doi/abs/10.1080/08838151.2012.761700},
  bdsk-url-2    = {http://dx.doi.org/10.1080/08838151.2012.761700}
}
@article{Vis2013,
  author        = {Vis, Farida},
  title         = {A critical reflection on {Big Data}: Considering {APIs}, researchers and tools as data makers},
  journal       = {First Monday},
  year          = {2013},
  volume        = {18},
  number        = {10},
  pages         = {1--16},
  doi           = {10.5210/fm.v18i10.4878},
  issn          = {1396-0466},
  file          = {:Users/dami/Library/Application Support/Mendeley Desktop/Downloaded/Vis - 2013 - A critical reflection on Big Data Considering APIs, researchers and tools as data makers.pdf:pdf},
  date-added    = {2015-02-19 20:30:07 +0100},
  date-modified = {2015-02-19 23:09:31 +0100},
  bdsk-url-1    = {http://journals.uic.edu/ojs/index.php/fm/article/view/4878},
  bdsk-url-2    = {http://dx.doi.org/10.5210/fm.v18i10.4878}
}
@article{boyd2012,
  author        = {boyd, danah and Crawford, Kate},
  title         = {Critical questions for {Big Data}},
  journal       = {Information, Communication \& Society},
  year          = {2012},
  volume        = {15},
  number        = {5},
  pages         = {662--679},
  doi           = {10.1080/1369118X.2012.678878},
  abstract      = {The era of Big Data has begun. Computer scientists, physicists, economists, mathematicians, political scientists, bio-informaticists, sociologists, and other scholars are clamoring for access to the massive quantities of information produced by and about people, things, and their interactions. Diverse groups argue about the potential benefits and costs of analyzing genetic sequences, social media interactions, health records, phone logs, government records, and other digital traces left by people. Significant questions emerge. Will large-scale search data help us create better tools, services, and public goods? Or will it usher in a new wave of privacy incursions and invasive marketing? Will data analytics help us understand online communities and political movements? Or will it be used to track protesters and suppress speech? Will it transform how we study human communication and culture, or narrow the palette of research options and alter what `research' means? Given the rise of Big Data as a socio-technical phenomenon, we argue that it is necessary to critically interrogate its assumptions and biases. In this article, we offer six provocations to spark conversations about the issues of Big Data: a cultural, technological, and scholarly phenomenon that rests on the interplay of technology, analysis, and mythology that provokes extensive utopian and dystopian rhetoric.},
  date-added    = {2015-02-19 20:28:12 +0100},
  date-modified = {2015-03-10 14:47:27 +0000},
  bdsk-url-1    = {http://dx.doi.org/10.1080/1369118X.2012.678878}
}
@book{McKinney2012,
  author        = {McKinney, W.},
  title         = {Python for data analysis},
  publisher     = {O'Reilly},
  address       = {Sebastopol, CA},
  year          = {2012},
  date-added    = {2015-02-19 20:14:07 +0100},
  date-modified = {2015-02-19 20:14:39 +0100}
}
@book{Bird2009,
  author        = {Bird, S. and Loper, E. and Klein, E.},
  title         = {Natural language processing with {P}ython},
  publisher     = {O'Reilly},
  address       = {Sebastopol, CA},
  year          = {2009},
  date-added    = {2015-02-19 20:12:46 +0100},
  date-modified = {2015-02-19 23:05:52 +0100}
}
@book{Russel2013,
  author        = {Russell, Matthew A.},
  title         = {Mining the social web. {Data} mining {Facebook, Twitter, LinkedIn, Google+, GitHub}, and more},
  edition       = {2nd},
  publisher     = {O'Reilly},
  address       = {Sebastopol, CA},
  year          = {2013},
  date-added    = {2015-02-19 20:08:54 +0100},
  date-modified = {2015-03-10 15:08:28 +0000}
}
@article{Lewis2013,
  author        = {Lewis, Seth C. and Zamith, Rodrigo and Hermida, Alfred},
  title         = {Content Analysis in an Era of {Big Data}: A Hybrid Approach to Computational and Manual Methods},
  journal       = {Journal of Broadcasting \& Electronic Media},
  year          = {2013},
  volume        = {57},
  number        = {1},
  pages         = {34--52},
  doi           = {10.1080/08838151.2012.761702},
  date-modified = {2015-03-10 14:50:20 +0000},
  bdsk-url-1    = {http://www.tandfonline.com/doi/abs/10.1080/08838151.2012.761702},
  bdsk-url-2    = {http://dx.doi.org/10.1080/08838151.2012.761702}
}
@article{mazieres2014,
  author   = {Mazi{\`e}res, Antoine and Trachman, Mathieu and Cointet, Jean-Philippe and Coulmont, Baptiste and Prieur, Christophe},
  title    = {Deep tags: toward a quantitative analysis of online pornography},
  journal  = {Porn Studies},
  year     = {2014},
  volume   = {1},
  number   = {1--2},
  pages    = {80--95},
  doi      = {10.1080/23268743.2014.888214},
  abstract = {The development of the web has increased the diversity of pornographic content, and at the same time the rise of online platforms has initiated a new trend of quantitative research that makes possible the analysis of data on an unprecedented scale. This paper explores the application of a quantitative approach to publicly available data collected from pornographic websites. Several analyses are applied to these digital traces with a focus on keywords describing videos and their underlying categorization systems. The analysis of a large network of tags shows that the accumulation of categories does not separate scripts from each other, but instead draws a multitude of significant paths between fuzzy categories. The datasets and tools we describe have been made publicly available for further study.}
}
@book{VanAtteveldt2008,
  author    = {Van Atteveldt, Wouter},
  title     = {Semantic Network Analysis: {Techniques} for Extracting, Representing, and Querying Media Content},
  publisher = {BookSurge},
  address   = {Charleston, SC},
  year      = {2008},
  isbn      = {1439211361},
  file      = {:Users/dami/Library/Application Support/Mendeley Desktop/Downloaded/van Atteveldt - 2008 - Semantic Network Analysis Techniques for Extracting, Representing, and Querying Media Content.pdf:pdf}
}
@article{DeSmedt2012,
  author   = {{De Smedt}, Tom and Daelemans, Walter},
  title    = {{Pattern} for {Python}},
  journal  = {Journal of Machine Learning Research},
  year     = {2012},
  volume   = {13},
  pages    = {2063--2067},
  keywords = {data mining,graph networks,machine learning,natural language processing,python},
  file     = {:Users/dami/Documents/Mendeley Desktop/De Smedt, Daelemans\_2012.pdf:pdf}
}
@article{JunquedeFortuny2012,
  author  = {{Junqu\'{e} de Fortuny}, Enric and {De Smedt}, Tom and Martens, David and Daelemans, Walter},
  title   = {Media coverage in times of political crisis: A text mining approach},
  journal = {Expert Systems with Applications},
  year    = {2012},
  volume  = {39},
  number  = {14},
  pages   = {11616--11622},
  doi     = {10.1016/j.eswa.2012.04.013},
  issn    = {0957-4174},
  file    = {:Users/dami/Library/Application Support/Mendeley Desktop/Downloaded/Junqu\'{e} de Fortuny et al. - 2012 - Media coverage in times of political crisis A text mining approach.pdf:pdf}
}
@article{Borra2014,
  author  = {Borra, Erik and Rieder, Bernhard},
  title   = {Programmed method: {Developing} a toolset for capturing and analyzing tweets},
  journal = {Aslib Journal of Information Management},
  year    = {2014},
  volume  = {66},
  number  = {3},
  pages   = {262--278},
  doi     = {10.1108/AJIM-09-2013-0094},
  issn    = {2050-3806},
  file    = {:Users/dami/Library/Application Support/Mendeley Desktop/Downloaded/Borra, Rieder - 2014 - Programmed method developing a toolset for capturing and analyzing tweets.pdf:pdf}
}
@inproceedings{Rieder2013,
  author    = {Rieder, Bernhard},
  title     = {Studying {Facebook} via data extraction: The {Netvizz} application},
  booktitle = {Proceedings of the 5th Annual {ACM} Web Science Conference},
  publisher = {ACM},
  address   = {New York, NY},
  year      = {2013},
  pages     = {346--355},
  isbn      = {9781450318891},
  file      = {:Users/damian/Dropbox/uva/literatuur-mendeley/Rieder\_2013.pdf:pdf}
}
@article{Boumans2016,
  author  = {Boumans, Jelle W. and Trilling, Damian},
  title   = {Taking stock of the toolkit: An overview of relevant automated content analysis approaches and techniques for digital journalism scholars},
  journal = {Digital Journalism},
  year    = {2016},
  volume  = {4},
  number  = {1},
  pages   = {8--23},
  doi     = {10.1080/21670811.2015.1096598},
  issn    = {2167-0811},
  file    = {:Users/damian/Dropbox/uva/literatuur-mendeley/Boumans, Trilling{\_}2016.pdf:pdf}
}
@article{Jurgens2016,
  author  = {J{\"{u}}rgens, Pascal and Jungherr, Andreas},
  title   = {A tutorial for using {T}witter data in the social sciences: {D}ata collection, preparation, and analysis},
  journal = {SSRN},
  year    = {2016},
  doi     = {10.2139/ssrn.2710146},
  file    = {:Users/damian/Dropbox/uva/literatuur-mendeley/J{\"{u}}rgens, Jungherr{\_}2016.pdf:pdf}
}
@article{Kitchin2014,
  author   = {Kitchin, Rob},
  title    = {{Big Data}, new epistemologies and paradigm shifts},
  journal  = {Big Data {\&} Society},
  year     = {2014},
  volume   = {1},
  number   = {1},
  pages    = {1--12},
  doi      = {10.1177/2053951714528481},
  issn     = {2053-9517},
  abstract = {This article examines how the availability of Big Data, coupled with new data analytics, challenges established epistemologies across the sciences, social sciences and humanities, and assesses the extent to which they are engendering paradigm shifts across multiple disciplines. In particular, it critically explores new forms of empiricism that declare ‘the end of theory’, the creation of data-driven rather than knowledge-driven science, and the development of digital humanities and computational social sciences that propose radically different ways to make sense of culture, history, economy and society. It is argued that: (1) Big Data and new data analytics are disruptive innovations which are reconfiguring in many instances how research is conducted; and (2) there is an urgent need for wider critical reflection within the academy on the epistemological implications of the unfolding data revolution, a task that has barely begun to be tackled despite the rapid changes in research practices presently taking place. After critically reviewing emerging epistemological positions, it is contended that a potentially fruitful approach would be the development of a situated, reflexive and contextually nuanced epistemology.},
  keywords = {big data,computational,data analytics,data-driven science,digital humanities,end of theory,epistemology,paradigms},
  file     = {:Users/damian/Dropbox/uva/literatuur-mendeley/Kitchin{\_}2014.pdf:pdf}
}
@article{Lazer2009,
  author          = {Lazer, David and Pentland, Alex and Adamic, Lada and Aral, Sinan and Barab{\'{a}}si, Albert-L{\'{a}}szl{\'{o}} and Brewer, Devon and Christakis, Nicholas and Contractor, Noshir and Fowler, James and Gutmann, Myron and Jebara, Tony and King, Gary and Macy, Michael and Roy, Deb and van Alstyne, Marshall},
  title           = {Computational social science},
  journal         = {Science},
  year            = {2009},
  volume          = {323},
  number          = {5915},
  pages           = {721--723},
  doi             = {10.1126/science.1167742},
  issn            = {0036-8075},
  pmid            = {19197046},
  abstract        = {A field is emerging that leverages the capacity to collect and analyze data at a scale that may reveal patterns of individual and group behaviors.},
  file            = {:Users/damian/Dropbox/uva/literatuur-mendeley/Lazer et al.{\_}2009.pdf:pdf},
  mendeley-groups = {papers/bigdatahoofdstuk}
}
@article{Shah2015,
  author          = {Shah, D. V. and Cappella, J. N. and Neuman, W. R.},
  title           = {{Big Data}, digital media, and computational social science: {P}ossibilities and perils},
  journal         = {The ANNALS of the American Academy of Political and Social Science},
  year            = {2015},
  volume          = {659},
  number          = {1},
  pages           = {6--13},
  doi             = {10.1177/0002716215572084},
  issn            = {0002-7162},
  file            = {:Users/damian/Dropbox/uva/literatuur-mendeley/Shah, Cappella, Neuman{\_}2015.pdf:pdf},
  mendeley-groups = {papers/bigdatahoofdstuk}
}
@inproceedings{statsmodels,
  author    = {Seabold, Skipper and Perktold, Josef},
  title     = {Statsmodels: Econometric and statistical modeling with {Python}},
  booktitle = {Proceedings of the 9th {Python} in Science Conference},
  year      = {2010}
}
@misc{scipy,
  author = {Eric Jones and Travis Oliphant and Pearu Peterson and others},
  title  = {{SciPy}: Open source scientific tools for {Python}},
  year   = {2001},
  url    = {http://www.scipy.org/}
}
@article{numpy,
  author    = {van der Walt, St{\'e}fan and Colbert, S. Chris and Varoquaux, Ga{\"e}l},
  title     = {The {NumPy} Array: A Structure for Efficient Numerical Computation},
  journal   = {Computing in Science \& Engineering},
  year      = {2011},
  volume    = {13},
  number    = {2},
  pages     = {22--30},
  publisher = {Institute of Electrical and Electronics Engineers ({IEEE})},
  doi       = {10.1109/mcse.2011.37}
}
@article{matplotlib,
  author    = {Hunter, John D.},
  title     = {Matplotlib: A {2D} Graphics Environment},
  journal   = {Computing in Science \& Engineering},
  year      = {2007},
  volume    = {9},
  number    = {3},
  pages     = {90--95},
  publisher = {Institute of Electrical and Electronics Engineers ({IEEE})},
  doi       = {10.1109/mcse.2007.55}
}
@inproceedings{pandas,
  author    = {McKinney, Wes},
  title     = {Data structures for statistical computing in {Python}},
  booktitle = {Proceedings of the 9th {Python} in Science Conference},
  year      = {2010},
  pages     = {51--56}
}
@inproceedings{Hutto2014,
  author    = {Hutto, Clayton J and Gilbert, Eric},
  title     = {{VADER}: A parsimonious rule-based model for sentiment analysis of social media text},
  booktitle = {Eighth International {AAAI} Conference on Weblogs and Social Media},
  year      = {2014}
}
@inproceedings{Wilson2005,
  author       = {Wilson, Theresa and Wiebe, Janyce and Hoffmann, Paul},
  title        = {Recognizing contextual polarity in phrase-level sentiment analysis},
  booktitle    = {Proceedings of the conference on human language technology and empirical methods in natural language processing},
  organization = {Association for Computational Linguistics},
  year         = {2005},
  pages        = {347--354}
}
@book{Vanderplas2016,
  author    = {VanderPlas, Jacob},
  title     = {Python data science handbook: {E}ssential tools for working with data},
  publisher = {O'Reilly},
  address   = {Sebastopol, CA},
  year      = {2016},
  isbn      = {978-1-4919-1205-8}
}
@book{Salganik2017,
  author    = {Salganik, Matthew J.},
  title     = {Bit by bit: Social research in the digital age},
  publisher = {Princeton University Press},
  address   = {Princeton, NJ},
  year      = {2017}
}
@article{Maier2018a,
  author    = {Maier, Daniel and Waldherr, A. and Miltner, P. and Wiedemann, G. and Niekler, A. and Keinert, A. and Pfetsch, B. and Heyer, G. and Reber, U. and H{\"{a}}ussler, T. and Schmid-Petri, H. and Adam, S.},
  title     = {Applying {LDA} Topic Modeling in Communication Research: {T}oward a Valid and Reliable Methodology},
  journal   = {Communication Methods and Measures},
  year      = {2018},
  volume    = {12},
  number    = {2--3},
  pages     = {93--118},
  publisher = {Routledge},
  issn      = {1931-2466},
  doi       = {10.1080/19312458.2018.1430754},
  abstract  = {Latent Dirichlet allocation (LDA) topic models are increasingly being used in communication research. Yet, questions regarding reliability and validity of the approach have received little attention thus far. In applying LDA to textual data, researchers need to tackle at least four major challenges that affect these criteria: (a) appropriate pre-processing of the text collection; (b) adequate selection of model parameters, including the number of topics to be generated; (c) evaluation of the model's reliability; and (d) the process of validly interpreting the resulting topics. We review the research literature dealing with these questions and propose a methodology that approaches these challenges. Our overall goal is to make LDA topic modeling more accessible to communication researchers and to ensure compliance with disciplinary standards. Consequently, we develop a brief hands-on user guide for applying LDA topic modeling. We demonstrate the value of our approach with empirical data from an ongo...},
  file      = {:home/damian/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Maier et al. - 2018 - Applying LDA Topic Modeling in Communication Research Toward a Valid and Reliable Methodology.pdf:pdf}
}
@inproceedings{Tsur2015,
  author    = {Tsur, Oren and Calacci, Dan and Lazer, David},
  title     = {A Frame of Mind: {U}sing Statistical Models for Detection of Framing and Agenda Setting Campaigns},
  booktitle = {Proceedings of the 53rd Annual Meeting of the Association for Computational Linguistics and the 7th International Joint Conference on Natural Language Processing},
  year      = {2015},
  pages     = {1629--1638},
  publisher = {ACL},
  isbn      = {9781941643723},
  abstract  = {Framing is a sophisticated form of discourse in which the speaker tries to induce a cognitive bias through consistent linkage between a topic and a specific context (frame). We build on political science and communication theory and use probabilistic topic models combined with time series regression analysis (autoregressive distributed-lag models) to gain insights about the language dynamics in the political processes. Processing four years of public statements issued by members of the U.S. Congress, our results provide a glimpse into the complex dynamic processes of framing, attention shifts and agenda setting, commonly known as `spin'. We further provide new evidence for the divergence in party discipline in U.S. politics.},
  file      = {:home/damian/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Tsur, Calacci, Lazer - 2015 - A Frame of Mind Using Statistical Models for Detection of Framing and Agenda Setting Campaigns(2).pdf:pdf}
}
@inproceedings{Kusner2015,
  author    = {Kusner, Matt J. and Sun, Yu and Kolkin, Nicholas I. and Weinberger, Kilian Q.},
  title     = {From Word Embeddings To Document Distances},
  booktitle = {Proceedings of the 32nd International Conference on Machine Learning},
  series    = {Proceedings of Machine Learning Research},
  volume    = {37},
  pages     = {957--966},
  year      = {2015},
  isbn      = {9781510810587},
  issn      = {1938-7228},
  pmid      = {1714571},
  abstract  = {We present the Word Mover's Distance (WMD), a novel distance function between text documents. Our work is based on recent results in word embeddings that learn semantically meaningful representations for words from local co-occurrences in sentences. The WMD distance measures the dissimilarity between two text documents as the minimum amount of distance that the embedded words of one document need to ``travel'' to reach the embedded words of another document. We show that this distance metric can be cast as an instance of the Earth Mover's Distance, a well studied transportation problem for which several highly efficient solvers have been developed. Our metric has no hyperparameters and is straightforward to implement. Further, we demonstrate on eight real world document classification data sets, in comparison with seven state-of-the-art baselines, that the WMD metric leads to unprecedented low k-nearest neighbor document classification error rates.},
  file      = {:home/damian/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Kusner et al. - 2015 - From Word Embeddings To Document Distances.pdf:pdf}
}
@article{Garg2017,
  author        = {Garg, Nikhil and Schiebinger, Londa and Jurafsky, Dan and Zou, James},
  title         = {Word Embeddings as a Lens to Quantify 100 Years of Gender and Ethnic Stereotypes},
  journal       = {Proceedings of the National Academy of Sciences},
  year          = {2018},
  volume        = {115},
  number        = {16},
  pages         = {E3635--E3644},
  doi           = {10.1073/pnas.1720347115},
  archivePrefix = {arXiv},
  arxivId       = {1711.08412},
  eprint        = {1711.08412},
  isbn          = {1720347115},
  issn          = {0027-8424},
  pmid          = {29615513},
  abstract      = {Word embeddings are ubiquitous in machine learning and natural language processing. Recent works show that the geometry of the word embedding captures gender stereotypes. In this paper, we develop a framework to investigate the temporal dynamics of gender and ethnic stereotypes by analyzing how the embedding geometry changes over time. We apply this approach to analyze embeddings trained over 100 years of text data. Moreover, we integrate U.S. Census data over the 20th century and demonstrate that dynamics of the word embedding track closely with demographic and occupation shifts over time. We systematically quantify changes in the embeddings of various classes of occupations and adjectives and their relationship to gender and ethnic groups. While the overall embedding bias has been decreasing, specific occupations (e.g. intellectual professions) are still associated with males and adjectives for physical appearance are closer to females. We show that our approach can robustly capture global shifts -- e.g. the feminist movement in the 1960s and Asian immigration into the U.S. -- as well as changes in the biases associated with specific words. Temporal analysis of word embeddings thus opens up a powerful new intersection between machine learning and quantitative social science. All of our data, metric and analysis tools are available on GitHub.}
}
@article{Vliegenthart2014,
  title    = {Moving up. {A}pplying aggregate level time series analysis in the study of media coverage},
  author   = {Vliegenthart, Rens},
  journal  = {Quality {\&} Quantity},
  volume   = {48},
  number   = {5},
  pages    = {2427--2445},
  year     = {2014},
  doi      = {10.1007/s11135-013-9899-0},
  issn     = {1573-7845},
  abstract = {In this article the advantages of aggregate level time series analysis for the study of media coverage are discussed. This type of analysis offers the opportunity to answer questions relating to causes and effects of media attention for issues and all kind of other content characteristics. Data that ask for a time series approach have become widely available during the past years, due to the rise of digital archives and social media such as Twitter and Facebook. This type of analysis allows for answering a set of interesting research questions and strong inferences about causal processes. Common challenges in time series analysis, relating to stationarity, accounting for a series' past and autoregressive conditional heteroscedasticity are discussed. Two useful approaches, ARIMA and VAR, are introduced stepwise. An empirical example, dealing with intermedia agenda-setting between different newspapers in the Netherlands, demonstrates how both techniques can be applied and how they provide insightful answers to interesting research problems.}
}
@article{Strycharz2018,
  author    = {Strycharz, Joanna and Strauss, Nadine and Trilling, Damian},
  title     = {The role of media coverage in explaining stock market fluctuations: {I}nsights for strategic financial communication},
  journal   = {International Journal of Strategic Communication},
  year      = {2018},
  volume    = {12},
  number    = {1},
  pages     = {67--85},
  publisher = {Routledge},
  issn      = {1553-118X},
  doi       = {10.1080/1553118X.2017.1378220},
  abstract  = {This study investigates the reciprocal relationships between the fluctuation of the closing prices of three companies listed on the Amsterdam exchange index, namely ING, Philips and Shell and online media coverage related to these firms for a period of two years (2014--2015). Automated content analysis methods were employed to analyze sentiment and emotionality and to identify corporate topics related to the companies. A positive relation of the amount of coverage and emotionality with the fluctuation of stock prices was detected for Shell and Philips. In addition, corporate topics were found to positively Granger cause stock price fluctuation, particularly for Philips. The study advances past research in showing that the prediction of stock price fluctuation based on media coverage can be improved by including sentiment, emotionality, and corporate topics. The findings inform strategic communication, and particularly investor relations, in suggesting that media attention, sentiment, and certain corporate topics are crucial when managing media relations and with regard to securing a fair evaluation of listed companies. Furthermore, the innovative research methods are useful for researchers and practitioners alike in showcasing how media coverage related to firms and their stock fluctuations can be identified and analyzed in a reproducible, hands-on and efficient manner.},
  file      = {:home/damian/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Strycharz, Strauss, Trilling - 2018 - The role of media coverage in explaining stock market fluctuations Insights for strategic financia.pdf:pdf}
}
@article{Burscher2014,
  title           = {Teaching the computer to code frames in news: {C}omparing two supervised machine learning approaches to frame analysis},
  author          = {Burscher, Bj{\"{o}}rn and Odijk, Daan and Vliegenthart, Rens and de Rijke, Maarten and de Vreese, Claes H.},
  journal         = {Communication Methods and Measures},
  year            = {2014},
  volume          = {8},
  number          = {3},
  pages           = {190--206},
  issn            = {1931-2458},
  doi             = {10.1080/19312458.2014.937527},
  mendeley-groups = {papers/damianpetro},
  file            = {:home/damian/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Burscher et al. - 2014 - Teaching the computer to code frames in news Comparing two supervised machine learning approaches to frame anal.pdf:pdf}
}
@article{burscher2016,
  author   = {Burscher, Bj{\"{o}}rn and Vliegenthart, Rens and de Vreese, Claes H.},
  title    = {Frames Beyond Words: {A}pplying Cluster and Sentiment Analysis to News Coverage of the Nuclear Power Issue},
  journal  = {Social Science Computer Review},
  year     = {2016},
  volume   = {34},
  number   = {5},
  pages    = {530--545},
  doi      = {10.1177/0894439315596385},
  abstract = {Methods to automatically analyze media content are advancing significantly. Among others, it has become increasingly popular to analyze the framing of news articles by means of statistical procedures. In this article, we investigate the conceptual validity of news frames that are inferred by a combination of k-means cluster analysis and automatic sentiment analysis. Furthermore, we test a way of improving statistical frame analysis such that revealed clusters of articles reflect the framing concept more closely. We do so by only using words from an article's title and lead and by excluding named entities and words with a certain part of speech from the analysis. To validate revealed frames, we manually analyze samples of articles from the extracted clusters. Findings of our tests indicate that when following the proposed feature selection approach, the resulting clusters more accurately discriminate between articles with a different framing. We discuss the methodological and theoretical implications of our findings.}
}
@article{GonzalezBailon2015,
  author   = {Gonz{\'{a}}lez-Bail{\'{o}}n, Sandra and Paltoglou, Georgios},
  title    = {Signals of Public Opinion in Online Communication: {A} Comparison of Methods and Data Sources},
  journal  = {The ANNALS of the American Academy of Political and Social Science},
  year     = {2015},
  volume   = {659},
  number   = {1},
  pages    = {95--107},
  issn     = {0002-7162},
  doi      = {10.1177/0002716215569192},
  keywords = {analysis,content analysis,diversity,information,language formality,lexicon-based methods,machine,sentiment,text mining},
  abstract = {This study offers a systematic comparison of automated content analysis tools. The ability of different lexicons to correctly identify affective tone (e.g., positive vs. negative) is assessed in different social media environments. Our comparisons examine the reliability and validity of publicly available, off-the-shelf classifiers. We use datasets from a range of online sources that vary in the diversity and formality of the language used, and we apply different classifiers to extract information about the affective tone in these datasets. We first measure agreement (reliability test) and then compare their classifications with the benchmark of human coding (validity test). Our analyses show that validity and reliability vary with the formality and diversity of the text; we also show that ready-to-use methods leave much space for improvement when analyzing domain-specific content and that a machine-learning approach offers more accurate predictions across communication domains.},
  file     = {:home/damian/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Gonzalez-Bailon, Paltoglou - 2015 - Signals of Public Opinion in Online Communication A Comparison of Methods and Data Sources.pdf:pdf}
}
@article{Vermeer2019,
  author    = {Vermeer, Susan and Araujo, Theo and Bernritter, Stefan F. and van Noort, Guda},
  title     = {Seeing the wood for the trees: {H}ow machine learning can help firms in identifying relevant electronic word-of-mouth in social media},
  journal   = {International Journal of Research in Marketing},
  year      = {2019},
  month     = sep,
  volume    = {36},
  number    = {3},
  pages     = {492--508},
  publisher = {Elsevier B.V.},
  issn      = {0167-8116},
  doi       = {10.1016/j.ijresmar.2019.01.010},
  keywords  = {Automated content analysis,Digital marketing strategies,Machine learning,Sentiment analysis,Social media,Webcare,eWOM},
  file      = {:home/damian/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Vermeer et al. - 2019 - Seeing the wood for the trees How machine learning can help firms in identifying relevant electronic word-of-mou.pdf:pdf}
}
@article{Welbers2018,
  author  = {Welbers, Kasper and Opgenhaffen, Micha{\"{e}}l and Janssens, Marie-Christine},
  title   = {Scrapers, {API's} en data-archieven},
  journal = {Tijdschrift voor Communicatiewetenschap},
  year    = {2018},
  volume  = {46},
  number  = {1},
  pages   = {25--40},
  file    = {:home/damian/SURFdrive/literatuur-mendeley/Welbers, Opgenhaffen, Janssens{\_}2018.pdf:pdf}
}
@book{cssbook,
  author    = {{van Atteveldt}, Wouter and Trilling, Damian and {Arcila Calder{\'o}n}, Carlos},
  title     = {Computational analysis of communication: {A} practical introduction to the analysis of texts, networks, and images with code examples in {Python} and {R}},
  year      = 2021,
  address   = {Hoboken, NJ},
  publisher = {Wiley}
}
@article{Hilbert2019,
  author  = {Hilbert, Martin and Barnett, George and Blumenstock, Joshua and Contractor, Noshir and Diesner, Jana and Frey, Seth and Gonz{\'{a}}lez-Bail{\'{o}}n, Sandra and Lamberson, P. J. and Pan, Jennifer and Peng, Tai-Quan and Shen, Cuihua and Smaldino, Paul E. and {Van Atteveldt}, Wouter and Waldherr, Annie and Zhang, Jingwen and Zhu, Jonathan J. H.},
  title   = {Computational Communication Science: {A} Methodological Catalyzer for a Maturing Discipline},
  journal = {International Journal of Communication},
  year    = {2019},
  volume  = {13},
  pages   = {3912--3934}
}
@article{VanAtteveldt2018a,
  author    = {{van Atteveldt}, Wouter and Peng, Tai-Quan},
  title     = {When Communication Meets Computation: {O}pportunities, Challenges, and Pitfalls in Computational Communication Science},
  journal   = {Communication Methods and Measures},
  year      = {2018},
  volume    = {12},
  number    = {2--3},
  pages     = {81--92},
  publisher = {Routledge},
  doi       = {10.1080/19312458.2018.1458084},
  abstract  = {The recent increase in digitally available data, tools, and processing power is fostering the use of computational methods to the study of communication. This special issue discusses the validity of using big data in communication science and showcases a number of new methods and applications in the fields of text and network analysis. Computational methods have the potential to greatly enhance the scientific study of communication because they allow us to move towards collaborative large-N studies of actual behavior in its social context. This requires us to develop new skills and infrastructure and meet the challenges of open, valid, reliable, and ethical ``big data'' research. By bringing together a number of leading scholars in one issue, we contribute to the increasing development and adaptation of computational methods in communication science.}
}
@book{Hovy2020,
  author    = {Dirk Hovy},
  title     = {Text Analysis in {P}ython for Social Scientists: Discovery and Exploration},
  publisher = {Cambridge University Press},
  address   = {Cambridge, UK},
  year      = {2020},
  doi       = {10.1017/9781108873352}
}
@misc{gartner,
  author       = {{Gartner}},
  title        = {Big Data},
  howpublished = {Information Technology Glossary},
  url          = {https://www.gartner.com/en/information-technology/glossary/big-data}
}
@article{VanAtteveldt2019,
  author  = {{van Atteveldt}, Wouter and Strycharz, Joanna and Trilling, Damian and Welbers, Kasper},
  title   = {Toward Open Computational Communication Science: {A} Practical Road Map for Reusable Data and Code},
  journal = {International Journal of Communication},
  year    = {2019},
  volume  = {13},
  pages   = {3935--3954},
  file    = {:home/damian/SURFdrive/literatuur-mendeley/Van Atteveldt et al.{\_}2019(2).pdf:pdf}
}
@data{nelagt2018,
  author    = {N{\o}rregaard, Jeppe and Horne, Benjamin Douglas and Adali, Sibel},
  title     = {{NELA-GT-2018}},
  publisher = {Harvard Dataverse},
  year      = {2019},
  version   = {V4},
  doi       = {10.7910/DVN/ULHLCB},
  url       = {https://doi.org/10.7910/DVN/ULHLCB},
  note      = {Dataset for article ``NELA-GT-2018: A Large Multi-Labelled News Dataset for the Study of Misinformation in News Articles''. (2019-01-15)}
}
@article{Haselmayer2017,
  author    = {Haselmayer, Martin and Jenny, Marcelo},
  title     = {Sentiment analysis of political communication: combining a dictionary approach with crowdcoding},
  journal   = {Quality {\&} Quantity},
  year      = {2017},
  month     = nov,
  volume    = {51},
  number    = {6},
  pages     = {2623--2646},
  publisher = {Springer Netherlands},
  issn      = {0033-5177},
  doi       = {10.1007/s11135-016-0412-4},
  keywords  = {Crowdcoding,Media negativity,Negative campaigning,Political communication,Sentiment analysis},
  abstract  = {Sentiment is important in studies of news values, public opinion, negative campaigning or political polarization and an explosive expansion of digital textual data and fast progress in automated text analysis provide vast opportunities for innovative social science research. Unfortunately, tools currently available for automated sentiment analysis are mostly restricted to English texts and require considerable contextual adaption to produce valid results. We present a procedure for collecting fine-grained sentiment scores through crowdcoding to build a negative sentiment dictionary in a language and for a domain of choice. The dictionary enables the analysis of large text corpora that resource-intensive hand-coding struggles to cope with. We calculate the tonality of sentences from dictionary words and we validate these estimates with results from manual coding. The results show that the crowdbased dictionary provides efficient and valid measurement of sentiment. Empirical examples illustrate its use by analyzing the tonality of party statements and media reports.},
  file      = {:home/damian/SURFdrive/literatuur-mendeley/Haselmayer, Jenny{\_}2017.pdf:pdf}
}
@article{Boukes2020,
  author    = {Boukes, Mark and van de Velde, Bob and Araujo, Theo and Vliegenthart, Rens},
  title     = {What's the Tone? {E}asy Doesn't Do It: {A}nalyzing Performance and Agreement Between Off-the-Shelf Sentiment Analysis Tools},
  journal   = {Communication Methods and Measures},
  year      = {2020},
  month     = apr,
  volume    = {14},
  number    = {2},
  pages     = {83--104},
  publisher = {Routledge},
  issn      = {1931-2458},
  doi       = {10.1080/19312458.2019.1671966},
  url       = {https://www.tandfonline.com/doi/full/10.1080/19312458.2019.1671966},
  abstract  = {This article scrutinizes the method of automated content analysis to measure the tone of news coverage. We compare a range of off-the-shelf sentiment analysis tools to manually coded economic news as well as examine the agreement between these dictionary approaches themselves. We assess the performance of five off-the-shelf sentiment analysis tools and two tailor-made dictionary-based approaches. The analyses result in five conclusions. First, there is little overlap between the off-the-shelf tools; causing wide divergence in terms of tone measurement. Second, there is no stronger overlap with manual coding for short texts (i.e., headlines) than for long texts (i.e., full articles). Third, an approach that combines individual dictionaries achieves a comparably good performance. Fourth, precision may increase to acceptable levels at higher levels of granularity. Fifth, performance of dictionary approaches depends more on the number of relevant keywords in the dictionary than on the number of valenced words as such; a small tailor-made lexicon was not inferior to large established dictionaries. Altogether, we conclude that off-the-shelf sentiment analysis tools are mostly unreliable and unsuitable for research purposes---at least in the context of Dutch economic news---and manual validation for the specific language, domain, and genre of the research project at hand is always warranted.},
  file      = {:home/damian/SURFdrive/literatuur-mendeley/Boukes et al.{\_}2019.pdf:pdf}
}
% NOTE(review): same work as the entry keyed VanAtteveldt2021 (identical title and authors); consider citing only one of the two keys.
@article{vanatteveldt20,
  author  = {{van Atteveldt}, Wouter and {van der Velden}, Mariken A. C. G. and Boukes, Mark},
  title   = {The Validity of Sentiment Analysis: {C}omparing Manual Annotation, Crowd-Coding, Dictionary Approaches, and Machine Learning Algorithms},
  journal = {Communication Methods and Measures},
  year    = {2020},
  doi     = {10.1080/19312458.2020.1869198}
}
@misc{Pennebaker2007,
  author    = {Pennebaker, J. W. and Booth, R. J. and Francis, M. E.},
  title     = {{Linguistic Inquiry and Word Count: LIWC}},
  year      = {2007},
  publisher = {LIWC.net},
  address   = {Austin, TX}
}
@article{VanAtteveldt2021,
  author    = {{van Atteveldt}, Wouter and {van der Velden}, Mariken A. C. G. and Boukes, Mark},
  title     = {The Validity of Sentiment Analysis: {C}omparing Manual Annotation, Crowd-Coding, Dictionary Approaches, and Machine Learning Algorithms},
  journal   = {Communication Methods and Measures},
  year      = {2021},
  volume    = {15},
  number    = {2},
  pages     = {121--140},
  publisher = {Routledge},
  issn      = {1931-2466},
  doi       = {10.1080/19312458.2020.1869198},
  url       = {https://doi.org/10.1080/19312458.2020.1869198},
  keywords  = {Automated Approaches,Evaluation,Manual Annotation,Measurement,Sentiment Analysis,Validity},
  abstract  = {Sentiment is central to many studies of communication science, from negativity and polarization in political communication to analyzing product reviews and social media comments in other sub-fields. This study provides an exhaustive comparison of sentiment analysis methods, using a validation set of Dutch economic headlines to compare the performance of manual annotation, crowd coding, numerous dictionaries and machine learning using both traditional and deep learning algorithms. The three main conclusions of this article are that: (1) The best performance is still attained with trained human or crowd coding; (2) None of the used dictionaries come close to acceptable levels of validity; and (3) machine learning, especially deep learning, substantially outperforms dictionary-based methods but falls short of human performance. From these findings, we stress the importance of always validating automatic text analysis methods before usage. Moreover, we provide a recommended step-by-step approach for (automated) text analysis projects to ensure both efficiency and validity.},
  file      = {:home/damian/SURFdrive/literatuur-mendeley/Atteveldt, Velden, Boukes{\_}2021.pdf:pdf}
}
@article{Burscher2015,
  author          = {Burscher, Bj{\"{o}}rn and Vliegenthart, Rens and de Vreese, Claes H.},
  title           = {Using supervised machine learning to code policy issues: {C}an classifiers generalize across contexts?},
  journal         = {The ANNALS of the American Academy of Political and Social Science},
  year            = {2015},
  volume          = {659},
  number          = {1},
  pages           = {122--131},
  issn            = {0002-7162},
  doi             = {10.1177/0002716215569441},
  mendeley-groups = {papers/damianpetro},
  file            = {:home/damian/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Burscher, Vliegenthart, De Vreese - 2015 - Using supervised machine learning to code policy issues Can classifiers generalize across con.pdf:pdf}
}
@article{Vermeer2020c,
  author    = {Vermeer, Susan and Trilling, Damian and Kruikemeier, Sanne and de Vreese, Claes},
  title     = {Online News User Journeys: {T}he Role of Social Media, News Websites, and Topics},
  journal   = {Digital Journalism},
  year      = {2020},
  volume    = {8},
  number    = {9},
  pages     = {1114--1141},
  publisher = {Routledge},
  issn      = {2167-082X},
  doi       = {10.1080/21670811.2020.1767509},
  url       = {https://doi.org/10.1080/21670811.2020.1767509},
  keywords  = {Markov chains,Online news consumption,entertainment news,political news,social media,supervised machine learning},
  abstract  = {The complexity and diversity of today's media landscape provides many challenges for scholars studying online news consumption. Yet it is unclear how news consumers navigate online. Moving forward, we used a custom-built browser plug-in---passively tracking Dutch online news consumers 24/7---to examine how context (website) and content (news topic) features affect patterns of online news consumption. This resulted in a data set containing more than one million Web pages, from 175 websites (news websites, search engines, social media), collected over 8 months in 2017/18. We used automated content analysis to retrieve news topics, and estimated Markov chains to detect consumption patterns. Our findings indicate that news consumers often directly visit their favorite (typically mainstream) news outlet, and continue browsing within that outlet. We also found a strong preference for entertainment news over any other topic. Although social media often offer entertainment news, they are not necessarily the starting point to such news.},
  file      = {:home/damian/SURFdrive/literatuur-mendeley/Vermeer et al.{\_}2020(2).pdf:pdf}
}
@article{Hopkins2010,
  author  = {Hopkins, Daniel J. and King, Gary},
  title   = {A method of automated nonparametric content analysis for social science},
  journal = {American Journal of Political Science},
  year    = {2010},
  volume  = {54},
  number  = {1},
  pages   = {229--247},
  file    = {:home/damian/SURFdrive/literatuur-mendeley/Hopkins, King{\_}2010.pdf:pdf}
}
@article{Trilling2021,
  author  = {Trilling, Damian and {van Hoof}, Marieke},
  title   = {Between article and topic: {N}ews events as level of analysis and their computational identification},
  journal = {Digital Journalism},
  year    = {2020},
  volume  = {8},
  number  = {10},
  pages   = {1317--1337},
  doi     = {10.1080/21670811.2020.1839352}
}
@article{Traag2019,
  author   = {Traag, V. A. and Waltman, L. and van Eck, N. J.},
  title    = {From {Louvain} to {Leiden}: guaranteeing well-connected communities},
  journal  = {Scientific Reports},
  year     = {2019},
  volume   = {9},
  number   = {1},
  pages    = {1--12},
  isbn     = {4159801941695},
  issn     = {2045-2322},
  doi      = {10.1038/s41598-019-41695-z},
  abstract = {Community detection is often used to understand the structure of large and complex networks. One of the most popular algorithms for uncovering community structure is the so-called Louvain algorithm. We show that this algorithm has a major defect that largely went unnoticed until now: the Louvain algorithm may yield arbitrarily badly connected communities. In the worst case, communities may even be disconnected, especially when running the algorithm iteratively. In our experimental analysis, we observe that up to 25{\%} of the communities are badly connected and up to 16{\%} are disconnected. This may present serious issues in subsequent analyses. To address this problem, we introduce the Leiden algorithm. We prove that the Leiden algorithm yields communities that are guaranteed to be connected. In addition, we prove that, when the Leiden algorithm is applied iteratively, it converges to a partition in which all subsets of all communities are locally optimally assigned. Furthermore, by relying on a fast local move approach, the Leiden algorithm runs faster than the Louvain algorithm. We demonstrate the performance of the Leiden algorithm for several benchmark and real-world networks. We find that the Leiden algorithm is faster than the Louvain algorithm and uncovers better partitions, in addition to providing explicit guarantees. Based on our results, we conclude that the Leiden algorithm is preferable to the Louvain algorithm.},
  file     = {:Users/damian/Dropbox/uva/literatuur-mendeley/Traag, Waltman, van Eck{\_}2019.pdf:pdf}
}
@article{Traag2015,
  author    = {Traag, V. A. and Aldecoa, R. and Delvenne, J.-C.},
  title     = {Detecting communities using asymptotical surprise},
  journal   = {Physical Review E},
  year      = {2015},
  volume    = {92},
  number    = {2},
  pages     = {022816},
  publisher = {American Physical Society ({APS})},
  doi       = {10.1103/physreve.92.022816}
}
@inproceedings{Nothman2018,
  author    = {Nothman, Joel and Qin, Hanmin and Yurchak, Roman},
  title     = {Stop Word Lists in Free Open-source Software Packages},
  booktitle = {Proceedings of Workshop for NLP Open Source Software (NLP-OSS)},
  year      = {2018},
  pages     = {7--12},
  publisher = {Association for Computational Linguistics},
  address   = {Stroudsburg, PA, USA},
  doi       = {10.18653/v1/W18-2502},
  abstract  = {Open-source software (OSS) packages for natural language processing often include stop word lists. Users may apply them without awareness of their surprising omissions (e.g. hasn't but not hadn't) and inclusions (e.g. computer), or their incompatibility with particular tokenizers. Motivated by issues raised about the Scikit-learn stop list, we investigate variation among and consistency within 52 popular English-language stop lists, and propose strategies for mitigating these issues.},
  file      = {:home/damian/SURFdrive/literatuur-mendeley/Nothman, Qin, Yurchak{\_}2018.pdf:pdf}
}