Load the required packages.
require(readtext)
## Loading required package: readtext
require(LIWCalike)
## Loading required package: LIWCalike
require(quanteda)
## Loading required package: quanteda
## Package version: 2.1.2
## Parallel computing: 2 of 8 threads used.
## See https://quanteda.io for tutorials and examples.
##
## Attaching package: 'quanteda'
## The following object is masked from 'package:utils':
##
## View
require(dplyr)
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
require(tidytext)
## Loading required package: tidytext
To get a better sense of which terms are distinctively associated with which books, calculate term frequency-inverse document frequency (tf-idf).
# Re-weight the document-feature matrix with tf-idf.
tfidf_nietzsche <- dfm_tfidf(dfmat_nietzsche)
# Transpose so that terms become rows and the book files become columns.
transposetfidf_nietzsche <- t(tfidf_nietzsche)
# Use convert() for the dfm -> data.frame step: calling as.data.frame() on a
# dfm is deprecated and raised a warning here. The terms end up in the
# `doc_id` column because they are the "documents" of the transposed matrix.
tfidf.df <- convert(transposetfidf_nietzsche, to = "data.frame")
# Sort the term table by tf-idf in A.txt (largest first); preview the top six.
# NOTE(review): the A.txt and TI.txt columns are identical in the rows printed
# below -- confirm the two input files are not the same text.
a <- tfidf.df[order(tfidf.df[["A.txt"]], decreasing = TRUE), ]
head(a, n = 6L)
## doc_id A.txt BGE.txt BT.txt CW.txt D.txt DS.txt
## 703 instinkt 13.054850 12.817489 1.1868046 2.136248 0.0000000 0.4747218
## 481 bloss 10.206519 1.898887 0.4747218 3.085692 0.0000000 1.1868046
## 561 décadent 8.797017 0.000000 0.0000000 7.443630 0.0000000 0.0000000
## 671 décadenc 7.443630 0.000000 0.0000000 8.797017 0.0000000 0.0000000
## 519 sokrat 6.785460 1.995724 11.5751966 0.000000 0.7982894 0.0000000
## 3430 l'art 6.413059 3.206529 0.0000000 0.000000 0.0000000 0.0000000
## EH.txt GM.txt GS.txt HH1.txt HH2.txt HL.txt RWB.txt
## 703 8.5449930 10.4438803 2.8483310 0.000000 0.2373609 0.0000000 0
## 481 9.2570757 3.7977747 0.2373609 0.000000 0.0000000 0.7120827 0
## 561 10.1504041 0.0000000 0.0000000 0.000000 0.0000000 0.0000000 0
## 671 4.0601617 0.0000000 0.0000000 0.000000 0.0000000 0.0000000 0
## 519 0.9978618 0.3991447 1.7961512 1.397006 1.3970065 0.1995724 0
## 3430 0.0000000 0.0000000 0.0000000 0.000000 0.0000000 0.0000000 0
## SE.txt TI.txt Z1.txt Z2.txt Z3.txt Z4.txt
## 703 0.2373609 13.054850 0 0 0 0
## 481 0.9494437 10.206519 0 0 0 0
## 561 0.0000000 8.797017 0 0 0 0
## 671 0.0000000 7.443630 0 0 0 0
## 519 0.3991447 6.785460 0 0 0 0
## 3430 0.0000000 6.413059 0 0 0 0
# Sort the term table by tf-idf in BGE.txt (largest first); preview the top six.
bge <- tfidf.df[order(tfidf.df[["BGE.txt"]], decreasing = TRUE), ]
head(bge, n = 6L)
## doc_id A.txt BGE.txt BT.txt CW.txt D.txt DS.txt
## 703 instinkt 13.0548504 12.817489 1.1868046 2.1362482 0.000000 0.4747218
## 2344 europa 0.9888615 12.031148 0.4944307 0.6592410 4.944307 0.0000000
## 405 moral 5.9544158 9.547598 1.1292858 1.3346104 7.597013 0.3079870
## 5232 ideen 0.0000000 9.015927 1.1269908 0.0000000 0.000000 0.7513272
## 226 philosophen 2.3612339 8.931624 0.3079870 0.7186364 2.463896 0.1026623
## 4903 hauptstück 0.0000000 8.799512 0.0000000 0.0000000 0.000000 0.0000000
## EH.txt GM.txt GS.txt HH1.txt HH2.txt HL.txt RWB.txt
## 703 8.5449930 10.443880 2.848331 0.000000 0.2373609 0.0000000 0.0000000
## 2344 1.6481025 3.625825 5.933169 2.142533 3.2962050 0.0000000 0.1648102
## 405 4.3118184 3.901169 4.619805 2.874546 2.7718832 0.4106494 0.0000000
## 5232 0.7513272 2.253982 2.253982 0.000000 0.7513272 0.0000000 0.0000000
## 226 1.2319481 3.079870 3.695844 3.285195 1.2319481 0.2053247 0.4106494
## 4903 0.0000000 0.000000 0.000000 8.799512 0.0000000 0.0000000 0.0000000
## SE.txt TI.txt Z1.txt Z2.txt Z3.txt Z4.txt
## 703 0.2373609 13.0548504 0.0000000 0 0 0.0000000
## 2344 0.0000000 0.9888615 0.0000000 0 0 0.3296205
## 405 0.1026623 5.9544158 0.2053247 0 0 0.0000000
## 5232 0.7513272 0.0000000 0.0000000 0 0 0.0000000
## 226 5.3384418 2.3612339 0.0000000 0 0 0.0000000
## 4903 0.0000000 0.0000000 0.0000000 0 0 0.0000000
# Sort the term table by tf-idf in BT.txt (largest first); preview the top six.
bt <- tfidf.df[order(tfidf.df[["BT.txt"]], decreasing = TRUE), ]
head(bt, n = 6L)
## doc_id A.txt BGE.txt BT.txt CW.txt D.txt DS.txt EH.txt
## 12466 euripid 0.0000000 0 57.68569 0.0000000 0 0 0.0000000
## 3048 dionysischen 4.0584852 0 51.60074 0.0000000 0 0 4.6382688
## 11768 dionysus 0.0000000 0 49.87139 0.0000000 0 0 0.0000000
## 11534 chor 0.0000000 0 43.01984 0.0000000 0 0 0.9777236
## 11453 mythus 0.0000000 0 39.54759 0.5006024 0 0 0.0000000
## 3099 apollinischen 0.6766936 0 39.24823 0.0000000 0 0 0.0000000
## GM.txt GS.txt HH1.txt HH2.txt HL.txt RWB.txt SE.txt TI.txt
## 12466 0 0.000000 0.9777236 0.000000 0.0000000 0.000000 0.0000000 0.0000000
## 3048 0 1.739351 0.0000000 0.000000 0.0000000 0.000000 0.0000000 4.0584852
## 11768 0 0.000000 0.0000000 0.000000 0.0000000 0.000000 0.0000000 0.0000000
## 11534 0 0.000000 0.0000000 0.000000 0.0000000 0.000000 0.0000000 0.0000000
## 11453 0 0.000000 1.5018071 1.501807 0.0000000 5.506626 0.5006024 0.0000000
## 3099 0 0.000000 0.0000000 0.000000 0.6766936 0.000000 0.0000000 0.6766936
## Z1.txt Z2.txt Z3.txt Z4.txt
## 12466 0 0 0 0
## 3048 0 0 0 0
## 11768 0 0 0 0
## 11534 0 0 0 0
## 11453 0 0 0 0
## 3099 0 0 0 0
# Sort the term table by tf-idf in CW.txt (largest first); preview the top six.
cw <- tfidf.df[order(tfidf.df[["CW.txt"]], decreasing = TRUE), ]
head(cw, n = 6L)
## doc_id A.txt BGE.txt BT.txt CW.txt D.txt DS.txt
## 2513 wagner 0.3296205 2.4721537 1.153672 29.501035 0.659241 0.0000000
## 16047 brahm 0.0000000 0.0000000 0.000000 10.230029 0.000000 0.0000000
## 15395 wagnern 0.0000000 0.0000000 0.000000 9.619588 0.000000 0.0000000
## 422 musik 0.9239611 4.1064937 17.657923 8.931624 6.467728 0.5133117
## 671 décadenc 7.4436297 0.0000000 0.000000 8.797017 0.000000 0.0000000
## 10363 bizet 0.0000000 0.9777236 0.000000 7.821789 0.000000 0.0000000
## EH.txt GM.txt GS.txt HH1.txt HH2.txt HL.txt RWB.txt
## 2513 9.723805 2.3073435 2.6369640 0.3296205 0.9888615 0.0000000 18.953179
## 16047 0.000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.000000
## 15395 3.206529 0.0000000 0.8016323 0.0000000 0.0000000 0.0000000 0.000000
## 422 2.669221 0.9239611 6.6730522 4.2091560 9.2396108 0.2053247 8.212987
## 671 4.060162 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.000000
## 10363 0.000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.000000
## SE.txt TI.txt Z1.txt Z2.txt Z3.txt Z4.txt
## 2513 0.6592410 0.3296205 0 0 0 0
## 16047 0.0000000 0.0000000 0 0 0 0
## 15395 0.0000000 0.0000000 0 0 0 0
## 422 0.5133117 0.9239611 0 0 0 0
## 671 0.0000000 7.4436297 0 0 0 0
## 10363 0.0000000 0.0000000 0 0 0 0
# Sort the term table by tf-idf in D.txt (largest first); preview the top six.
d <- tfidf.df[order(tfidf.df[["D.txt"]], decreasing = TRUE), ]
head(d, n = 6L)
## doc_id A.txt BGE.txt BT.txt CW.txt D.txt DS.txt
## 16259 diess 0.0000000 0.0000000 0.000000 0 51.01191 0.0000000
## 3474 unser 0.2053247 0.9239611 5.338442 0 19.19786 4.0038313
## 16280 sodass 0.0000000 0.0000000 0.000000 0 15.07437 0.0000000
## 16260 sittlichkeit 0.0000000 0.0000000 0.000000 0 13.14823 0.3756636
## 16263 desshalb 0.0000000 0.0000000 0.000000 0 12.02124 0.0000000
## 16272 intellect 0.0000000 0.0000000 0.000000 0 11.01589 0.0000000
## EH.txt GM.txt GS.txt HH1.txt HH2.txt HL.txt RWB.txt
## 16259 0 0.0000000 30.105389 38.7467505 38.746751 0.000000 10.3138832
## 3474 0 0.1026623 10.676884 11.2928576 9.958247 4.209156 3.1825326
## 16280 0 0.0000000 6.377620 0.5797836 7.537187 0.000000 0.5797836
## 16260 0 2.2539817 2.629645 0.7513272 1.126991 0.000000 0.3756636
## 16263 0 0.0000000 4.883627 27.7991074 21.412826 0.000000 0.0000000
## 16272 0 0.0000000 5.797836 14.4945899 5.218052 0.000000 0.5797836
## SE.txt TI.txt Z1.txt Z2.txt Z3.txt Z4.txt
## 16259 0.2787536 0.0000000 9.4776224 15.0526945 7.5263472 10.8713904
## 3474 2.3612339 0.2053247 0.2053247 0.0000000 0.0000000 0.1026623
## 16280 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## 16260 1.5026545 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## 16263 0.0000000 0.0000000 2.2539817 0.3756636 0.3756636 0.3756636
## 16272 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
# Sort the term table by tf-idf in DS.txt (largest first); preview the top six.
ds <- tfidf.df[order(tfidf.df[["DS.txt"]], decreasing = TRUE), ]
head(ds, n = 6L)
## doc_id A.txt BGE.txt BT.txt CW.txt D.txt DS.txt
## 2421 strauss 0.3756636 0.000000 0.000000 0.3756636 0.0000000 54.84689
## 20907 kultur 0.0000000 0.000000 0.000000 0.0000000 0.0000000 54.75252
## 21026 philist 0.0000000 0.000000 0.000000 0.0000000 0.0000000 33.83468
## 21293 magist 0.0000000 0.000000 0.000000 0.0000000 0.0000000 33.24759
## 11338 s 0.0000000 1.298044 2.271578 0.3245111 0.3245111 24.66284
## 21267 straussischen 0.0000000 0.000000 0.000000 0.0000000 0.0000000 20.46006
## EH.txt GM.txt GS.txt HH1.txt HH2.txt HL.txt RWB.txt SE.txt
## 2421 1.126991 0.000000 0.0000000 0 0.3756636 0.3756636 0 0.000000
## 20907 0.000000 0.000000 0.0000000 0 0.0000000 0.0000000 0 47.908457
## 21026 0.000000 0.000000 0.6766936 0 0.6766936 0.0000000 0 1.353387
## 21293 0.000000 0.000000 0.0000000 0 0.0000000 0.0000000 0 0.000000
## 11338 1.298044 5.516689 0.3245111 0 0.3245111 0.0000000 0 0.000000
## 21267 0.000000 0.000000 0.0000000 0 0.0000000 0.0000000 0 0.000000
## TI.txt Z1.txt Z2.txt Z3.txt Z4.txt
## 2421 0.3756636 0 0 0.3756636 0
## 20907 0.0000000 0 0 0.0000000 0
## 21026 0.0000000 0 0 0.0000000 0
## 21293 0.0000000 0 0 0.0000000 0
## 11338 0.0000000 0 0 0.0000000 0
## 21267 0.0000000 0 0 0.0000000 0
# Sort the term table by tf-idf in EH.txt (largest first); preview the top six.
eh <- tfidf.df[order(tfidf.df[["EH.txt"]], decreasing = TRUE), ]
head(eh, n = 6L)
## doc_id A.txt BGE.txt BT.txt CW.txt D.txt DS.txt
## 1307 zarathustra 0.7982894 0.1995724 1.1974341 0.1995724 0.000000 0.000000
## 15990 < 0.0000000 0.0000000 0.0000000 0.6766936 2.030081 0.000000
## 15991 > 0.0000000 0.0000000 0.0000000 0.6766936 2.030081 0.000000
## 561 décadent 8.7970169 0.0000000 0.0000000 7.4436297 0.000000 0.000000
## 2513 wagner 0.3296205 2.4721537 1.1536717 29.5010345 0.659241 0.000000
## 481 bloss 10.2065194 1.8988873 0.4747218 3.0856919 0.000000 1.186805
## EH.txt GM.txt GS.txt HH1.txt HH2.txt HL.txt RWB.txt
## 1307 15.367071 0.7982894 0.9978618 0.0000000 0.0000000 0.0000000 0.00000
## 15990 14.210566 0.0000000 0.0000000 0.6766936 0.0000000 0.0000000 0.00000
## 15991 14.210566 0.0000000 0.0000000 0.6766936 0.0000000 0.0000000 0.00000
## 561 10.150404 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.00000
## 2513 9.723805 2.3073435 2.6369640 0.3296205 0.9888615 0.0000000 18.95318
## 481 9.257076 3.7977747 0.2373609 0.0000000 0.0000000 0.7120827 0.00000
## SE.txt TI.txt Z1.txt Z2.txt Z3.txt Z4.txt
## 1307 0.0000000 0.7982894 27.34141 18.7598 18.95937 52.88667
## 15990 0.0000000 0.0000000 0.00000 0.0000 0.00000 0.00000
## 15991 0.0000000 0.0000000 0.00000 0.0000 0.00000 0.00000
## 561 0.0000000 8.7970169 0.00000 0.0000 0.00000 0.00000
## 2513 0.6592410 0.3296205 0.00000 0.0000 0.00000 0.00000
## 481 0.9494437 10.2065194 0.00000 0.0000 0.00000 0.00000
# Sort the term table by tf-idf in GM.txt (largest first); preview the top six.
gm <- tfidf.df[order(tfidf.df[["GM.txt"]], decreasing = TRUE), ]
head(gm, n = 6L)
## doc_id A.txt BGE.txt BT.txt CW.txt D.txt DS.txt
## 12822 asketisch 0.000000 0.000000 0.6766936 0.0000000 1.3533872 0.6766936
## 18561 asketischen 0.000000 0.000000 0.0000000 0.0000000 0.6766936 0.0000000
## 2805 ressenti 1.353387 0.000000 0.0000000 0.0000000 0.0000000 0.0000000
## 25121 schuldner 0.000000 0.000000 0.0000000 0.0000000 0.0000000 0.0000000
## 241 ideal 1.437273 1.847922 1.2319481 0.7186364 2.5665585 0.3079870
## 703 instinkt 13.054850 12.817489 1.1868046 2.1362482 0.0000000 0.4747218
## EH.txt GM.txt GS.txt HH1.txt HH2.txt HL.txt RWB.txt
## 12822 0.0000000 39.24823 0.000000 0.0000000 0.0000000 0.0000000 0.0000000
## 18561 0.6766936 27.06774 0.000000 0.6766936 0.0000000 0.0000000 0.0000000
## 2805 4.7368553 19.62411 0.000000 0.0000000 0.0000000 0.0000000 0.0000000
## 25121 0.0000000 14.06629 0.000000 0.0000000 0.0000000 0.0000000 0.0000000
## 241 2.7718832 11.90883 2.874546 0.4106494 1.5399351 0.1026623 0.1026623
## 703 8.5449930 10.44388 2.848331 0.0000000 0.2373609 0.0000000 0.0000000
## SE.txt TI.txt Z1.txt Z2.txt Z3.txt Z4.txt
## 12822 0.0000000 0.000000 0 0 0 0
## 18561 0.0000000 0.000000 0 0 0 0
## 2805 0.0000000 1.353387 0 0 0 0
## 25121 0.0000000 0.000000 0 0 0 0
## 241 0.8212987 1.437273 0 0 0 0
## 703 0.2373609 13.054850 0 0 0 0
# Sort the term table by tf-idf in GS.txt (largest first); preview the top six.
gs <- tfidf.df[order(tfidf.df[["GS.txt"]], decreasing = TRUE), ]
head(gs, n = 6L)
## doc_id A.txt BGE.txt BT.txt CW.txt D.txt DS.txt EH.txt
## 16259 diess 0.0000000 0.0000000 0.000000 0.0000000 51.01191 0.000000 0
## 3474 unser 0.2053247 0.9239611 5.338442 0.0000000 19.19786 4.003831 0
## 27954 chamfort 0.0000000 0.0000000 0.000000 0.0000000 0.00000 0.000000 0
## 27990 brutus 0.0000000 0.0000000 0.000000 0.0000000 0.00000 0.000000 0
## 9551 corrupt 0.0000000 3.3834680 0.000000 0.0000000 0.00000 1.353387 0
## 15482 b 0.0000000 0.0000000 0.000000 0.6766936 10.82710 0.000000 0
## GM.txt GS.txt HH1.txt HH2.txt HL.txt RWB.txt SE.txt
## 16259 0.0000000 30.105389 38.746751 38.746751 0.000000 10.313883 0.2787536
## 3474 0.1026623 10.676884 11.292858 9.958247 4.209156 3.182533 2.3612339
## 27954 0.0000000 10.230029 0.000000 0.000000 0.000000 0.000000 0.0000000
## 27990 0.0000000 9.777236 0.000000 0.000000 0.000000 0.000000 0.9777236
## 9551 0.0000000 8.797017 1.353387 0.000000 0.000000 0.000000 0.0000000
## 15482 0.0000000 8.120323 0.000000 2.706774 0.000000 0.000000 0.0000000
## TI.txt Z1.txt Z2.txt Z3.txt Z4.txt
## 16259 0.0000000 9.4776224 15.05269 7.526347 10.8713904
## 3474 0.2053247 0.2053247 0.00000 0.000000 0.1026623
## 27954 0.0000000 0.0000000 0.00000 0.000000 0.0000000
## 27990 0.0000000 0.0000000 0.00000 0.000000 0.0000000
## 9551 0.0000000 0.0000000 0.00000 0.000000 0.0000000
## 15482 0.0000000 0.0000000 0.00000 0.000000 0.0000000
# Sort the term table by tf-idf in HH1.txt (largest first); preview the top six.
hh1 <- tfidf.df[order(tfidf.df[["HH1.txt"]], decreasing = TRUE), ]
head(hh1, n = 6L)
## doc_id A.txt BGE.txt BT.txt CW.txt D.txt DS.txt
## 16259 diess 0.000000 0.000000 0.0000000 0.0000000 51.011909 0.0000000
## 16263 desshalb 0.000000 0.000000 0.0000000 0.0000000 12.021236 0.0000000
## 2339 cultur 2.254635 2.785137 7.4270317 0.1326256 2.917762 0.0000000
## 209 handlungen 1.397006 1.995724 0.9978618 0.0000000 9.579473 0.1995724
## 16272 intellect 0.000000 0.000000 0.0000000 0.0000000 11.015888 0.0000000
## 28176 ueberzeugungen 0.000000 0.000000 0.0000000 0.0000000 0.000000 0.0000000
## EH.txt GM.txt GS.txt HH1.txt HH2.txt HL.txt RWB.txt
## 16259 0.0000000 0.000000 30.105389 38.74675 38.746751 0.000000 10.3138832
## 16263 0.0000000 0.000000 4.883627 27.79911 21.412826 0.000000 0.0000000
## 2339 1.4588812 1.326256 2.652511 15.51719 4.111393 4.244018 1.4588812
## 209 0.3991447 1.197434 2.794013 15.36707 3.193158 0.000000 0.3991447
## 16272 0.0000000 0.000000 5.797836 14.49459 5.218052 0.000000 0.5797836
## 28176 0.0000000 0.000000 4.809794 13.62775 2.404897 0.000000 0.0000000
## SE.txt TI.txt Z1.txt Z2.txt Z3.txt Z4.txt
## 16259 0.2787536 0.000000 9.477622 15.0526945 7.5263472 10.8713904
## 16263 0.0000000 0.000000 2.253982 0.3756636 0.3756636 0.3756636
## 2339 1.1936301 2.254635 0.000000 0.0000000 0.0000000 0.0000000
## 209 0.0000000 1.397006 0.000000 0.0000000 0.0000000 0.0000000
## 16272 0.0000000 0.000000 0.000000 0.0000000 0.0000000 0.0000000
## 28176 0.0000000 0.000000 0.000000 0.0000000 0.0000000 0.0000000
# Sort the term table by tf-idf in HH2.txt (largest first); preview the top six.
hh2 <- tfidf.df[order(tfidf.df[["HH2.txt"]], decreasing = TRUE), ]
head(hh2, n = 6L)
## doc_id A.txt BGE.txt BT.txt CW.txt D.txt DS.txt EH.txt
## 16259 diess 0.0000000 0.0000000 0.000000 0 51.011909 0.000000 0.000000
## 16263 desshalb 0.0000000 0.0000000 0.000000 0 12.021236 0.000000 0.000000
## 21532 autor 0.0000000 0.0000000 0.000000 0 0.000000 5.506626 1.001205
## 35896 pyrrhon 0.0000000 0.0000000 0.000000 0 0.000000 0.000000 0.000000
## 3474 unser 0.2053247 0.9239611 5.338442 0 19.197858 4.003831 0.000000
## 2752 freud 0.1648102 0.1648102 1.812913 0 4.944307 0.659241 0.000000
## GM.txt GS.txt HH1.txt HH2.txt HL.txt RWB.txt SE.txt
## 16259 0.0000000 30.105389 38.746751 38.746751 0.000000 10.3138832 0.2787536
## 16263 0.0000000 4.883627 27.799107 21.412826 0.000000 0.0000000 0.0000000
## 21532 0.0000000 0.000000 7.008433 15.518673 1.001205 0.0000000 1.0012047
## 35896 0.0000000 0.000000 0.000000 11.508782 0.000000 0.0000000 0.0000000
## 3474 0.1026623 10.676884 11.292858 9.958247 4.209156 3.1825326 2.3612339
## 2752 1.1536717 3.625825 6.757220 9.723805 0.000000 0.3296205 0.4944307
## TI.txt Z1.txt Z2.txt Z3.txt Z4.txt
## 16259 0.0000000 9.4776224 15.0526945 7.5263472 10.8713904
## 16263 0.0000000 2.2539817 0.3756636 0.3756636 0.3756636
## 21532 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## 35896 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## 3474 0.2053247 0.2053247 0.0000000 0.0000000 0.1026623
## 2752 0.1648102 0.0000000 0.1648102 0.0000000 0.0000000
# Sort the term table by tf-idf in HL.txt (largest first); preview the top six.
hl <- tfidf.df[order(tfidf.df[["HL.txt"]], decreasing = TRUE), ]
head(hl, n = 6L)
## doc_id A.txt BGE.txt BT.txt CW.txt D.txt DS.txt
## 37253 weltprozess 0.0000000 0.0000000 0.0000000 0 0.0000000 0
## 975 histori 0.5987171 0.5987171 0.1995724 0 0.3991447 0
## 36614 antiquarisch 0.0000000 0.0000000 0.0000000 0 0.0000000 0
## 24308 unhistorisch 0.0000000 0.0000000 0.0000000 0 0.0000000 0
## 8361 historischen 0.0000000 1.3937680 1.9512752 0 0.8362608 0
## 12313 uebermaass 0.0000000 0.0000000 4.7368553 0 0.0000000 0
## EH.txt GM.txt GS.txt HH1.txt HH2.txt HL.txt RWB.txt
## 37253 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 30.69009 0.0000000
## 975 0.0000000 1.3970065 0.1995724 0.5987171 2.9935853 15.76622 0.9978618
## 36614 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 14.06629 0.0000000
## 24308 0.0000000 0.8016323 0.0000000 0.8016323 0.0000000 12.02449 0.0000000
## 8361 0.5575072 1.3937680 1.6725216 2.2300288 0.8362608 11.15014 0.2787536
## 12313 0.0000000 0.0000000 1.3533872 0.0000000 2.0300808 10.82710 0.0000000
## SE.txt TI.txt Z1.txt Z2.txt Z3.txt Z4.txt
## 37253 0.000000 0.0000000 0 0 0 0
## 975 1.397006 0.5987171 0 0 0 0
## 36614 0.000000 0.0000000 0 0 0 0
## 24308 0.000000 0.0000000 0 0 0 0
## 8361 0.000000 0.0000000 0 0 0 0
## 12313 0.000000 0.0000000 0 0 0 0
# Sort the term table by tf-idf in RWB.txt (largest first); preview the top six.
rwb <- tfidf.df[order(tfidf.df[["RWB.txt"]], decreasing = TRUE), ]
head(rwb, n = 6L)
## doc_id A.txt BGE.txt BT.txt CW.txt D.txt DS.txt
## 2513 wagner 0.3296205 2.4721537 1.153672 29.501035 0.659241 0.0000000
## 10457 wagnerischen 0.0000000 0.6766936 0.000000 4.060162 0.000000 0.0000000
## 16259 diess 0.0000000 0.0000000 0.000000 0.000000 51.011909 0.0000000
## 422 musik 0.9239611 4.1064937 17.657923 8.931624 6.467728 0.5133117
## 37654 nibelungen 0.0000000 0.0000000 0.000000 0.000000 0.000000 0.0000000
## 38153 styl 0.0000000 0.0000000 0.000000 0.000000 0.000000 0.0000000
## EH.txt GM.txt GS.txt HH1.txt HH2.txt HL.txt RWB.txt
## 2513 9.723805 2.3073435 2.636964 0.3296205 0.9888615 0.0000000 18.953179
## 10457 0.000000 0.0000000 0.000000 0.0000000 0.6766936 0.0000000 10.827098
## 16259 0.000000 0.0000000 30.105389 38.7467505 38.7467505 0.0000000 10.313883
## 422 2.669221 0.9239611 6.673052 4.2091560 9.2396108 0.2053247 8.212987
## 37654 0.000000 0.0000000 0.000000 0.0000000 0.0000000 0.0000000 7.672522
## 38153 0.000000 0.0000000 0.000000 0.0000000 0.0000000 0.0000000 7.672522
## SE.txt TI.txt Z1.txt Z2.txt Z3.txt Z4.txt
## 2513 0.6592410 0.3296205 0.000000 0.00000 0.000000 0.00000
## 10457 0.0000000 0.0000000 0.000000 0.00000 0.000000 0.00000
## 16259 0.2787536 0.0000000 9.477622 15.05269 7.526347 10.87139
## 422 0.5133117 0.9239611 0.000000 0.00000 0.000000 0.00000
## 37654 0.0000000 0.0000000 0.000000 0.00000 0.000000 0.00000
## 38153 0.0000000 0.0000000 0.000000 0.00000 0.000000 0.00000
# Sort the term table by tf-idf in SE.txt (largest first); preview the top six.
se <- tfidf.df[order(tfidf.df[["SE.txt"]], decreasing = TRUE), ]
head(se, n = 6L)
## doc_id A.txt BGE.txt BT.txt CW.txt D.txt
## 20907 kultur 0.000000 0.0000000 0.0000000 0.0000000 0.0000000
## 38718 erwerbenden 0.000000 0.0000000 0.0000000 0.0000000 0.0000000
## 39153 universitätsphilosophi 0.000000 0.0000000 0.0000000 0.0000000 0.0000000
## 1573 schopenhau 1.334610 1.2319481 1.4372728 0.4106494 2.5665585
## 10914 genius 0.000000 0.4747218 5.4593011 0.0000000 0.9494437
## 923 philosophi 1.026623 6.6730522 0.8212987 0.4106494 2.4638962
## DS.txt EH.txt GM.txt GS.txt HH1.txt HH2.txt HL.txt
## 20907 54.7525219 0.000000 0.0000000 0.0000000 0.000000 0.0000000 0.0000000
## 38718 0.0000000 0.000000 0.0000000 0.0000000 0.000000 0.0000000 0.0000000
## 39153 0.0000000 0.000000 0.0000000 0.0000000 0.000000 0.0000000 0.0000000
## 1573 2.4638962 1.334610 2.5665585 2.0532468 1.847922 1.7452598 0.2053247
## 10914 1.6615264 0.000000 0.2373609 0.4747218 6.646106 0.4747218 0.4747218
## 923 0.7186364 1.437273 1.9505845 2.3612339 3.798507 2.5665585 1.0266234
## RWB.txt SE.txt TI.txt Z1.txt Z2.txt Z3.txt Z4.txt
## 20907 0.0000000 47.908457 0.000000 0 0 0 0
## 38718 0.0000000 7.672522 0.000000 0 0 0 0
## 39153 0.0000000 7.672522 0.000000 0 0 0 0
## 1573 0.3079870 7.289026 1.334610 0 0 0 0
## 10914 0.9494437 7.120827 0.000000 0 0 0 0
## 923 0.6159741 6.570390 1.026623 0 0 0 0
# Sort the term table by tf-idf in TI.txt (largest first); preview the top six.
# NOTE(review): the TI.txt and A.txt columns are identical in the rows printed
# below -- confirm the two input files are not the same text.
ti <- tfidf.df[order(tfidf.df[["TI.txt"]], decreasing = TRUE), ]
head(ti, n = 6L)
## doc_id A.txt BGE.txt BT.txt CW.txt D.txt DS.txt
## 703 instinkt 13.054850 12.817489 1.1868046 2.136248 0.0000000 0.4747218
## 481 bloss 10.206519 1.898887 0.4747218 3.085692 0.0000000 1.1868046
## 561 décadent 8.797017 0.000000 0.0000000 7.443630 0.0000000 0.0000000
## 671 décadenc 7.443630 0.000000 0.0000000 8.797017 0.0000000 0.0000000
## 519 sokrat 6.785460 1.995724 11.5751966 0.000000 0.7982894 0.0000000
## 3430 l'art 6.413059 3.206529 0.0000000 0.000000 0.0000000 0.0000000
## EH.txt GM.txt GS.txt HH1.txt HH2.txt HL.txt RWB.txt
## 703 8.5449930 10.4438803 2.8483310 0.000000 0.2373609 0.0000000 0
## 481 9.2570757 3.7977747 0.2373609 0.000000 0.0000000 0.7120827 0
## 561 10.1504041 0.0000000 0.0000000 0.000000 0.0000000 0.0000000 0
## 671 4.0601617 0.0000000 0.0000000 0.000000 0.0000000 0.0000000 0
## 519 0.9978618 0.3991447 1.7961512 1.397006 1.3970065 0.1995724 0
## 3430 0.0000000 0.0000000 0.0000000 0.000000 0.0000000 0.0000000 0
## SE.txt TI.txt Z1.txt Z2.txt Z3.txt Z4.txt
## 703 0.2373609 13.054850 0 0 0 0
## 481 0.9494437 10.206519 0 0 0 0
## 561 0.0000000 8.797017 0 0 0 0
## 671 0.0000000 7.443630 0 0 0 0
## 519 0.3991447 6.785460 0 0 0 0
## 3430 0.0000000 6.413059 0 0 0 0
# Sort the term table by tf-idf in Z1.txt (largest first); preview the top six.
z1 <- tfidf.df[order(tfidf.df[["Z1.txt"]], decreasing = TRUE), ]
head(z1, n = 6L)
## doc_id A.txt BGE.txt BT.txt CW.txt D.txt
## 1307 zarathustra 0.7982894 0.1995724 1.1974341 0.1995724 0.0000000
## 23925 übermenschen 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## 16259 diess 0.0000000 0.0000000 0.0000000 0.0000000 51.0119090
## 5415 überflüssigen 0.0000000 1.6032647 0.0000000 0.0000000 0.0000000
## 12637 wahrlich 0.0000000 0.0000000 0.1995724 0.0000000 0.9978618
## 13170 bruder 0.0000000 0.0000000 0.7513272 0.0000000 0.3756636
## DS.txt EH.txt GM.txt GS.txt HH1.txt HH2.txt HL.txt
## 1307 0.0000000 15.3670713 0.7982894 0.9978618 0.000000 0.0000000 0.0000000
## 23925 0.0000000 1.7393508 0.0000000 0.0000000 0.000000 0.0000000 0.0000000
## 16259 0.0000000 0.0000000 0.0000000 30.1053889 38.746751 38.7467505 0.0000000
## 5415 0.0000000 0.0000000 0.8016323 0.0000000 0.000000 0.0000000 0.0000000
## 12637 0.3991447 0.7982894 0.0000000 1.9957235 0.000000 0.1995724 0.1995724
## 13170 0.0000000 0.0000000 0.0000000 0.3756636 1.126991 0.7513272 0.0000000
## RWB.txt SE.txt TI.txt Z1.txt Z2.txt Z3.txt Z4.txt
## 1307 0.0000000 0.0000000 0.7982894 27.341413 18.75980 18.959374 52.8866740
## 23925 0.0000000 0.0000000 0.0000000 10.436105 6.37762 1.159567 1.1595672
## 16259 10.3138832 0.2787536 0.0000000 9.477622 15.05269 7.526347 10.8713904
## 5415 0.0000000 0.0000000 0.0000000 8.016323 0.00000 0.000000 0.0000000
## 12637 0.0000000 0.3991447 0.0000000 7.384177 14.36921 10.178190 7.3841771
## 13170 0.7513272 0.0000000 0.0000000 6.010618 0.00000 0.000000 0.7513272
# Sort the term table by tf-idf in Z2.txt (largest first); preview the top six.
z2 <- tfidf.df[order(tfidf.df[["Z2.txt"]], decreasing = TRUE), ]
head(z2, n = 6L)
## doc_id A.txt BGE.txt BT.txt CW.txt D.txt DS.txt
## 1307 zarathustra 0.7982894 0.1995724 1.1974341 0.1995724 0.0000000 0.0000000
## 16259 diess 0.0000000 0.0000000 0.0000000 0.0000000 51.0119090 0.0000000
## 12637 wahrlich 0.0000000 0.0000000 0.1995724 0.0000000 0.9978618 0.3991447
## 40177 feuerhund 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## 22944 gesindel 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## 8635 krüppel 0.0000000 0.8016323 0.0000000 0.0000000 0.0000000 0.0000000
## EH.txt GM.txt GS.txt HH1.txt HH2.txt HL.txt RWB.txt
## 1307 15.3670713 0.7982894 0.9978618 0.00000 0.0000000 0.0000000 0.00000
## 16259 0.0000000 0.0000000 30.1053889 38.74675 38.7467505 0.0000000 10.31388
## 12637 0.7982894 0.0000000 1.9957235 0.00000 0.1995724 0.1995724 0.00000
## 40177 0.0000000 0.0000000 0.0000000 0.00000 0.0000000 0.0000000 0.00000
## 22944 1.7393508 0.0000000 0.0000000 0.00000 0.5797836 0.0000000 0.00000
## 8635 0.0000000 0.0000000 0.0000000 0.00000 0.0000000 0.0000000 0.00000
## SE.txt TI.txt Z1.txt Z2.txt Z3.txt Z4.txt
## 1307 0.0000000 0.7982894 27.341413 18.759801 18.959374 52.8866740
## 16259 0.2787536 0.0000000 9.477622 15.052694 7.526347 10.8713904
## 12637 0.3991447 0.0000000 7.384177 14.369210 10.178190 7.3841771
## 40177 0.0000000 0.0000000 0.000000 11.508782 0.000000 0.0000000
## 22944 0.0000000 0.0000000 0.000000 6.957403 1.159567 2.3191344
## 8635 0.0000000 0.0000000 0.000000 6.413059 0.000000 0.8016323
# Sort the term table by tf-idf in Z3.txt (largest first); preview the top six.
z3 <- tfidf.df[order(tfidf.df[["Z3.txt"]], decreasing = TRUE), ]
head(z3, n = 6L)
## doc_id A.txt BGE.txt BT.txt CW.txt D.txt
## 1307 zarathustra 0.7982894 0.1995724 1.1974341 0.1995724 0.0000000
## 265 oh 0.7957534 2.1220090 0.5305023 0.1326256 2.9177624
## 39568 zerbrecht 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## 15778 tafeln 0.0000000 0.0000000 0.0000000 0.6766936 0.0000000
## 12637 wahrlich 0.0000000 0.0000000 0.1995724 0.0000000 0.9978618
## 41218 hochzeitlichen 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## DS.txt EH.txt GM.txt GS.txt HH1.txt HH2.txt HL.txt
## 1307 0.0000000 15.3670713 0.7982894 0.9978618 0 0.0000000 0.0000000
## 265 0.0000000 1.8567579 1.7241323 5.0397715 0 0.3978767 0.0000000
## 39568 0.0000000 0.0000000 0.0000000 0.0000000 0 0.0000000 0.0000000
## 15778 0.0000000 2.0300808 0.0000000 0.0000000 0 0.0000000 0.0000000
## 12637 0.3991447 0.7982894 0.0000000 1.9957235 0 0.1995724 0.1995724
## 41218 0.0000000 0.0000000 0.0000000 0.0000000 0 0.0000000 0.0000000
## RWB.txt SE.txt TI.txt Z1.txt Z2.txt Z3.txt Z4.txt
## 1307 0 0.0000000 0.7982894 27.3414126 18.759801 18.959374 52.886674
## 265 0 0.0000000 0.7957534 0.9283790 3.713516 18.567579 14.323561
## 39568 0 0.0000000 0.0000000 0.9777236 0.000000 13.688130 0.000000
## 15778 0 0.0000000 0.0000000 2.7067744 0.000000 11.503791 0.000000
## 12637 0 0.3991447 0.0000000 7.3841771 14.369210 10.178190 7.384177
## 41218 0 0.0000000 0.0000000 0.0000000 0.000000 8.951275 0.000000
# Sort the term table by tf-idf in Z4.txt (largest first); preview the top six.
z4 <- tfidf.df[order(tfidf.df[["Z4.txt"]], decreasing = TRUE), ]
head(z4, n = 6L)
## doc_id A.txt BGE.txt BT.txt CW.txt D.txt DS.txt
## 1307 zarathustra 0.7982894 0.1995724 1.1974341 0.1995724 0.000000 0
## 265 oh 0.7957534 2.1220090 0.5305023 0.1326256 2.917762 0
## 40871 i-a 0.0000000 0.0000000 0.0000000 0.0000000 0.000000 0
## 33338 hässlichst 0.0000000 0.0000000 0.0000000 0.0000000 0.000000 0
## 16259 diess 0.0000000 0.0000000 0.0000000 0.0000000 51.011909 0
## 41389 blutegel 0.0000000 0.0000000 0.0000000 0.0000000 0.000000 0
## EH.txt GM.txt GS.txt HH1.txt HH2.txt HL.txt RWB.txt
## 1307 15.367071 0.7982894 0.9978618 0.00000 0.0000000 0 0.00000
## 265 1.856758 1.7241323 5.0397715 0.00000 0.3978767 0 0.00000
## 40871 0.000000 0.0000000 0.0000000 0.00000 0.0000000 0 0.00000
## 33338 0.000000 0.0000000 0.0000000 0.00000 0.9777236 0 0.00000
## 16259 0.000000 0.0000000 30.1053889 38.74675 38.7467505 0 10.31388
## 41389 0.000000 0.0000000 0.0000000 0.00000 0.0000000 0 0.00000
## SE.txt TI.txt Z1.txt Z2.txt Z3.txt Z4.txt
## 1307 0.0000000 0.7982894 27.341413 18.759801 18.9593737 52.88667
## 265 0.0000000 0.7957534 0.928379 3.713516 18.5675791 14.32356
## 40871 0.0000000 0.0000000 0.000000 0.000000 0.9777236 12.71041
## 33338 0.0000000 0.0000000 0.000000 0.000000 0.0000000 11.73268
## 16259 0.2787536 0.0000000 9.477622 15.052694 7.5263472 10.87139
## 41389 0.0000000 0.0000000 0.000000 0.000000 0.0000000 10.23003