@@ -6,6 +6,11 @@ install.packages("GGally")
6
6
install.packages(" tidyr" )
7
7
install.packages(" pander" )
8
8
install.packages(" ANOVA" )
9
+ install.packages(" ggpubr" )
10
+ install.packages(" sandwich" )
11
+ install.packages(" ggthemes" )
12
+ install.packages(" Hmisc" )
13
+ install.packages(" huxtable" )
9
14
10
15
library(dplyr )
11
16
library(memisc )
@@ -15,79 +20,51 @@ library(ggplot2)
15
20
library(ggfortify )
16
21
library(pander )
17
22
library(lme4 )
18
- library(ANOVA )
23
+ library(ggpubr )
24
+ library(car )
25
+ library(sandwich )
26
+ library(ggthemes )
27
+ library(Hmisc )
28
+ library(huxtable )
19
29
20
30
df = as.data.frame(read_spss(" BD final - estudantes.sav" ))
21
31
22
- # Base variables
23
32
24
- data = transmute(df , oportunidade = as.numeric(df $ tri_oportunidade ),
25
- pressao = as.numeric(df $ losango_motivacao ),
26
- racionalizacao = as.numeric(df $ tri_dist_moral ),
27
- fraudenctx = as.numeric(df $ norma_contexto ),
28
- fraudencop = as.numeric(df $ norma_copiar ),
29
- fraudenplg = as.numeric(df $ norma_plagio ),
30
- fraudeneud = as.numeric(df $ norma_eu_desonesto ),
31
- fraudenavg = rowMeans(df [68 : 71 ], na.rm = TRUE ),
32
- fraudkfreq = rowMeans(df [34 : 50 ], na.rm = TRUE ))
33
33
34
-
35
- GGally :: ggpairs(data )
36
34
# Models
37
35
# # Fraude by the Norm
38
- # ## Norma Contexto
36
+ datan = transmute(df , oportunidade = as.numeric(df $ tri_oportunidade ),
37
+ pressao = as.numeric(df $ losango_motivacao ),
38
+ racionalizacao = as.numeric(df $ tri_dist_moral ),
39
+ fraudenctx = as.numeric(df $ norma_contexto ),
40
+ fraudencop = as.numeric(df $ norma_copiar ),
41
+ fraudenplg = as.numeric(df $ norma_plagio ),
42
+ fraudeneud = as.numeric(df $ norma_eu_desonesto ),
43
+ fraudenavg = rowMeans(df [68 : 71 ], na.rm = TRUE ))
39
44
40
- model11 = lm(data = data , fraudenctx ~ oportunidade + pressao + racionalizacao )
41
- summary(model11 )
42
- autoplot(model11 )
45
+ # ## Norma Contexto
46
+ modeln1 = lm(data = datan , fraudenctx ~ oportunidade + pressao + racionalizacao )
43
47
44
48
# ## Norma Copiar
45
-
46
- model12 = lm(data = data , fraudencop ~ oportunidade + pressao + racionalizacao )
47
- summary(model12 )
49
+ modeln2 = lm(data = datan , fraudencop ~ oportunidade + pressao + racionalizacao )
48
50
49
51
# ## Norma Plágio
50
-
51
- model13 = lm(data = data , fraudenplg ~ oportunidade + pressao + racionalizacao )
52
- summary(model13 )
52
+ modeln3 = lm(data = datan , fraudenplg ~ oportunidade + pressao + racionalizacao )
53
53
54
54
# ## Norma Eu desonesto
55
-
56
- model14 = lm(data = data , fraudeneud ~ oportunidade + pressao + racionalizacao )
57
- summary(model14 )
55
+ modeln4 = lm(data = datan , fraudeneud ~ oportunidade + pressao + racionalizacao )
58
56
59
57
# ## Norma Avg
60
-
61
- model15 = lm(data = data , fraudenavg ~ oportunidade + pressao + racionalizacao )
62
- summary(model15 )
63
- autoplot(model15 ) + theme_bw()
64
- autoplot(prcomp(model15 ), scale = TRUE )
58
+ modelnavg = lm(data = datan , fraudenavg ~ oportunidade + pressao + racionalizacao )
65
59
66
60
# ## Comparing Models
67
61
# Compare the norm-based models fitted above by AIC (lower is better)
AIC(modeln1, modeln2, modeln3, modeln4, modelnavg)
68
62
69
- # # Fraud by Frequency
70
-
71
- model21 = lm(data = data , fraudkfreq ~ oportunidade + pressao + racionalizacao )
72
- summary(model21 )
73
-
74
- # Tests
75
- # Multicollinearity - bptest, gqtest
76
- bptest(model11 )
77
- bptest(model12 )
78
- bptest(model13 )
79
- bptest(model14 )
80
- bptest(model15 )
81
-
82
- gqtest(model13 )
83
- gqtest(model15 )
63
+ # Compare the norm-based models fitted above by BIC (lower is better)
+ BIC(modeln1, modeln2, modeln3, modeln4, modelnavg)
84
64
85
- # Heteroskedascitity - PCA?
86
-
87
-
88
- # Experimental - Explaining k frequency with k gravity assessement
89
- # Create the data
65
+ # ### Best model - modelnavg
90
66
67
+ # # Fraud by specific behaviours and the gravity assessment
91
68
datak = transmute(df , oportunidade = as.numeric(df $ tri_oportunidade ),
92
69
pressao = as.numeric(df $ losango_motivacao ),
93
70
racionalizacao = as.numeric(df $ tri_dist_moral ),
@@ -126,7 +103,7 @@ datak = transmute(df, oportunidade = as.numeric(df$tri_oportunidade),
126
103
kg16 = as.factor(df $ k_especificos_2_28 ),
127
104
kgavg = as.factor(as.integer(rowMeans(df [51 : 67 ], na.rm = TRUE ))))
128
105
129
- # Create models for each one
106
+ # # Models
130
107
modelk1 = lm(data = datak ,
131
108
kf1 ~ oportunidade + pressao + racionalizacao + kg1 )
132
109
@@ -175,13 +152,13 @@ modelk15 = lm(data = datak,
175
152
modelk16 = lm(data = datak ,
176
153
kf16 ~ oportunidade + pressao + racionalizacao + kg16 )
177
154
178
- # avg model
155
+ # Average model
179
156
modelkavg = lm(data = datak ,
180
157
kfavg ~ oportunidade + pressao + racionalizacao + kgavg )
181
158
summary(modelkavg )
182
159
autoplot(modelkavg )
183
160
184
- # Comparing models
161
+ # # Comparing models
185
162
AIC(modelk1 , modelk2 , modelk3 , modelk4 , modelk5 , modelk6 , modelk7 ,
186
163
modelk8 , modelk9 , modelk10 , modelk11 , modelk12 , modelk13 , modelk14 ,
187
164
modelk15 , modelk16 , modelkavg )
@@ -190,3 +167,63 @@ BIC(modelk1, modelk2, modelk3, modelk4, modelk5, modelk6, modelk7,
190
167
modelk8 , modelk9 , modelk10 , modelk11 , modelk12 , modelk13 , modelk14 ,
191
168
modelk15 , modelk16 , modelkavg )
192
169
170
+ # ## Best model - modelkavg
171
+ # Final Models
172
+ dataF = transmute(df , oportunidade = as.numeric(df $ tri_oportunidade ),
173
+ pressao = as.numeric(df $ losango_motivacao ),
174
+ racionalizacao = as.numeric(df $ tri_dist_moral ),
175
+ fraudenavg = rowMeans(df [68 : 71 ], na.rm = TRUE ),
176
+ kgavg = as.factor(as.integer(rowMeans(df [51 : 67 ], na.rm = TRUE ))),
177
+ kfavg = rowMeans(df [34 : 50 ], na.rm = TRUE ))
178
+
179
+ ModelN = lm(data = dataF , fraudenavg ~ oportunidade + pressao + racionalizacao )
180
+
181
+ ModelC = lm(data = dataF , kfavg ~ oportunidade + pressao + racionalizacao + kgavg )
182
+
183
+ # # Normality Tests
184
+ # ## Shapiro-Wilk's
185
+ shapiro.test(dataF $ oportunidade )
186
+ shapiro.test(dataF $ pressao )
187
+ shapiro.test(dataF $ racionalizacao )
188
+ shapiro.test(dataF $ fraudenavg )
189
+ # kgavg is stored as a factor; shapiro.test() only accepts numeric input,
+ # so recover the underlying integer scores from the factor labels first
+ shapiro.test(as.numeric(as.character(dataF$kgavg)))
190
+ shapiro.test(dataF $ kfavg )
191
+
192
+ # ### Graphical confirmation
193
+ ggqqplot(dataF $ fraudenavg ) +
194
+ theme_gdocs() +
195
+ labs(title = " QQPlot FraudeN" )
196
+
197
+ # QQ plot of kfavg, the response of ModelC (kgavg is a factor predictor)
+ ggqqplot(dataF$kfavg) +
198
+ theme_gdocs() +
199
+ labs(title = " QQPlot FraudeC" )
200
+
201
+ # # Autocorrelation - No
202
+ dwtest(ModelC )
203
+ dwtest(ModelN )
204
+ bgtest(ModelC )
205
+ bgtest(ModelN )
206
+
207
+ # # Multicollinearity - No
208
+ vif(ModelC )
209
+ vif(ModelN )
210
+
211
+ # # Heteroscedasticity - No
212
+ bptest(ModelC )
213
+ bptest(ModelN )
214
+ gqtest(ModelC )
215
+ gqtest(ModelN )
216
+
217
+ ncvTest(ModelC )
218
+
219
+ # ## Graphical Confirmation
220
+ autoplot(ModelC )
221
+
222
+ # Analysing the results
223
+ summary(ModelN )
224
+ summary(ModelC )
225
+
226
+
227
+
228
+
229
+
0 commit comments