#-----------------------------------------------#
# Title: 00_main
#-----------------------------------------------#
# Project: Ordered Logit Empirical Applications
# Author: Carlos Gonzalez
# Date: November 2025
#-----------------------------------------------#
# Description:
# - System prep including system set-up, 
# library loading, directory creation, 
# globals definition and code sourcing
# - Executes 01_risk.R and 02_altruism.R to 
# replicate all results in Apesteguia and Ballester (2025)
#----------------------------------------------#

rm(list = ls())

# 0a. Libraries
# -------------#
# Package description
# dplyr                   syntax and readability
# ggplot2                 plotting
# stringr                 regex functions
# tidyr                   data managing functions
# igraph                  graph manipulation
# future                  parallel computing
# future.apply            user-friendly parallel computing
# remotes                 install repository based algorithms
# gridExtra               grid in ggplot
# DescTools               class management
# twosamples              fast non-parametric testing of two-samples distributions
# tictoc                  time-management (developer use only)
# kableExtra              R to LaTex Tables
# latex2exp               LaTex Annotations in ggplot
# textshape               column_to_rownames() function
# huoston/shortestpath    access to bellman-ford algorithm

# Installing packages routine
package_list = c("dplyr", "ggplot2", "stringr", "tidyr",
                 "igraph", "future", "future.apply", "remotes",
                 "gridExtra", "twosamples", "tictoc", "kableExtra",
                 "latex2exp", "textshape", "kSamples")
package_list_repo = c("huoston/shortestpath")

missing_packages = package_list[!(package_list %in% 
                                    installed.packages()[,"Package"])]
missing_packages_repo = package_list_repo[!(package_list_repo %in% 
                                              installed.packages()[,"Package"])]
if (length(missing_packages)){
  install.packages(missing_packages)
  warning("The following packages ", missing_packages, 
          " will be installed in your device")}

if (length(missing_packages_repo)){
  remotes::install_github(missing_packages_repo)
  warning("The following packages ", missing_packages_repo, 
          " will be installed in your device from External Sources")}

library(stringr)
package_list_repo = sapply(package_list_repo, str_replace,
                           pattern = ".*\\/", replacement = "")

# Load libraries
for (package in c(package_list, package_list_repo))
{eval(bquote(library(.(package))))}

# 0b. Directory and folders
# -------------------------#
# Set main directory
dir = dirname(rstudioapi::getActiveDocumentContext()$path)
main_dir = str_replace(dir, "/[^/]*$", "")
setwd(main_dir) # Replace manually with main directory if needed

# Output directory (for tables, graphs and auxiliary files)
if (!dir.exists("output_final")){dir.create("output_final")}

# 0c. System Globals
#-------------------#
n_workers = 4 # Number of CPUs to be used in parallel computing

# 0d. Paper Globals
#-------------------#
J_global = 20 # Number of menus
pi_vector_global = c(1/100, 1/10) # Price boundaries

# 0e. Import functions
#---------------------#
source("code_final/01_risk.R")     # 01. Risk Application (and general functions)
source("code_final/02_altruism.R") # 02. Altruism Application

#---------------------#
#---------------------#
# 1. Risk Application #
#---------------------#
#---------------------#
# Section 6.1. in Apesteguia and Ballester

#---------------------#
# 1.A. Simulated data #
#---------------------#
# Menu data unordered
menu_data = data_gen()

# Menu data ordered
menu_data = order_data(menu_data)

# Table 1 (A) #
#-------------#
export_table_1a(menu_data) 

# Generate Choice data
n20_data = choice_data_gen(menu_data, 20, seed = 2)
n80_data = choice_data_gen(menu_data, 80, seed = 2)
n160_data = choice_data_gen(menu_data, 160, seed = 2)
n480_data = choice_data_gen(menu_data, 480, seed = 2)

# See seed discussion in 03_other

# Figure 1 (A) #
#--------------#
# Plot CDF of consumption ratio for a fixed set of menus
plot_cdf(list(n20_data, n80_data, 
              n160_data, n480_data), 
         c(8, 9, 10, 11, 12, 14, 15, 17, 19, 20), 10)

# Alternatively, consider a random subset
#plot_cdf(list(n20_data, n80_data, 
#              n160_data, n480_data), "random", 10)

#-----------------------------#
# 1.B. Nonparametric Analysis #
#-----------------------------#

# Figure 2 #
#----------#
# Generate weight matrix for some menu and quantile selection (quantile = 50)
weight_matrix_n20 = gen_weight_matrix(df = n20_data, plot = T)
weight_matrix_n80 = gen_weight_matrix(n80_data, plot = T)
weight_matrix_n160 = gen_weight_matrix(n160_data, plot = T)
weight_matrix_n480 = gen_weight_matrix(n480_data, plot = T)

# Table 3 #
#---------#
# We consider the same 20 quantiles per menu to make fair comparisons in number of violations
quantile_vector = n20_data |> pull(quantile) |> unique() |> sort()
quantile_vector

# Bellman-Ford (3 mins)
viols_n20_data = lapply_routine_bell(n20_data, quant_selection = quantile_vector)
viols_n80_data = lapply_routine_bell(n80_data, quant_selection = quantile_vector)
viols_n160_data = lapply_routine_bell(n160_data, quant_selection = quantile_vector)
viols_n480_data = lapply_routine_bell(n480_data, quant_selection = quantile_vector)

# Inspect violations
viols_n20_data
viols_n80_data
viols_n160_data
viols_n480_data

#-------------------------------#
# 1.C. Semi-parametric Analysis #
#-------------------------------#

# Figure 3 (A) #
#--------------#
plot_induced_param(list(n20_data, n80_data, n160_data, n480_data), 
                   menu_selection = "random", n_menus_random = 10,
                   recover_params = risk_aversion_params)

# Table 6 #
#---------#
# J-Samples AD test

# Computes AD test on a selection of quantiles
similarity_testing_wrap(list(n20_data, n80_data, n160_data, n480_data),
                        recover_params = risk_aversion_params,
                        quant_selection = quantile_vector)

#--------------------------#
# 1.D. Parametric Analysis #
#--------------------------#

# Table 7 and 8 #
#---------------#
# Mass analysis of r
mass_analysis_tables = mass_analysis(list(n20_data, n80_data, n160_data, n480_data))
lapply(mass_analysis_tables, mass_output)

# Figure 4 (A) #
#--------------#
# CLA property

# Generate CLA datasets for plot
cla_20 = cla_data(n20_data, recover_params = risk_aversion_params)
cla_80 = cla_data(n80_data, recover_params = risk_aversion_params)
cla_160 = cla_data(n160_data, recover_params = risk_aversion_params)
cla_480 = cla_data(n480_data, recover_params = risk_aversion_params)

# Plot CLA
plot_cla(list(cla_20, cla_80, cla_160, cla_480), true_params = c(0.6, 0.3))

# Table 11 (left) #
#-----------------#
# MLE Parametric estimation of Censored Logit
param_results = sapply(list(n20_data, n80_data, n160_data, n480_data), 
                       parametric_estimation, recover_params = risk_aversion_params)
param_results

# Table 11 (right) #
#------------------#

# AD test via Parametric Bootstrap 
bootstrap_ad_test(n20_data, risk_aversion_params)
bootstrap_ad_test(n80_data, risk_aversion_params)
bootstrap_ad_test(n160_data, risk_aversion_params)
bootstrap_ad_test(n480_data, risk_aversion_params)

#-------------------------#
#-------------------------#
# 2. Altruism Application #
#-------------------------#
#-------------------------#
# Section 6.2. in Apesteguia and Ballester

#---------------------#
# 2.A. Simulated Data #
#---------------------#

# Menu data ordered
menu_alt_data = data_gen_alt()

# Table 1 (B) #
#-------------#
export_table_1b(menu_alt_data)

# Generate Choice data
n20_alt_data = choice_data_alt_gen(menu_alt_data, 20)
n80_alt_data = choice_data_alt_gen(menu_alt_data, 80)
n160_alt_data = choice_data_alt_gen(menu_alt_data, 160)
n480_alt_data = choice_data_alt_gen(menu_alt_data, 480)

# Figure 1 (B) #
#--------------#
# Plot v-CDF for a selection of menus
plot_cdf_v(list(n20_alt_data, n80_alt_data, 
                n160_alt_data, n480_alt_data), "random", 10)

#-----------------------------#
# 2.B. Nonparametric Analysis #
#-----------------------------#

# Table 4 #
#---------#
# Verify WARP property for each pair of menus
warp20 = altr_warp(menu_alt_data, n20_alt_data)
warp80 = altr_warp(menu_alt_data, n80_alt_data)
warp160 = altr_warp(menu_alt_data, n160_alt_data)
warp480 = altr_warp(menu_alt_data, n480_alt_data)

warp20
warp80
warp160
warp480

#-------------------------------#
# 2.C. Semi-parametric Analysis #
#-------------------------------#

# Recover alpha for each quantile pair (3 mins)
alpha20 = recover_alpha(n20_alt_data)
alpha80 = recover_alpha(n80_alt_data)
alpha160 = recover_alpha(n160_alt_data)
alpha480 = recover_alpha(n480_alt_data)

# Table 5
#--------#
# Recover median alpha
med_alpha = median_alpha(list(alpha20, alpha80, alpha160, alpha480), 
                          true_alpha = .4)
med_alpha
# Median alpha used in the paper (see 99_instructions for a further discussion)
med_alpha = tibble(n_obs = c(20, 80, 160, 480),
                   alpha = c(0.445, 0.409, 0.387, 0.397))

# Figure 3 (B) #
#--------------#
plot_induced_param_alt(list(n20_alt_data, n80_alt_data, n160_alt_data, n480_alt_data),
                       recover_params = altruism_params, median_alpha = med_alpha)

# Table 6 #
#---------#
# J-Samples AD test

# Computes AD test on a selection of quantiles
similarity_testing_alt_wrap(df_list = list(n20_alt_data, n80_alt_data,
                                           n160_alt_data, n480_alt_data),
                            recover_params = altruism_params, menu_selection = "all",
                            n_menus_random = 10, seed = 12345,
                            quant_selection = "percentile", median_alpha = med_alpha)

#--------------------------#
# 2.D. Parametric Analysis #
#--------------------------#

# Table 9 and 10 #
#---------------#
# Mass analysis of r
mass_analysis_tables_alt = mass_analysis_alt(list(n20_alt_data, n80_alt_data, 
                                                  n160_alt_data, n480_alt_data))
lapply(mass_analysis_tables_alt, mass_output_alt)

# Figure 4 (B) #
#--------------#
# CLA property

# Generate CLA datasets for plotting
cla_20_alt = cla_data_alt(n20_alt_data, recover_params = altruism_params, median_alpha = med_alpha)
cla_80_alt = cla_data_alt(n80_alt_data, recover_params = altruism_params, median_alpha = med_alpha)
cla_160_alt = cla_data_alt(n160_alt_data, recover_params = altruism_params, median_alpha = med_alpha)
cla_480_alt = cla_data_alt(n480_alt_data, recover_params = altruism_params, median_alpha = med_alpha)

# Plot CLA
plot_cla_alt(list(cla_20_alt, cla_80_alt, cla_160_alt, cla_480_alt), 
             true_params = c(0.2, 0.2))

# Table 11 (left) #
#-----------------#
# MLE Parametric estimation of Censored Logit
param_results = sapply(list(n20_alt_data, n80_alt_data, n160_alt_data, n480_alt_data), 
                       parametric_estimation_alt, recover_params = altruism_params,
                       median_alpha = med_alpha)
param_results

# Table 11 (right) #
#------------------#

# AD test via Parametric Bootstrap 
bootstrap_ad_test_alt(n20_alt_data, altruism_params, median_alpha = med_alpha)
bootstrap_ad_test_alt(n80_alt_data, altruism_params, median_alpha = med_alpha)
bootstrap_ad_test_alt(n160_alt_data, altruism_params, median_alpha = med_alpha)
bootstrap_ad_test_alt(n480_alt_data, altruism_params, median_alpha = med_alpha)