In Compile Linux Kernel to LLVM Bitcode, we used wllvm to generate LLVM bitcode for the Linux kernel. When analyzing the kernel, we sometimes need to disable certain optimization passes. However, this is hard to do with clang directly. It is easier to first compile the kernel to -O0 IR and then apply only the optimization passes we want.

The kernel is compiled with -O2 by default, so the resulting LLVM IR is optimized at -O2 as well. To get -O0 LLVM IR, the obvious approach is to change the kernel compilation flags:

1
# KCFLAGS (with the trailing S) is the Kbuild variable for extra C compiler flags.
make KCFLAGS='-O0' CC=wllvm

However, it doesn’t work because some kernel code cannot be compiled with -O0.

Fortunately, we have another way to get our -O0 LLVM IR.

When the kernel is built with wllvm, LLVM bitcode (.filename.o.bc) is generated for each C file, and the command used to compile that file is recorded in .filename.o.cmd. We can extract that command and redirect its output file as follows:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
path='/path/to/our/output'
# $1 is the .filename.o.cmd file. Its first line looks like
#   cmd_<target> := <compiler> <args...> -o <object> <source>
# so the real command starts at field 3. The argument following "-o" is
# replaced by $path. The path is handed to awk through the environment
# (ENVIRON) instead of being spliced into the awk program text, so quotes or
# backslashes in the path can neither break nor alter the awk program.
awk_cmd='NR == 1 {
  s = ""
  ofile = 0
  for (i = 3; i <= NF; i++) {
    if (ofile == 1) {
      s = s ENVIRON["out_path"] " "
      ofile = 0
    } else {
      if ($i == "-o")
        ofile = 1
      s = s $i " "
    }
  }
  print s
  exit
}'
cmd_line=$(out_path="$path" awk "$awk_cmd" < "$1")

The output file path will be replaced by the path we provide.

This command generates native code. To get LLVM bitcode, we need to add -emit-llvm. To disable optimization, we need to add -mllvm -disable-llvm-optzns.

After generating the -O0 bitcode, we can optimize it with whatever passes we want. Finally, we link all the generated bitcode files together to get a kernel bitcode with custom optimization.

Below is an example script:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#!/bin/bash

# Default program used for the final bitcode link; the -l command-line
# option replaces this value.
llvm_linker="llvm-link"

#######################################
# Build the shell command that compiles one source file to unoptimized
# LLVM bitcode, based on a recorded ".o.cmd" file.
#
# Only the first line of the file is used. Its first two whitespace-separated
# fields are skipped (the command is taken to start at field 3), the argument
# following "-o" is replaced by the requested output path, and
# "-emit-llvm -mllvm -disable-llvm-optzns" is appended.
#
# Arguments:
#   $1 - path to the .o.cmd file
#   $2 - output path to substitute for the original "-o" argument
# Outputs:
#   The command line on stdout. It is meant to be passed to `eval`, so the
#   output path is shell-quoted (identical to the raw path when it contains
#   no special characters).
# Returns:
#   0 on success, non-zero if the file is unreadable or awk fails.
#######################################
function get_cmd
{
  local cmd_file="$1"
  local out_path="$2"
  local quoted_out cmd_line

  if [[ ! -r "$cmd_file" ]]; then
    printf 'get_cmd: cannot read %s\n' "$cmd_file" >&2
    return 1
  fi

  # The result is eval'ed by the caller, so quote the path for the shell.
  printf -v quoted_out '%q' "$out_path"

  # The path goes to awk via ENVIRON rather than being spliced into the awk
  # source: ENVIRON values are taken literally, so quotes and backslashes in
  # the path can neither break nor inject into the awk program.
  cmd_line=$(
    GET_CMD_OUT_PATH="$quoted_out" awk 'NR == 1 {
      s = ""
      sep = ""
      replace_next = 0
      for (i = 3; i <= NF; i++) {
        tok = $i
        if (replace_next) {
          tok = ENVIRON["GET_CMD_OUT_PATH"]
          replace_next = 0
        } else if (tok == "-o") {
          replace_next = 1
        }
        s = s sep tok
        sep = " "
      }
      print s
      exit
    }' < "$cmd_file"
  ) || return

  # -emit-llvm: produce LLVM bitcode instead of native code.
  # -mllvm -disable-llvm-optzns: skip the LLVM optimization passes.
  # printf with a quoted argument avoids the glob expansion that an unquoted
  # echo would perform on the command text.
  printf '%s\n' "${cmd_line:+${cmd_line} }-emit-llvm -mllvm -disable-llvm-optzns"
}

#######################################
# Compile one source file to unoptimized bitcode and run the chosen `opt`
# passes over it.
#
# The compile command is obtained from get_cmd and executed with eval; it
# writes "<basepath>.tmp.bc". `opt` then writes "<basepath>.opt.bc".
#
# Arguments:
#   $1 - path to the .o.cmd file describing the original compile command
#   $2 - base path (no suffix) for the intermediate and optimized bitcode
# Returns:
#   Returns 0 on success. On any failure it calls `exit 1`, terminating the
#   current (sub)shell; the script runs it as a background job, so only that
#   job ends.
#######################################
function compile_opt
{
  # All locals: a foreground call must not clobber the caller's cmd_path,
  # basepath or output_path.
  local cmd_path="$1"
  local basepath="$2"
  local tmp_path="${basepath}.tmp.bc"
  local output_path="${basepath}.opt.bc"
  local cmd

  cmd=$(get_cmd "$cmd_path" "$tmp_path") || exit 1

  # NOTE(review): eval runs text taken from the .cmd file; only use this on
  # trusted build output. Compiler stderr is discarded on purpose (as in the
  # original), so report the failing file ourselves.
  if ! eval "$cmd" 2> /dev/null || [[ ! -e "$tmp_path" ]]; then
    printf 'compile_opt: failed to compile %s\n' "$cmd_path" >&2
    exit 1
  fi

  if ! opt -always-inline -inline -mem2reg -simplifycfg -instcombine \
      "$tmp_path" -o "$output_path" || [[ ! -e "$output_path" ]]; then
    printf 'compile_opt: opt failed for %s\n' "$tmp_path" >&2
    exit 1
  fi
}

# Command line: [-l <llvm-link>] <input file>
#   -l  program used for the final bitcode link (overrides llvm_linker)
# The optstring starts with ':' (silent mode), so a missing option argument
# is reported as ':' and an unknown option as '?'; both must be handled here,
# otherwise "-l" without a value would be silently ignored.
while getopts ":l:" opt; do
  case "$opt" in
    l)
      llvm_linker=$OPTARG
      ;;
    :)
      echo "option -$OPTARG requires an argument" 1>&2
      echo "Usage: -l <llvm-link>" 1>&2
      exit 1
      ;;
    \?)
      echo "Usage: -l <llvm-link>" 1>&2
      exit 1
      ;;
  esac
done
shift $((OPTIND-1))

# First remaining argument: the file whose bitcode manifest is extracted.
from_file=$1
if [[ -z "$from_file" ]]; then
  echo "no input file" 1>&2
  exit 1
fi

# Ask extract-bc to write the manifest of per-object bitcode files (-m).
# NOTE(review): "-l false" presumably substitutes the no-op `false` command
# for the linker so that only the manifest is produced; extract-bc's exit
# status is therefore not checked here, and the manifest's presence is
# tested instead -- confirm against the wllvm documentation.
extract-bc -l false -m "$from_file"
manifest="${from_file}.llvm.manifest"
if [[ ! -r "$manifest" ]]; then
  echo "no manifest file" 1>&2
  exit 1
fi

# Start with an empty list of optimized bitcode files.
tmpmanifest="${from_file}.tmp.manifest"
: > "$tmpmanifest"

# Re-compile and optimize every bitcode file listed in the manifest, running
# at most max_jobs compile_opt jobs at a time.
#
# The loop reads the manifest through a redirection instead of `cat ... |`:
# a pipeline would run the loop in a subshell, the background jobs would
# belong to that subshell, and the wait below would return immediately
# while compiles were still running.
max_jobs=8
while IFS= read -r o_bc_name || [[ -n "$o_bc_name" ]]; do
  # Skip blank lines.
  [[ -n "$o_bc_name" ]] || continue

  # "<dir>/<name>.o.bc" -> base path "<dir>/<name>".
  name="${o_bc_name%.o.bc}"
  name="${name##*/}"
  if [[ "$o_bc_name" == */* ]]; then
    dir="${o_bc_name%/*}"
  else
    # No directory component: the file is relative to the current directory.
    dir="."
  fi
  basepath="${dir}/${name}"

  cmd_path="${basepath}.o.cmd"
  if [[ ! -r "$cmd_path" ]]; then
    # Diagnostic goes to stderr so stdout only carries output paths.
    echo "${cmd_path} not found" 1>&2
    continue
  fi

  # Throttle: poll until fewer than max_jobs background jobs are running.
  while (( $(jobs -r -p | wc -l) >= max_jobs )); do
    sleep 0.1
  done

  output_path="${basepath}.opt.bc"
  compile_opt "$cmd_path" "$basepath" &
  echo "$output_path"
  # The path is recorded before the job finishes; a failed job leaves a
  # missing file that the final link step will report.
  echo "$output_path" >> "${tmpmanifest}"
done < "$manifest"

# Barrier: block until every background compile job has finished.
wait

# Link every optimized bitcode file listed in the temporary manifest into
# "<input>.opt.bc". The list is read line by line into an array so that paths
# containing spaces or glob characters are passed to the linker unchanged.
outputfile="${from_file}.opt.bc"
bc_files=()
while IFS= read -r bc_file || [[ -n "$bc_file" ]]; do
  if [[ -n "$bc_file" ]]; then
    bc_files+=("$bc_file")
  fi
done < "${tmpmanifest}"
# llvm_linker is treated as a single program path (as the usage text
# "-l <llvm-link>" describes), hence the quotes.
"$llvm_linker" -o "$outputfile" "${bc_files[@]}"