Why should I know this?

DEX to IR 개발일지 - 간단한 bytecode <> IR 변환, mangling 필요?? 본문

LLVM-STUDY/TODO

DEX to IR 개발일지 - 간단한 bytecode <> IR 변환, mangling 필요??

die4taoam 2023. 1. 17. 06:12

mangling 이 필요해진 이유

 

; IR emitted for the test DEX method `int ArithmeticOpInt.add_int(int, int)`.
; The symbol name is the quoted, unmangled Java-style signature (spaces,
; parentheses and commas included), and the function has internal linkage.
; Takes two i32 arguments and returns their sum.
define internal i32 @"int ArithmeticOpInt.add_int(int, int)"(i32 %v1, i32 %v2) {
bb:
  ; Each argument is spilled to its own stack slot and immediately reloaded.
  %0 = alloca i32, align 4
  store i32 %v1, ptr %0, align 4
  %1 = load i32, ptr %0, align 4
  %2 = alloca i32, align 4
  store i32 %v2, ptr %2, align 4
  %3 = load i32, ptr %2, align 4
  ; Result: %v1 + %v2 (plain `add`, no nsw/nuw flags).
  %4 = add i32 %1, %3
  ret i32 %4
}

위처럼 테스트용 DEX에 산술연산 테스트용 메소드를 작성해 LLVM IR 로 변경했다.

컴파일은 문제 없이 진행된다.

 

컴파일 한 뒤 objdump 로 심볼을 확인해봤다.

00000000000005e4 l     F .text  0000000000000020              void ArithmeticOpInt.<init>()                    
0000000000000604 l     F .text  0000000000000020              int ArithmeticOpInt.add_int(int, int)            
0000000000000624 l     F .text  0000000000000020              int ArithmeticOpInt.and_int(int, int)            
0000000000000644 l     F .text  0000000000000020              int ArithmeticOpInt.div_int(int, int)            
0000000000000664 l     F .text  0000000000000020              int ArithmeticOpInt.mul_int(int, int)            
0000000000000684 l     F .text  0000000000000020              int ArithmeticOpInt.or_int(int, int)             
00000000000006a4 l     F .text  0000000000000028              int ArithmeticOpInt.rem_int(int, int)            
00000000000006cc l     F .text  0000000000000020              int ArithmeticOpInt.sub_int(int, int)            
00000000000006ec l     F .text  0000000000000020              int ArithmeticOpInt.xor_int(int, int)            
000000000000070c l     F .text  0000000000000020              void ArithmeticOpLong.<init>()                   
000000000000072c l     F .text  0000000000000020              long ArithmeticOpLong.add_long(long, long)       
000000000000074c l     F .text  0000000000000020              long ArithmeticOpLong.and_long(long, long)       
000000000000076c l     F .text  0000000000000020              long ArithmeticOpLong.div_long(long, long)       
000000000000078c l     F .text  0000000000000020              long ArithmeticOpLong.mul_long(long, long)       
00000000000007ac l     F .text  0000000000000020              long ArithmeticOpLong.or_long(long, long)        
00000000000007cc l     F .text  0000000000000028              long ArithmeticOpLong.rem_long(long, long)       
00000000000007f4 l     F .text  0000000000000020              long ArithmeticOpLong.sub_long(long, long)       
0000000000000814 l     F .text  0000000000000020              long ArithmeticOpLong.xor_long(long, long)

 

gdb로 열어서 info functions 로 확인해봤다.

Non-debugging symbols:                                                        
0x0000000000000488  _init                                                     
0x00000000000004c0  __cxa_finalize@plt                                        
0x00000000000004d0  void java.lang.Object.<init>()@plt                        
0x00000000000004e0  __gmon_start__@plt                                        
0x00000000000004f0  call_weak_fn                                              
0x0000000000000510  deregister_tm_clones                                      
0x0000000000000540  register_tm_clones                                        
0x0000000000000580  __do_global_dtors_aux                                     
0x00000000000005d0  frame_dummy                                               
0x00000000000005d4  void java.lang.Object.<init>()                            
0x00000000000005e4  void ArithmeticOpInt.<init>()                             
0x0000000000000604  int ArithmeticOpInt.add_int(int, int)                     
0x0000000000000624  int ArithmeticOpInt.and_int(int, int)                     
0x0000000000000644  int ArithmeticOpInt.div_int(int, int)                     
0x0000000000000664  int ArithmeticOpInt.mul_int(int, int)                     
0x0000000000000684  int ArithmeticOpInt.or_int(int, int)                      
0x00000000000006a4  int ArithmeticOpInt.rem_int(int, int)                     
0x00000000000006cc  int ArithmeticOpInt.sub_int(int, int)                     
0x00000000000006ec  int ArithmeticOpInt.xor_int(int, int)                     
0x000000000000070c  void ArithmeticOpLong.<init>()                            
0x000000000000072c  long ArithmeticOpLong.add_long(long, long)                
0x000000000000074c  long ArithmeticOpLong.and_long(long, long)                

 

근데 이 함수를 이름으로 찾을 방법이 없다.

(gdb) disas int ArithmeticOpInt.add_int(int, int)                      
A syntax error in expression, near `ArithmeticOpInt.add_int(int, int)'.
(gdb) disas "int ArithmeticOpInt.add_int(int, int)"                    
evaluation of this expression requires the target program to be active 

 

결국 주소참조해야 볼 수 있음.

(gdb) disas 0x0000000000000604                                                  
Dump of assembler code for function int ArithmeticOpInt.add_int(int, int):      
   0x0000000000000604 <+0>:     sub     sp, sp, #0x10                           
   0x0000000000000608 <+4>:     str     w0, [sp, #12]                           
   0x000000000000060c <+8>:     ldr     w8, [sp, #12]                           
   0x0000000000000610 <+12>:    str     w1, [sp, #8]                            
   0x0000000000000614 <+16>:    ldr     w9, [sp, #8]                            
   0x0000000000000618 <+20>:    add     w0, w8, w9                              
   0x000000000000061c <+24>:    add     sp, sp, #0x10                           
   0x0000000000000620 <+28>:    ret                                             
End of assembler dump.                                                          

 

원래 테스트 계획은 DEX -> IR 로 변경한 뒤 C 코드를 작성하여,

테스트 메소드를 호출하고 그 결과를 확인하는 방식이었는데, 공백과 괄호가 들어간 심볼명은 C 식별자로 그대로 쓸 수 없어서 아래처럼 메인코드를 IR로 작성했다.

 

; Hand-written IR test driver: calls the converted add_long method with
; (10, 10). The quoted symbol name is used verbatim as the callee.
; Attribute group #0 is not defined within this snippet.
define dso_local i32 @main() #0 {                                                    
  ; Two i64 stack slots, both initialised to 10, then loaded as the arguments.
  %1 = alloca i64, align 8                                                           
  %2 = alloca i64, align 8                                                           
  store i64 10, ptr %1, align 8                                                      
  store i64 10, ptr %2, align 8                                                      
  %3 = load i64, ptr %1, align 8                                                     
  %4 = load i64, ptr %2, align 8                                                     
  ; %5 is never used afterwards; main returns 0 regardless of the call result.
  %5 = call i64 @"long ArithmeticOpLong.add_long(long, long)"(i64 %3, i64 %4)        
  ret i32 0                                                                          
}

 

메인코드는 IR로 작성해서 위처럼 " " 안에 들어가는 문자를 그대로 호출할 함수명으로 사용할 수 있다.

하지만 테스트 코드를 매번 IR로 작성해야 한다. 그래야 할까?

 

(gdb) disas                                                                                            
Dump of assembler code for function main:                                                              
   0x0000aaaae8510724 <+0>:     sub     sp, sp, #0x20                                                  
   0x0000aaaae8510728 <+4>:     str     x30, [sp, #16]                                                 
   0x0000aaaae851072c <+8>:     mov     w8, #0xa                        // #10                         
   0x0000aaaae8510730 <+12>:    str     x8, [sp, #8]                                                   
   0x0000aaaae8510734 <+16>:    str     x8, [sp]                                                       
   0x0000aaaae8510738 <+20>:    ldr     x0, [sp, #8]                                                   
   0x0000aaaae851073c <+24>:    ldr     x1, [sp]                                                       
   0x0000aaaae8510740 <+28>:    bl      0xaaaae851089c <long ArithmeticOpLong.add_long(long, long)>    
=> 0x0000aaaae8510744 <+32>:    mov     w0, wzr                                                        
   0x0000aaaae8510748 <+36>:    ldr     x30, [sp, #16]                                                 
   0x0000aaaae851074c <+40>:    add     sp, sp, #0x20                                                  
   0x0000aaaae8510750 <+44>:    ret                                                                    
End of assembler dump.                                                                                 
(gdb) info reg x0                                                                                      
x0             0x14                20                                                                                                 

 

IR로 테스트를 작성하고 실험한 결과, add-long instruction은 제대로 IR로 변환된거 같아 보인다.

테스트 안해봐도 IR이나 ASM코드 보면 잘 된걸 확인 가능하긴 하다.

 

고정관념인지 뭔지 모르겠지만 역시 IR 로 변환된 코드를 IR로 활용해야 하는건지 아니면 타 언어와 결합하여 확실히 input/output을 검증해야 맞는건지 잘 모르겠다. 어쨌든 참 재밌긴 하다. 함수명을 만드는데 이렇게 제약이 적다는건 재미있는 일이고, 간단하게 Java Method 를 Native 로 내리고 동일한 이름의 Native Method 선언만 주입하면 바인딩도 쉽게 될 수 있다는 뜻 아닐까?

 

 

'LLVM-STUDY > TODO' 카테고리의 다른 글

LLVM Optimization study - simplify with ChatGPT  (0) 2023.03.19
LLVM instruction 추가 PR  (0) 2023.03.17
LLVM 공부/기록/공유 고민  (0) 2023.02.27
LLVM Optimization study - DCE  (0) 2023.02.17
DEX to IR 개발일지 - Class 변환  (0) 2022.12.28
Comments