Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Python binding lost mem operands of arm64 Insn by capstone v5.0.1 #2286

Open
DiamondHunters opened this issue Mar 10, 2024 · 4 comments
Open
Labels
Milestone

Comments

@DiamondHunters
Copy link

It's similar to #2260
cstool and python binding are the same version but give different results.

The Python binding API loses the mem operand.
Python code (modified from the capstone arm64 test):

from __future__ import print_function

import capstone
from capstone import *
from capstone.arm64 import *


# Raw bytes of the single instruction under test; the output reported in this
# issue shows it disassembling to `ldrb w11, [x9, #0xae]`.
ARM64_CODE = bytes.fromhex("2bb94239")

# Each entry is (arch, mode, code, comment); test_class() unpacks it in that order.
all_tests = (
        (CS_ARCH_ARM64, CS_MODE_ARM, ARM64_CODE, "ARM-64"),
        )


def _to_x(value):
    """Return *value* as bare hexadecimal digits, without a ``0x`` prefix.

    Negative values are rendered as 64-bit two's complement so that the
    literal ``0x`` written by the caller is never followed by a minus sign.
    """
    return "%x" % (value & 0xFFFFFFFFFFFFFFFF)


def print_insn_detail(insn):
    """Print one disassembled ARM64 instruction followed by its details.

    The first line shows address, mnemonic and operand string.  When detail
    is available, each operand is then listed with its type-specific fields,
    shift/extend/vector attributes and access mode, followed by the
    instruction-level write-back / condition-code / flag-update state and the
    registers read and modified.

    Args:
        insn: an instruction object yielded by ``Cs.disasm()`` with
            ``detail`` enabled.
    """
    # print address, mnemonic and operands
    print("0x%x:\t%s\t%s" % (insn.address, insn.mnemonic, insn.op_str))

    # "data" instruction generated by SKIPDATA option has no detail
    if insn.id == 0:
        return

    operands = insn.operands
    if len(operands) > 0:
        print("\top_count: %u" % len(operands))
        for c, i in enumerate(operands):
            # An operand has exactly one type, so the checks are exclusive.
            if i.type == ARM64_OP_REG:
                print("\t\toperands[%u].type: REG = %s" % (c, insn.reg_name(i.reg)))
            elif i.type == ARM64_OP_IMM:
                # The format string already supplies "0x"; hex() would add a
                # second prefix and print "0x0x...".
                print("\t\toperands[%u].type: IMM = 0x%s" % (c, _to_x(i.imm)))
            elif i.type == ARM64_OP_CIMM:
                print("\t\toperands[%u].type: C-IMM = %u" % (c, i.imm))
            elif i.type == ARM64_OP_FP:
                print("\t\toperands[%u].type: FP = %f" % (c, i.fp))
            elif i.type == ARM64_OP_MEM:
                print("\t\toperands[%u].type: MEM" % c)
                # Zero-valued fields are skipped.
                if i.mem.base != 0:
                    print("\t\t\toperands[%u].mem.base: REG = %s"
                          % (c, insn.reg_name(i.mem.base)))
                if i.mem.index != 0:
                    print("\t\t\toperands[%u].mem.index: REG = %s"
                          % (c, insn.reg_name(i.mem.index)))
                if i.mem.disp != 0:
                    # `to_x` was never defined or imported in this script, so
                    # this line used to raise NameError for any mem operand
                    # with a displacement.
                    print("\t\t\toperands[%u].mem.disp: 0x%s"
                          % (c, _to_x(i.mem.disp)))
            elif i.type == ARM64_OP_REG_MRS:
                print("\t\toperands[%u].type: REG_MRS = 0x%x" % (c, i.reg))
            elif i.type == ARM64_OP_REG_MSR:
                print("\t\toperands[%u].type: REG_MSR = 0x%x" % (c, i.reg))
            elif i.type == ARM64_OP_PSTATE:
                print("\t\toperands[%u].type: PSTATE = 0x%x" % (c, i.pstate))
            elif i.type == ARM64_OP_SYS:
                print("\t\toperands[%u].type: SYS = 0x%x" % (c, i.sys))
            elif i.type == ARM64_OP_PREFETCH:
                print("\t\toperands[%u].type: PREFETCH = 0x%x" % (c, i.prefetch))
            elif i.type == ARM64_OP_BARRIER:
                print("\t\toperands[%u].type: BARRIER = 0x%x" % (c, i.barrier))

            if i.shift.type != ARM64_SFT_INVALID and i.shift.value:
                print("\t\t\tShift: type = %u, value = %u" % (i.shift.type, i.shift.value))

            if i.ext != ARM64_EXT_INVALID:
                print("\t\t\tExt: %u" % i.ext)

            if i.vas != ARM64_VAS_INVALID:
                print("\t\t\tVector Arrangement Specifier: 0x%x" % i.vas)

            # if i.vess != ARM64_VESS_INVALID:
            #     print("\t\t\tVector Element Size Specifier: %u" % i.vess)

            if i.vector_index != -1:
                print("\t\t\tVector Index: %u" % i.vector_index)

            if i.access == CS_AC_READ:
                print("\t\toperands[%u].access: READ\n" % (c))
            elif i.access == CS_AC_WRITE:
                print("\t\toperands[%u].access: WRITE\n" % (c))
            elif i.access == CS_AC_READ | CS_AC_WRITE:
                print("\t\toperands[%u].access: READ | WRITE\n" % (c))

    if insn.writeback:
        print("\tWrite-back: True")
    if insn.cc not in [ARM64_CC_AL, ARM64_CC_INVALID]:
        print("\tCode-condition: %u" % insn.cc)
    if insn.update_flags:
        print("\tUpdate-flags: True")

    (regs_read, regs_write) = insn.regs_access()

    if len(regs_read) > 0:
        print("\tRegisters read:", end="")
        for r in regs_read:
            print(" %s" % (insn.reg_name(r)), end="")
        print("")

    if len(regs_write) > 0:
        print("\tRegisters modified:", end="")
        for r in regs_write:
            print(" %s" % (insn.reg_name(r)), end="")
        print("")


# ## Test class Cs
def test_class():
    """Disassemble every entry of ``all_tests`` and print full details.

    For each (arch, mode, code, comment) entry a ``Cs`` instance is created
    with detail enabled, every decoded instruction is passed to
    ``print_insn_detail``, and finally the address just past the last decoded
    instruction is printed.  A ``CsError`` is reported and does not abort the
    remaining entries.
    """
    for (arch, mode, code, comment) in all_tests:
        print("*" * 16)
        print("Platform: %s" % comment)
        print("Code: %s" % (code.hex()))
        print("Disasm:")

        try:
            md = Cs(arch, mode)
            md.detail = True
            start_address = 0
            # Track the end address explicitly: reading the loop variable
            # after the loop fails with an unbound name when nothing decodes
            # (or silently reuses the instruction from a previous entry).
            end_address = start_address
            for insn in md.disasm(code, start_address):
                print_insn_detail(insn)
                print()
                end_address = insn.address + insn.size
            print("0x%x:\n" % end_address)
        except CsError as e:
            print("ERROR: %s" % e)


# Script entry point: report the binding's version string, then run the
# disassembly test so the output can be compared against cstool.
if __name__ == '__main__':
    print("version :", capstone.__version__)
    test_class()
Output:

version : 5.0.1
****************
Platform: ARM-64
Code: 2bb94239
Disasm:
0x0:	ldrb	w11, [x9, #0xae]
	op_count: 1
		operands[0].type: REG = w11
		operands[0].access: WRITE

	Registers modified: w11

0x4:

but cstool gave the correct result:

$ cstool                  
Cstool for Capstone Disassembler Engine v5.0.1

$ cstool -d arm64 2bb94239
 0  2b b9 42 39  ldrb   w11, [x9, #0xae]
        ID: 561 (ldrb)
        op_count: 2
                operands[0].type: REG = w11
                operands[0].access: WRITE
                operands[1].type: MEM
                        operands[1].mem.base: REG = x9
                        operands[1].mem.disp: 0xae
                operands[1].access: READ
        Registers read: x9
        Registers modified: w11
@DiamondHunters
Copy link
Author

DYLD_PRINT_LIBRARIES=1 python -c 'import capstone'
library load:
dyld[18101]: <81B692C8-CFD7-3ADD-842D-AA9DFA176748> [delete]/.venv11/lib/python3.11/site-packages/capstone/lib/libcapstone.dylib
cs_version of this lib(decompiled by IDA):

// IDA decompilation of cs_version() from the loaded libcapstone.dylib.
// When both out-pointers are non-NULL it stores 5 and 0 through them, and it
// always returns the constant 1280.
__int64 __fastcall cs_version(_DWORD *a1, _DWORD *a2)
{
  if ( a1 && a2 )
  {
    *a1 = 5;  // presumably the major version -- confirm against capstone.h
    *a2 = 0;  // presumably the minor version -- confirm against capstone.h
  }
  return 1280LL;  // 1280 == 0x500 == (5 << 8) | 0
}

I also tried forcing Python to load the libcapstone.5.dylib that is bundled with cstool and got the same result,
so I think the Python bindings use the correct library but still give the wrong result. That's so weird.

@DiamondHunters
Copy link
Author

The next branch gives the correct result, but a lot of symbols were changed, so refactoring the code would be a lot of work.

version : 5.0.0
****************
Platform: ARM-64
Code: 2bb94239
Disasm:
0x0:	ldrb	w11, [x9, #0xae]
	op_count: 2
		operands[0].type: REG = w11
		operands[0].access: WRITE

		operands[1].type: MEM
			operands[1].mem.base: REG = x9
			operands[1].mem.disp: 0x0xae
		operands[1].access: READ

	Registers read: x9
	Registers modified: w11

0x4:

@Rot127
Copy link
Collaborator

Rot127 commented Mar 12, 2024

If you want to use next branch, you can refer to the release guide for v6 (see "Note about AArch64" section).
There we document how to use the meta-programming macros to make the refactor easier.

@DiamondHunters
Copy link
Author

release guide for v6

thanks

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Projects
None yet
Development

No branches or pull requests

2 participants