Skip to content

Optimize branch patterns in peephole optimizer #290

@jserv

Description

@jserv

Description

Branch instructions often follow patterns that can be optimized for better performance and code size. This includes combining compare-and-branch, eliminating redundant branches, and optimizing branch chains.

Branch Patterns to Optimize

1. Compare and Branch Fusion

# ARM - Before
cmp r0, #0
beq label

# ARM - After  
cbz r0, label    ; Combined compare-branch-zero

# RISC-V - Before
li t0, 0
beq a0, t0, label

# RISC-V - After
beqz a0, label   ; Pseudo-instruction for branch-equal-zero

2. Branch Chain Optimization

# Before
b .L1
.L1:
b .L2

# After
b .L2            ; Direct branch to final target

3. Conditional Branch Inversion

# Before (inefficient layout)
cmp r0, #0
bne .L1
b .L2
.L1:

# After (fall-through optimization)
cmp r0, #0
beq .L2
.L1:

4. Dead Code After Unconditional Branch

# Before
b end_func
add r0, r0, #1   ; Unreachable
mov r1, #0       ; Unreachable
end_func:

# After
b end_func
end_func:

5. Redundant Conditional Branches

# Before
cmp r0, r1
beq .same
bne .different   ; Redundant - always taken if beq not taken

# After
cmp r0, r1
beq .same
; Fall through to .different
.different:

6. Compare Elimination

# Before
sub r2, r0, r1   ; Sets flags
cmp r0, r1       ; Redundant comparison

# After
sub r2, r0, r1   ; Flags already set
; Use flags from sub

Implementation

1. Branch Target Analysis

typedef struct {
    char *label;
    int offset;
    bool is_forward;
    int jump_count;  // How many jumps to reach final target
} branch_target_t;

branch_target_t *analyze_branch_target(insn_t *branch) {
    branch_target_t *target = malloc(sizeof(branch_target_t));
    target->label = branch->target_label;
    
    // Follow branch chains
    while (is_unconditional_branch(get_insn_at_label(target->label))) {
        target->label = get_insn_at_label(target->label)->target_label;
        target->jump_count++;
    }
    
    return target;
}

2. Pattern Matching for Fusion

typedef struct {
    insn_t *cmp;
    insn_t *branch;
    bool can_fuse;
} cmp_branch_pair_t;

bool can_fuse_cmp_branch(insn_t *cmp, insn_t *branch) {
    // Check if consecutive
    if (cmp->next != branch) return false;
    
    // Check if comparing with zero
    if (cmp->op == OP_CMP && is_zero_operand(cmp->rs2)) {
        // Check branch type
        switch (branch->cond) {
        case COND_EQ:  // Can use cbz
        case COND_NE:  // Can use cbnz
            return true;
        }
    }
    
    return false;
}

void apply_cmp_branch_fusion(cmp_branch_pair_t *pair) {
    // Replace with fused instruction
    if (pair->branch->cond == COND_EQ) {
        create_cbz(pair->cmp->rs1, pair->branch->target);
    } else {
        create_cbnz(pair->cmp->rs1, pair->branch->target);
    }
    
    // Mark original instructions for deletion
    mark_deleted(pair->cmp);
    mark_deleted(pair->branch);
}

3. Branch Chain Elimination

void optimize_branch_chains(func_t *func) {
    for (bb in func->bbs) {
        insn_t *last = get_last_insn(bb);
        
        if (is_branch(last)) {
            branch_target_t *target = analyze_branch_target(last);
            
            if (target->jump_count > 0) {
                // Update to direct branch
                last->target_label = target->label;
                last->offset = calculate_offset(last, target->label);
            }
        }
    }
}

4. Layout Optimization

typedef struct {
    basic_block_t *bb;
    int exec_frequency;
    bool is_loop_header;
} bb_layout_t;

void optimize_branch_layout(func_t *func) {
    // Calculate execution frequencies
    bb_layout_t *layout = analyze_frequencies(func);
    
    // Place hot blocks together
    for (int i = 0; i < func->bb_count; i++) {
        if (layout[i].exec_frequency > HOT_THRESHOLD) {
            // Try to make hot path fall-through
            if (can_invert_branch(layout[i].bb)) {
                invert_branch_condition(layout[i].bb);
                swap_successors(layout[i].bb);
            }
        }
    }
}

5. Conditional Move Generation

bool try_conditional_move(insn_t *branch) {
    // Pattern: branch around single move
    if (!is_conditional_branch(branch)) return false;
    
    basic_block_t *taken = branch->taken_target;
    basic_block_t *not_taken = branch->not_taken_target;
    
    // Check for simple move pattern
    if (count_insns(taken) == 1 && is_move(taken->first_insn)) {
        if (arch_has_conditional_move()) {
            // Replace with conditional move
            create_conditional_move(branch->cond, 
                                  taken->first_insn->rd,
                                  taken->first_insn->rs1);
            remove_branch(branch);
            return true;
        }
    }
    
    return false;
}

Test Cases

// Compare and branch fusion
void test_cbz(int x) {
    if (x == 0) {     // Should use cbz
        do_something();
    }
}

// Branch chain
void test_chain() {
    goto label1;
label1:
    goto label2;      // Should optimize to direct jump
label2:
    return;
}

// Layout optimization
int test_hot_path(int x) {
    if (x > 0) {      // Assume hot path
        return x * 2;
    } else {          // Cold path
        return complex_calculation(x);
    }
}

// Dead code elimination
int test_dead_after_branch() {
    if (condition) {
        return 1;
        x = 5;        // Dead code
    }
    return 0;
}

Architecture-Specific Optimizations

ARM

  • Utilize cbz/cbnz instructions
  • Use conditional execution (IT blocks in Thumb-2)
  • Optimize for pipeline characteristics

RISC-V

  • Use compressed branch instructions when possible
  • Optimize JAL vs JALR usage
  • Consider branch prediction hints

Performance Impact

  • 5-10% reduction in branch instructions
  • Improved branch prediction accuracy
  • Better I-cache utilization
  • Reduced pipeline stalls

Success Metrics

  • Reduction in total branch count
  • Improved fall-through rate
  • Fewer mispredicted branches
  • Smaller code size

Metadata

Metadata

Assignees

No one assigned

    Labels

    enhancementNew feature or request

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions