diff -r 971d67e92dbf -r 7115e0d6c0ba lib/libc/sys/ptrace.2
--- a/lib/libc/sys/ptrace.2	Mon Jun 05 11:46:51 2006 +0200
+++ b/lib/libc/sys/ptrace.2	Mon Jun 05 20:14:31 2006 +0200
@@ -299,7 +299,55 @@ This allows the structure to grow withou
 This allows the structure to grow without affecting older programs.
 .El
 .Pp
-Additionally, machine-specific requests can exist.
+Additionally, machine-specific requests can exist. On i386 systems where
+the kernel has been configured with "options BRANCH_SINGLE_STEP", these
+include
+.Bl -tag -width 12n
+.hw PT_BSSTEP
+.hw PT_LASTBRANCH
+.hw PT_LASTEXCEPTION
+.hw BRANCH_SINGLE_STEP
+.It Dv PT_BSSTEP
+This request modifies the behaviour of the PT_STEP request.
+.Pp
+If the
+.Fa data
+argument to PT_BSSTEP is non-zero, the behaviour of PT_STEP is changed to
+"single step on branches", i.e. a PT_STEP request will continue execution
+of the traced process until it executes a branch instruction (e.g. JMP,
+Jcc, CALL or RET) or an interrupt or exception is generated.
+.Pp
+If the 
+.Fa data 
+argument is zero, PT_BSSTEP restores the default behaviour of the 
+PT_STEP request, i.e. single step on every instruction.
+.It Dv PT_LASTBRANCH
+When single-stepping on branches, this request will return the addresses
+of the last branch instruction taken and the target of that branch.
+The
+.Fa addr
+argument specifies a pointer to a
+.Vt "struct ptrace_bss_addrs" ,
+which is defined as follows:
+.Bd -literal
+struct ptrace_bss_addrs {
+	int	from;
+	int	to;
+};
+.Ed
+.It Dv PT_LASTEXCEPTION
+When single-stepping on branches, this request will return the address
+of the instruction that was interrupted by the last exception or interrupt
+(except for debug exceptions) and the address of the exception or interrupt 
+handler that was called to service the exception or interrupt. The
+.Fa addr
+argument specifies a pointer to a 
+.Vt "struct ptrace_bss_addrs" .
+.El
+.Pp
+The results of PT_LASTBRANCH and PT_LASTEXCEPTION are undefined when not
+single-stepping on branches. The PT_BSSTEP, PT_LASTBRANCH and PT_LASTEXCEPTION
+requests fail with EINVAL if the kernel was compiled without "options BRANCH_SINGLE_STEP".
 .Sh RETURN VALUES
 Some requests can cause
 .Fn ptrace
diff -r 971d67e92dbf -r 7115e0d6c0ba sys/conf/options.i386
--- a/sys/conf/options.i386	Mon Jun 05 11:46:51 2006 +0200
+++ b/sys/conf/options.i386	Mon Jun 05 20:14:31 2006 +0200
@@ -163,3 +163,6 @@ ASR_COMPAT		opt_asr.h
 # Debugging
 KDB_STOP_NMI		opt_kdb.h
 NPX_DEBUG		opt_npx.h
+
+# Branch Single Stepping
+BRANCH_SINGLE_STEP             opt_bss.h
diff -r 971d67e92dbf -r 7115e0d6c0ba sys/i386/conf/NOTES
--- a/sys/i386/conf/NOTES	Mon Jun 05 11:46:51 2006 +0200
+++ b/sys/i386/conf/NOTES	Mon Jun 05 20:14:31 2006 +0200
@@ -1098,3 +1098,6 @@ device		io
 # asr old ioctls support, needed by raidutils
 
 options		ASR_COMPAT
+
+# Branch Single Stepping
+options		BRANCH_SINGLE_STEP
\ No newline at end of file
diff -r 971d67e92dbf -r 7115e0d6c0ba sys/i386/i386/exception.s
--- a/sys/i386/i386/exception.s	Mon Jun 05 11:46:51 2006 +0200
+++ b/sys/i386/i386/exception.s	Mon Jun 05 20:14:31 2006 +0200
@@ -32,10 +32,15 @@
 
 #include "opt_apic.h"
 #include "opt_npx.h"
+#include "opt_bss.h"
 
 #include <machine/asmacros.h>
 #include <machine/psl.h>
 #include <machine/trap.h>
+
+#ifdef BRANCH_SINGLE_STEP
+#include <machine/specialreg.h>
+#endif
 
 #include "assym.s"
 
@@ -304,6 +309,33 @@ doreti_exit:
 doreti_exit:
 	MEXITCOUNT
 
+#ifdef BRANCH_SINGLE_STEP
+	movl    PCPU(CURPCB),%edx
+	testl	$PCB_BRANCH_SINGLE_STEP,PCB_FLAGS(%edx)
+	jnz 1f
+	/*
+	 * Static branch prediction should predict this forward branch as not
+	 * taken, so we handle the default case here and accept a misprediction
+	 * if we're using branch single-stepping.
+	 */
+
+	xorl	%eax,%eax
+	jmp	2f
+
+	/*
+	 * Prepare to write the DebugCtlMSR: the low 32 bits go in %eax; the high 32 bits (%edx) are not used and are cleared below.
+	 * Bit 0: If set, record last branch/exception information in the Last(Branch|Exception)(From|To)IP MSRs
+	 * Bit 1: If set, change semantics of the single step flag (TF) in EFLAGS to single step only on branches
+	 * We want both.
+	 */
+1:
+        movl	$3,%eax	
+	
+2:
+	xorl	%edx,%edx
+	movl	$MSR_DEBUGCTLMSR,%ecx
+	wrmsr
+#endif
 	.globl	doreti_popl_fs
 doreti_popl_fs:
 	popl	%fs
diff -r 971d67e92dbf -r 7115e0d6c0ba sys/i386/i386/genassym.c
--- a/sys/i386/i386/genassym.c	Mon Jun 05 11:46:51 2006 +0200
+++ b/sys/i386/i386/genassym.c	Mon Jun 05 20:14:31 2006 +0200
@@ -38,6 +38,7 @@ __FBSDID("$FreeBSD: src/sys/i386/i386/ge
 #include "opt_apic.h"
 #include "opt_compat.h"
 #include "opt_kstack_pages.h"
+#include "opt_bss.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -130,6 +131,13 @@ ASSYM(PCB_DR3, offsetof(struct pcb, pcb_
 ASSYM(PCB_DR3, offsetof(struct pcb, pcb_dr3));
 ASSYM(PCB_DR6, offsetof(struct pcb, pcb_dr6));
 ASSYM(PCB_DR7, offsetof(struct pcb, pcb_dr7));
+#ifdef BRANCH_SINGLE_STEP
+ASSYM(PCB_LASTBRANCHFROMIP, offsetof(struct pcb, pcb_lastbranchfromip));
+ASSYM(PCB_LASTBRANCHTOIP, offsetof(struct pcb, pcb_lastbranchtoip));
+ASSYM(PCB_LASTEXCEPTIONFROMIP, offsetof(struct pcb, pcb_lastexceptionfromip));
+ASSYM(PCB_LASTEXCEPTIONTOIP, offsetof(struct pcb, pcb_lastexceptiontoip));
+ASSYM(PCB_BRANCH_SINGLE_STEP, PCB_BRANCH_SINGLE_STEP);
+#endif
 ASSYM(PCB_PSL, offsetof(struct pcb, pcb_psl));
 ASSYM(PCB_DBREGS, PCB_DBREGS);
 ASSYM(PCB_EXT, offsetof(struct pcb, pcb_ext));
diff -r 971d67e92dbf -r 7115e0d6c0ba sys/i386/i386/ptrace_machdep.c
--- a/sys/i386/i386/ptrace_machdep.c	Mon Jun 05 11:46:51 2006 +0200
+++ b/sys/i386/i386/ptrace_machdep.c	Mon Jun 05 20:14:31 2006 +0200
@@ -29,6 +29,7 @@ __FBSDID("$FreeBSD: src/sys/i386/i386/pt
 __FBSDID("$FreeBSD: src/sys/i386/i386/ptrace_machdep.c,v 1.3.2.1 2005/08/11 14:28:42 tobez Exp $");
 
 #include "opt_cpu.h"
+#include "opt_bss.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -36,6 +37,7 @@ __FBSDID("$FreeBSD: src/sys/i386/i386/pt
 #include <sys/ptrace.h>
 #include <machine/md_var.h>
 #include <machine/pcb.h>
+#include <machine/specialreg.h>
 
 #if !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
 #define CPU_ENABLE_SSE
@@ -44,13 +46,17 @@ int
 int
 cpu_ptrace(struct thread *td, int req, void *addr, int data)
 {
+	int error=0;
+	struct ptrace_bss_addrs *bss_addrs;
+
 #ifdef CPU_ENABLE_SSE
-	int error;
-
 	if (!cpu_fxsr)
 		return (EINVAL);
+#endif
+
 
 	switch (req) {
+#ifdef CPU_ENABLE_SSE
 	case PT_GETXMMREGS:
 		error = copyout(&td->td_pcb->pcb_save.sv_xmm, addr,
 		    sizeof(td->td_pcb->pcb_save.sv_xmm));
@@ -60,13 +66,36 @@ cpu_ptrace(struct thread *td, int req, v
 		error = copyin(addr, &td->td_pcb->pcb_save.sv_xmm,
 		    sizeof(td->td_pcb->pcb_save.sv_xmm));
 		break;
-
+#endif
+#ifdef BRANCH_SINGLE_STEP	
+	case PT_BSSTEP:
+                error = 0;
+		if( data != 0) {
+		   td->td_pcb->pcb_flags |= PCB_BRANCH_SINGLE_STEP;
+                   printf( "Enabled branch single stepping for some process\n");
+		} else {
+		   td->td_pcb->pcb_flags &= ~PCB_BRANCH_SINGLE_STEP;
+                   printf( "Disabled branch single stepping for some process\n");
+		}
+		break;
+	case PT_LASTBRANCH:
+	        bss_addrs = addr;
+                error = copyout(&td->td_pcb->pcb_lastbranchfromip, &(bss_addrs->from), sizeof(td->td_pcb->pcb_lastbranchfromip));
+                if( error == 0) {
+                   error = copyout(&td->td_pcb->pcb_lastbranchtoip, &(bss_addrs->to), sizeof(td->td_pcb->pcb_lastbranchtoip));
+                }
+		break;
+	case PT_LASTEXCEPTION:
+	        bss_addrs = addr;
+                error = copyout(&td->td_pcb->pcb_lastexceptionfromip, &(bss_addrs->from), sizeof(td->td_pcb->pcb_lastexceptionfromip));
+                if( error == 0) {
+                   error = copyout(&td->td_pcb->pcb_lastexceptiontoip, &(bss_addrs->to), sizeof(td->td_pcb->pcb_lastexceptiontoip));
+                }
+		break;
+#endif
 	default:
 		return (EINVAL);
 	}
 
 	return (error);
-#else
-	return (EINVAL);
-#endif
 }
diff -r 971d67e92dbf -r 7115e0d6c0ba sys/i386/i386/swtch.s
--- a/sys/i386/i386/swtch.s	Mon Jun 05 11:46:51 2006 +0200
+++ b/sys/i386/i386/swtch.s	Mon Jun 05 20:14:31 2006 +0200
@@ -33,8 +33,10 @@
  */
 
 #include "opt_npx.h"
+#include "opt_bss.h"
 
 #include <machine/asmacros.h>
+#include <machine/specialreg.h>
 
 #include "assym.s"
 
@@ -305,7 +307,26 @@ cpu_switch_load_gs:
 	andl	$~0x0000fc00,%ecx
 	orl     %ecx,%eax
 	movl    %eax,%dr7
-1:
+1:	
+#ifdef BRANCH_SINGLE_STEP
+	/* 
+         * Set DebugCtlMSR if used by the current process 
+	 * Trying to reduce register/stack usage
+         */
+	movl	$MSR_DEBUGCTLMSR,%ecx
+
+	testl	$PCB_BRANCH_SINGLE_STEP,PCB_FLAGS(%edx)
+	jnz	1f
+
+	xorl	%eax,%eax	/* Default case: don't use branch single stepping or last branch recording */
+	jmp	2f
+
+1:
+	movl	$3,%eax		/* Use branch single stepping and last branch recording */
+
+2:	xorl	%edx,%edx
+	wrmsr
+#endif
 	ret
 
 #ifdef INVARIANTS
diff -r 971d67e92dbf -r 7115e0d6c0ba sys/i386/i386/trap.c
--- a/sys/i386/i386/trap.c	Mon Jun 05 11:46:51 2006 +0200
+++ b/sys/i386/i386/trap.c	Mon Jun 05 20:14:31 2006 +0200
@@ -52,6 +52,7 @@ __FBSDID("$FreeBSD: src/sys/i386/i386/tr
 #include "opt_ktrace.h"
 #include "opt_npx.h"
 #include "opt_trap.h"
+#include "opt_bss.h"
 
 #include <sys/param.h>
 #include <sys/bus.h>
@@ -101,6 +102,7 @@ __FBSDID("$FreeBSD: src/sys/i386/i386/tr
 #include <machine/clock.h>
 #endif
 
+#include <machine/specialreg.h>
 extern void trap(struct trapframe frame);
 extern void syscall(struct trapframe frame);
 
@@ -287,6 +289,54 @@ trap(frame)
 
 		case T_BPTFLT:		/* bpt instruction fault */
 		case T_TRCTRAP:		/* trace trap */
+#ifdef BRANCH_SINGLE_STEP
+			/* 
+			 * Is this process being single stepped on branches?
+			 * In that case, save the last branch recording MSRs someplace safe
+                         * XXX: We probably need to save the last exception MSRs more often
+                         * XXX: Check Intel docs:
+                         *       - Under what circumstances does the CPU generate debug traps?
+                         *       - Under what circumstances does the CPU update the last exception MSRs?
+			 */
+			if( PCPU_GET(curpcb)->pcb_flags & PCB_BRANCH_SINGLE_STEP) {
+				__asm__(
+					"mov %1,%%ecx\n\t"
+					"rdmsr\n\t"
+					"mov %%eax,%0\n\t"
+					: "=m"(i)
+					: "i"(MSR_LASTBRANCHFROMIP)
+					: "%ecx", "%eax", "%edx"
+					);
+				PCPU_GET(curpcb)->pcb_lastbranchfromip=i;
+				__asm__(
+					"mov %1,%%ecx\n\t"
+					"rdmsr\n\t"
+					"mov %%eax,%0\n\t"
+					: "=m"(i)
+					: "i"(MSR_LASTBRANCHTOIP)
+					: "%ecx", "%eax", "%edx"
+					);
+				PCPU_GET(curpcb)->pcb_lastbranchtoip=i;
+				__asm__(
+					"mov %1,%%ecx\n\t"
+					"rdmsr\n\t"
+					"mov %%eax,%0\n\t"
+					: "=m"(i)
+					: "i"(MSR_LASTINTFROMIP)
+					: "%ecx", "%eax", "%edx"
+					);
+				PCPU_GET(curpcb)->pcb_lastexceptionfromip=i;
+				__asm__(
+					"mov %1,%%ecx\n\t"
+					"rdmsr\n\t"
+					"mov %%eax,%0\n\t"
+					: "=m"(i)
+					: "i"(MSR_LASTINTTOIP)
+					: "%ecx", "%eax", "%edx"
+					);
+				PCPU_GET(curpcb)->pcb_lastexceptiontoip=i;
+                        }
+#endif
 			enable_intr();
 			frame.tf_eflags &= ~PSL_T;
 			i = SIGTRAP;
diff -r 971d67e92dbf -r 7115e0d6c0ba sys/i386/include/pcb.h
--- a/sys/i386/include/pcb.h	Mon Jun 05 11:46:51 2006 +0200
+++ b/sys/i386/include/pcb.h	Mon Jun 05 20:14:31 2006 +0200
@@ -60,6 +60,11 @@ struct pcb {
 	int     pcb_dr6;
 	int     pcb_dr7;
 
+        int     pcb_lastbranchfromip;
+        int     pcb_lastbranchtoip;
+        int     pcb_lastexceptionfromip;
+        int     pcb_lastexceptiontoip;
+
 	union	savefpu	pcb_save;
 	u_int	pcb_flags;
 #define	FP_SOFTFP	0x01	/* process using software fltng pnt emulator */
@@ -67,6 +72,7 @@ struct pcb {
 #define	PCB_NPXTRAP	0x04	/* npx trap pending */
 #define	PCB_NPXINITDONE	0x08	/* fpu state is initialized */
 #define	PCB_VM86CALL	0x10	/* in vm86 call */
+#define PCB_BRANCH_SINGLE_STEP 0x20
 
 	caddr_t	pcb_onfault;	/* copyin/out fault recovery */
 	int	pcb_gs;
diff -r 971d67e92dbf -r 7115e0d6c0ba sys/i386/include/ptrace.h
--- a/sys/i386/include/ptrace.h	Mon Jun 05 11:46:51 2006 +0200
+++ b/sys/i386/include/ptrace.h	Mon Jun 05 20:14:31 2006 +0200
@@ -35,7 +35,16 @@
 
 #define	__HAVE_PTRACE_MACHDEP
 
+struct ptrace_bss_addrs {	/* address pair copied out by PT_LASTBRANCH / PT_LASTEXCEPTION */
+    int from;			/* address of the branch instruction, or of the interrupted instruction */
+    int to;			/* branch target, or address of the exception/interrupt handler */
+};
+
 #define PT_GETXMMREGS	(PT_FIRSTMACH + 0)
 #define PT_SETXMMREGS	(PT_FIRSTMACH + 1)
 
+#define PT_BSSTEP (PT_FIRSTMACH +2)	/* data != 0: PT_STEP steps on branches; data == 0: restore default */
+#define PT_LASTBRANCH (PT_FIRSTMACH +3)	/* copy out saved last-branch from/to addresses to addr */
+#define PT_LASTEXCEPTION (PT_FIRSTMACH +4)	/* copy out saved last-exception from/to addresses to addr */
+
 #endif
