Go 的 interface.Method 调用性能为什么会比 type switch 差？

测试代码：

package main

import (
	"testing"
)

// InterfaceA is satisfied by any type that has a Name() string method.
type InterfaceA interface {
	// Name returns a string identifying the implementation.
	Name() string
}

// InterfaceB has the same Name method as InterfaceA plus an Add method, so
// every InterfaceB implementation also satisfies InterfaceA.
type InterfaceB interface {
	// Name returns a string identifying the implementation.
	Name() string
	// Add takes no arguments and returns nothing; what it does is left to
	// the implementation.
	Add()
}

// A is a tiny counter type. With its pointer-receiver methods Name and Add,
// *A satisfies both InterfaceA and InterfaceB.
type A struct {
	v int // counter incremented by Add
}

// Name returns the constant "A". The receiver is not dereferenced, so the
// call is safe even on a nil *A.
func (a *A) Name() string { return "A" }

// Add increments the counter by one.
func (a *A) Add() { a.v++ }

// B embeds A, so it inherits A's counter and the promoted Add method, and
// overrides Name with its own implementation.
type B struct{ A }

// Name returns the constant "B", shadowing the Name promoted from the
// embedded A. The receiver is not dereferenced.
func (b *B) Name() string { return "B" }

// BenchmarkNormal is the baseline case: each iteration creates a fresh *A
// with new(A) and calls Add on it through a closure that takes the concrete
// *A type, so no interface value is involved.
func BenchmarkNormal(b *testing.B) {
	// Direct method call on the concrete pointer type.
	switchFunc := func(v *A) {
		v.Add()
	}
	for i := 0; i < b.N; i++ {
		v := new(A)
		switchFunc(v)
	}
}

// BenchmarkInterface passes a fresh *A to a closure as an empty interface
// and uses a type switch to recover the concrete type before calling Add.
func BenchmarkInterface(b *testing.B) {
	switchFunc := func(v interface{}) {
		// In each case n has the concrete pointer type, so Add is called on
		// *A or *B directly rather than through an interface method.
		switch n := v.(type) {
		case *A:
			n.Add()
		case *B:
			n.Add()
		}
	}
	for i := 0; i < b.N; i++ {
		v := new(A)
		switchFunc(v)
	}
}

// BenchmarkInterface1 passes a fresh *A as an InterfaceA, calls Name through
// the interface, switches on the returned string, and then type-asserts to
// the matching concrete type to call Add.
//
// NOTE(review): the run quoted in this post reports 1 alloc/op (8 B) for this
// benchmark; presumably the new(A) value escapes to the heap here. Confirm
// with `go test -gcflags=-m`.
func BenchmarkInterface1(b *testing.B) {
	switchFunc := func(v InterfaceA) {
		switch v.Name() {
		case "A":
			// Single-value type assertion: panics if v does not hold a *A.
			v.(*A).Add()
		case "B":
			// Single-value type assertion: panics if v does not hold a *B.
			v.(*B).Add()
		}
	}
	for i := 0; i < b.N; i++ {
		v := new(A)
		switchFunc(v)
	}
}

// BenchmarkInterface2 passes a fresh *A as an empty interface, asserts it to
// InterfaceB (an interface-to-interface assertion), and calls Add through
// that interface.
//
// NOTE(review): the run quoted in this post reports 1 alloc/op (8 B) for this
// benchmark; presumably the new(A) value escapes to the heap here. Confirm
// with `go test -gcflags=-m`.
func BenchmarkInterface2(b *testing.B) {
	switchFunc := func(v interface{}) {
		// Single-value assertion: panics if the dynamic type of v does not
		// implement InterfaceB.
		v.(InterfaceB).Add()
	}
	for i := 0; i < b.N; i++ {
		v := new(A)
		switchFunc(v)
	}
}

// BenchmarkInterface3 passes a fresh *A as an InterfaceB and calls only Add
// through the interface.
func BenchmarkInterface3(b *testing.B) {
	switchFunc := func(v InterfaceB) {
		v.Add()
	}
	for i := 0; i < b.N; i++ {
		v := new(A)
		switchFunc(v)
	}
}

// BenchmarkInterface4 passes a fresh *A as an InterfaceB and calls only Name
// through the interface; the returned string is discarded.
func BenchmarkInterface4(b *testing.B) {
	switchFunc := func(v InterfaceB) {
		v.Name()
	}
	for i := 0; i < b.N; i++ {
		v := new(A)
		switchFunc(v)
	}
}

// BenchmarkInterface5 passes a fresh *A as an InterfaceB and calls both Name
// (result discarded) and Add through the interface, combining the bodies of
// BenchmarkInterface4 and BenchmarkInterface3.
//
// NOTE(review): the run quoted in this post reports 1 alloc/op (8 B) here,
// while the single-call variants report 0; presumably the new(A) value
// escapes to the heap in this version. Confirm with `go test -gcflags=-m`.
func BenchmarkInterface5(b *testing.B) {
	switchFunc := func(v InterfaceB) {
		v.Name()
		v.Add()
	}
	for i := 0; i < b.N; i++ {
		v := new(A)
		switchFunc(v)
	}
}

测试结果：

└──╼ go test -test.bench=".*" . -benchmem 
goos: darwin
goarch: amd64
pkg: org
cpu: Intel(R) Core(TM) i5-8279U CPU @ 2.40GHz
BenchmarkNormal-8       	1000000000	         0.2542 ns/op	       0 B/op	       0 allocs/op
BenchmarkInterface-8    	1000000000	         0.8415 ns/op	       0 B/op	       0 allocs/op
BenchmarkInterface1-8   	72095432	        15.48 ns/op	       8 B/op	       1 allocs/op
BenchmarkInterface2-8   	55137806	        21.07 ns/op	       8 B/op	       1 allocs/op
BenchmarkInterface3-8   	799164643	         1.449 ns/op	       0 B/op	       0 allocs/op
BenchmarkInterface4-8   	767046265	         1.519 ns/op	       0 B/op	       0 allocs/op
BenchmarkInterface5-8   	72075118	        15.82 ns/op	       8 B/op	       1 allocs/op
PASS
ok  	org	7.915s

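另外，通过 interface 单独调用 Name() 或 Add() 时性能差不多（BenchmarkInterface4 / BenchmarkInterface3，约 1.5 ns/op，0 allocs/op），可一旦两个一起调用（BenchmarkInterface5），耗时就增加到约 10 倍（15.82 ns/op），并且每次多了一次内存分配（8 B/op，1 allocs/op）。有大佬研究过这个问题吗？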