mirror of https://github.com/golang/go.git
This CL implements the math/bits.OnesCount{8,16,32,64} functions
as intrinsics on s390x using the 'population count' (popcnt)
instruction. This instruction was released as the 'population-count'
facility which uses the same facility bit (45) as the
'distinct-operands' facility which is a pre-requisite for Go on
s390x. We can therefore use it without a feature check.
The s390x popcnt instruction treats a 64 bit register as a vector
of 8 bytes, summing the number of ones in each byte individually.
It then writes the results to the corresponding bytes in the
output register. Therefore to implement OnesCount{16,32,64} we
need to sum the individual byte counts using some extra
instructions. To do this efficiently I've added some additional
pseudo operations to the s390x SSA backend.
Unlike other architectures the new instruction sequence is faster
for OnesCount8, so that is implemented using the intrinsic.
name old time/op new time/op delta
OnesCount 3.21ns ± 1% 1.35ns ± 0% -58.00% (p=0.000 n=20+20)
OnesCount8 0.91ns ± 1% 0.81ns ± 0% -11.43% (p=0.000 n=20+20)
OnesCount16 1.51ns ± 3% 1.21ns ± 0% -19.71% (p=0.000 n=20+17)
OnesCount32 1.91ns ± 0% 1.12ns ± 1% -41.60% (p=0.000 n=19+20)
OnesCount64 3.18ns ± 4% 1.35ns ± 0% -57.52% (p=0.000 n=20+20)
Change-Id: Id54f0bd28b6db9a887ad12c0d72fcc168ef9c4e0
Reviewed-on: https://go-review.googlesource.com/114675
Run-TryBot: Michael Munday <mike.munday@ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
|
||
|---|---|---|
| .. | ||
| alias3.dir | ||
| bench | ||
| chan | ||
| closure3.dir | ||
| codegen | ||
| ddd2.dir | ||
| dwarf | ||
| fixedbugs | ||
| import2.dir | ||
| import4.dir | ||
| interface | ||
| intrinsic.dir | ||
| ken | ||
| linkname.dir | ||
| method4.dir | ||
| retjmp.dir | ||
| stress | ||
| syntax | ||
| uintptrescapes.dir | ||
| 64bit.go | ||
| 235.go | ||
| README.md | ||
| alg.go | ||
| alias.go | ||
| alias1.go | ||
| alias2.go | ||
| alias3.go | ||
| align.go | ||
| append.go | ||
| append1.go | ||
| args.go | ||
| armimm.go | ||
| assign.go | ||
| assign1.go | ||
| atomicload.go | ||
| bigalg.go | ||
| bigmap.go | ||
| blank.go | ||
| blank1.go | ||
| bom.go | ||
| bombad.go | ||
| bounds.go | ||
| chancap.go | ||
| chanlinear.go | ||
| char_lit.go | ||
| char_lit1.go | ||
| checkbce.go | ||
| clearfat.go | ||
| closedchan.go | ||
| closure.go | ||
| closure1.go | ||
| closure2.go | ||
| closure3.go | ||
| closure4.go | ||
| cmp.go | ||
| cmp6.go | ||
| cmplx.go | ||
| cmplxdivide.c | ||
| cmplxdivide.go | ||
| cmplxdivide1.go | ||
| complit.go | ||
| complit1.go | ||
| compos.go | ||
| const.go | ||
| const1.go | ||
| const2.go | ||
| const3.go | ||
| const4.go | ||
| const5.go | ||
| const6.go | ||
| convT2X.go | ||
| convert.go | ||
| convert1.go | ||
| convert2.go | ||
| convert3.go | ||
| convlit.go | ||
| convlit1.go | ||
| copy.go | ||
| copy1.go | ||
| crlf.go | ||
| ddd.go | ||
| ddd1.go | ||
| ddd2.go | ||
| decl.go | ||
| declbad.go | ||
| defer.go | ||
| deferfin.go | ||
| deferprint.go | ||
| deferprint.out | ||
| devirt.go | ||
| divide.go | ||
| divmod.go | ||
| empty.go | ||
| env.go | ||
| eof.go | ||
| eof1.go | ||
| escape.go | ||
| escape2.go | ||
| escape2n.go | ||
| escape3.go | ||
| escape4.go | ||
| escape5.go | ||
| escape_array.go | ||
| escape_because.go | ||
| escape_calls.go | ||
| escape_closure.go | ||
| escape_field.go | ||
| escape_iface.go | ||
| escape_indir.go | ||
| escape_level.go | ||
| escape_map.go | ||
| escape_param.go | ||
| escape_slice.go | ||
| escape_struct_param1.go | ||
| escape_struct_param2.go | ||
| escape_struct_return.go | ||
| fibo.go | ||
| finprofiled.go | ||
| float_lit.go | ||
| float_lit2.go | ||
| float_lit3.go | ||
| floatcmp.go | ||
| for.go | ||
| func.go | ||
| func1.go | ||
| func2.go | ||
| func3.go | ||
| func4.go | ||
| func5.go | ||
| func6.go | ||
| func7.go | ||
| func8.go | ||
| funcdup.go | ||
| funcdup2.go | ||
| gc.go | ||
| gc1.go | ||
| gc2.go | ||
| gcgort.go | ||
| gcstring.go | ||
| goprint.go | ||
| goprint.out | ||
| goto.go | ||
| heapsampling.go | ||
| helloworld.go | ||
| helloworld.out | ||
| if.go | ||
| import.go | ||
| import1.go | ||
| import2.go | ||
| import4.go | ||
| import5.go | ||
| import6.go | ||
| index.go | ||
| index0.go | ||
| index1.go | ||
| index2.go | ||
| indirect.go | ||
| indirect1.go | ||
| init.go | ||
| init1.go | ||
| initcomma.go | ||
| initialize.go | ||
| initializerr.go | ||
| initloop.go | ||
| inline.go | ||
| inline_big.go | ||
| inline_caller.go | ||
| inline_callers.go | ||
| inline_literal.go | ||
| inline_math_bits_rotate.go | ||
| inline_variadic.go | ||
| int_lit.go | ||
| intcvt.go | ||
| intrinsic.go | ||
| intrinsic_atomic.go | ||
| iota.go | ||
| label.go | ||
| label1.go | ||
| linkmain.go | ||
| linkmain_run.go | ||
| linkname.go | ||
| linkobj.go | ||
| linkx.go | ||
| linkx_run.go | ||
| literal.go | ||
| live.go | ||
| live1.go | ||
| live2.go | ||
| live_syscall.go | ||
| locklinear.go | ||
| loopbce.go | ||
| makechan.go | ||
| makemap.go | ||
| makenew.go | ||
| mallocfin.go | ||
| map.go | ||
| map1.go | ||
| mapclear.go | ||
| maplinear.go | ||
| mergemul.go | ||
| method.go | ||
| method1.go | ||
| method2.go | ||
| method3.go | ||
| method4.go | ||
| method5.go | ||
| method6.go | ||
| method7.go | ||
| named.go | ||
| named1.go | ||
| nil.go | ||
| nilcheck.go | ||
| nilptr.go | ||
| nilptr2.go | ||
| nilptr3.go | ||
| nilptr3_wasm.go | ||
| nilptr4.go | ||
| nosplit.go | ||
| notinheap.go | ||
| notinheap2.go | ||
| notinheap3.go | ||
| nowritebarrier.go | ||
| nul1.go | ||
| opt_branchlikely.go | ||
| parentype.go | ||
| peano.go | ||
| phiopt.go | ||
| print.go | ||
| print.out | ||
| printbig.go | ||
| printbig.out | ||
| prove.go | ||
| range.go | ||
| recover.go | ||
| recover1.go | ||
| recover2.go | ||
| recover3.go | ||
| recover4.go | ||
| recover5.go | ||
| reflectmethod1.go | ||
| reflectmethod2.go | ||
| reflectmethod3.go | ||
| reflectmethod4.go | ||
| rename.go | ||
| rename1.go | ||
| reorder.go | ||
| reorder2.go | ||
| retjmp.go | ||
| return.go | ||
| rotate.go | ||
| rotate0.go | ||
| rotate1.go | ||
| rotate2.go | ||
| rotate3.go | ||
| run.go | ||
| rune.go | ||
| runtime.go | ||
| shift1.go | ||
| shift2.go | ||
| sieve.go | ||
| sigchld.go | ||
| sigchld.out | ||
| simassign.go | ||
| sinit.go | ||
| sinit_run.go | ||
| sizeof.go | ||
| slice3.go | ||
| slice3err.go | ||
| slicecap.go | ||
| sliceopt.go | ||
| solitaire.go | ||
| stack.go | ||
| strcopy.go | ||
| strength.go | ||
| string_lit.go | ||
| stringrange.go | ||
| struct0.go | ||
| switch.go | ||
| switch2.go | ||
| switch3.go | ||
| switch4.go | ||
| switch5.go | ||
| switch6.go | ||
| switch7.go | ||
| tinyfin.go | ||
| torture.go | ||
| turing.go | ||
| typecheck.go | ||
| typecheckloop.go | ||
| typeswitch.go | ||
| typeswitch1.go | ||
| typeswitch2.go | ||
| typeswitch3.go | ||
| uintptrescapes.go | ||
| uintptrescapes2.go | ||
| undef.go | ||
| unsafereject1.go | ||
| unsafereject2.go | ||
| utf.go | ||
| varerr.go | ||
| varinit.go | ||
| writebarrier.go | ||
| zerodivide.go | ||
README.md
The test directory contains tests of the Go tool chain and runtime. It includes black box tests, regression tests, and error output tests. They are run as part of all.bash.
To run just these tests, execute:
../bin/go run run.go
Standard library tests should be written as regular Go tests in the appropriate package.
The tool chain and runtime also have regular Go tests in their packages. The main reasons to add a new test to this directory are:
- it is most naturally expressed using the test runner; or
- it is also applicable to
gccgoand other Go tool chains.