net: fix a race in TestLookupContextCancel

If the actual DNS lookup in LookupIPAddr completes quickly enough,
it may succeed even if the passed-in Context is already canceled.
That would (rarely) cause TestLookupContextCancel to fail due to an
unexpectedly-nil error.

This change uses the existing testHookLookupIP hook to delay the
cancellation until the lookup has started (to try to provoke the code
path for which the test was added), and then block the lookup result
until LookupIPAddr has noticed it.

Fixes #51084
Updates #22724

Change-Id: I331ac61a652ac88f6d4c85bf62466237b76d53ed
Reviewed-on: https://go-review.googlesource.com/c/go/+/384237
Trust: Bryan Mills <bcmills@google.com>
Run-TryBot: Bryan Mills <bcmills@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
This commit is contained in:
Bryan C. Mills 2022-02-08 17:09:40 -05:00 committed by Bryan Mills
parent 9cec77ac11
commit 846c06d33b
1 changed files with 57 additions and 12 deletions

View File

@ -883,21 +883,66 @@ func TestLookupNonLDH(t *testing.T) {
func TestLookupContextCancel(t *testing.T) {
mustHaveExternalNetwork(t)
defer dnsWaitGroup.Wait()
testenv.SkipFlakyNet(t)
ctx, ctxCancel := context.WithCancel(context.Background())
ctxCancel()
_, err := DefaultResolver.LookupIPAddr(ctx, "google.com")
if err.(*DNSError).Err != errCanceled.Error() {
testenv.SkipFlakyNet(t)
t.Fatal(err)
}
ctx = context.Background()
_, err = DefaultResolver.LookupIPAddr(ctx, "google.com")
origTestHookLookupIP := testHookLookupIP
defer func() {
dnsWaitGroup.Wait()
testHookLookupIP = origTestHookLookupIP
}()
lookupCtx, cancelLookup := context.WithCancel(context.Background())
unblockLookup := make(chan struct{})
// Set testHookLookupIP to start a new, concurrent call to LookupIPAddr
// and cancel the original one, then block until the canceled call has returned
// (ensuring that it has performed any synchronous cleanup).
testHookLookupIP = func(
ctx context.Context,
fn func(context.Context, string, string) ([]IPAddr, error),
network string,
host string,
) ([]IPAddr, error) {
select {
case <-unblockLookup:
default:
// Start a concurrent LookupIPAddr for the same host while the caller is
// still blocked, and sleep a little to give it time to be deduplicated
// before we cancel (and unblock) the caller.
// (If the timing doesn't quite work out, we'll end up testing sequential
// calls instead of concurrent ones, but the test should still pass.)
t.Logf("starting concurrent LookupIPAddr")
dnsWaitGroup.Add(1)
go func() {
defer dnsWaitGroup.Done()
_, err := DefaultResolver.LookupIPAddr(context.Background(), host)
if err != nil {
testenv.SkipFlakyNet(t)
t.Fatal(err)
t.Error(err)
}
}()
time.Sleep(1 * time.Millisecond)
}
cancelLookup()
<-unblockLookup
// If the concurrent lookup above is deduplicated to this one
// (as we expect to happen most of the time), it is important
// that the original call does not cancel the shared Context.
// (See https://go.dev/issue/22724.) Explicitly check for
// cancellation now, just in case fn itself doesn't notice it.
if err := ctx.Err(); err != nil {
t.Logf("testHookLookupIP canceled")
return nil, err
}
t.Logf("testHookLookupIP performing lookup")
return fn(ctx, network, host)
}
_, err := DefaultResolver.LookupIPAddr(lookupCtx, "google.com")
if dnsErr, ok := err.(*DNSError); !ok || dnsErr.Err != errCanceled.Error() {
t.Errorf("unexpected error from canceled, blocked LookupIPAddr: %v", err)
}
close(unblockLookup)
}
// Issue 24330: treat the nil *Resolver like a zero value. Verify nothing