diff --git a/src/fmt/doc.go b/src/fmt/doc.go index eb97e51d5d..8b0c7d10af 100644 --- a/src/fmt/doc.go +++ b/src/fmt/doc.go @@ -244,31 +244,42 @@ Scanln, Fscanln and Sscanln stop scanning at a newline and require that the items be followed by a newline or EOF. - Scanf, Fscanf and Sscanf require that (after skipping spaces) - newlines in the format are matched by newlines in the input - and vice versa. This behavior differs from the corresponding - routines in C, which uniformly treat newlines as spaces. - - When scanning with Scanf, Fscanf, and Sscanf, all non-empty - runs of space characters (except newline) are equivalent - to a single space in both the format and the input. With - that proviso, text in the format string must match the input - text; scanning stops if it does not, with the return value - of the function indicating the number of arguments scanned. - Scanf, Fscanf, and Sscanf parse the arguments according to a - format string, analogous to that of Printf. For example, %x - will scan an integer as a hexadecimal number, and %v will scan - the default representation format for the value. + format string, analogous to that of Printf. In the text that + follows, 'space' means any Unicode whitespace character + except newline. - The formats behave analogously to those of Printf with the - following exceptions: + In the format string, a verb introduced by the % character + consumes and parses input; these verbs are described in more + detail below. A character other than %, space, or newline in + the format consumes exactly that input character, which must + be present. A newline with zero or more spaces before it in + the format string consumes zero or more spaces in the input + followed by a single newline or the end of the input. A space + following a newline in the format string consumes zero or more + spaces in the input. Otherwise, any run of one or more spaces + in the format string consumes as many spaces as possible in + the input. Unless the run of spaces in the format string + appears adjacent to a newline, the run must consume at least + one space from the input or find the end of the input. - %p is not implemented - %T is not implemented - %e %E %f %F %g %G are all equivalent and scan any floating point or complex value - %s and %v on strings scan a space-delimited token - Flags # and + are not implemented. + The handling of spaces and newlines differs from that of C's + scanf family: in C, newlines are treated as any other space, + and it is never an error when a run of spaces in the format + string finds no spaces to consume in the input. + + The verbs behave analogously to those of Printf. + For example, %x will scan an integer as a hexadecimal number, + and %v will scan the default representation format for the value. + The Printf verbs %p and %T and the flags # and + are not implemented, + and the verbs %e %E %f %F %g and %G are all equivalent and scan any + floating-point or complex value. + + Input processed by verbs is implicitly space-delimited: the + implementation of every verb except %c starts by discarding + leading spaces from the remaining input, and the %s verb + (and %v reading into a string) stops consuming input at the first + space or newline character. The familiar base-setting prefixes 0 (octal) and 0x (hexadecimal) are accepted when scanning integers without @@ -297,6 +308,9 @@ All arguments to be scanned must be either pointers to basic types or implementations of the Scanner interface. + Like Scanf and Fscanf, Sscanf need not consume its entire input. + There is no way to recover how much of the input string Sscanf used. + Note: Fscan etc. can read one character (rune) past the input they return, which means that a loop calling a scan routine may skip some of the input. This is usually a problem only diff --git a/src/fmt/scan.go b/src/fmt/scan.go index fdf419795d..cd7232c33c 100644 --- a/src/fmt/scan.go +++ b/src/fmt/scan.go @@ -1075,6 +1075,58 @@ func (s *ss) doScan(a []interface{}) (numProcessed int, err error) { func (s *ss) advance(format string) (i int) { for i < len(format) { fmtc, w := utf8.DecodeRuneInString(format[i:]) + + // Space processing. + // In the rest of this comment "space" means spaces other than newline. + // Newline in the format matches input of zero or more spaces and then newline or end-of-input. + // Spaces in the format before the newline are collapsed into the newline. + // Spaces in the format after the newline match zero or more spaces after the corresponding input newline. + // Other spaces in the format match input of one or more spaces or end-of-input. + if isSpace(fmtc) { + newlines := 0 + trailingSpace := false + for isSpace(fmtc) && i < len(format) { + if fmtc == '\n' { + newlines++ + trailingSpace = false + } else { + trailingSpace = true + } + i += w + fmtc, w = utf8.DecodeRuneInString(format[i:]) + } + for j := 0; j < newlines; j++ { + inputc := s.getRune() + for isSpace(inputc) && inputc != '\n' { + inputc = s.getRune() + } + if inputc != '\n' && inputc != eof { + s.errorString("newline in format does not match input") + } + } + if trailingSpace { + inputc := s.getRune() + if newlines == 0 { + // If the trailing space stood alone (did not follow a newline), + // it must find at least one space to consume. + if !isSpace(inputc) && inputc != eof { + s.errorString("expected space in input to match format") + } + if inputc == '\n' { + s.errorString("newline in input does not match format") + } + } + for isSpace(inputc) && inputc != '\n' { + inputc = s.getRune() + } + if inputc != eof { + s.UnreadRune() + } + } + continue + } + + // Verbs. if fmtc == '%' { // % at end of string is an error. if i+w == len(format) { @@ -1087,48 +1139,8 @@ func (s *ss) advance(format string) (i int) { } i += w // skip the first % } - sawSpace := false - wasNewline := false - // Skip spaces in format but absorb at most one newline. - for isSpace(fmtc) && i < len(format) { - if fmtc == '\n' { - if wasNewline { // Already saw one; stop here. - break - } - wasNewline = true - } - sawSpace = true - i += w - fmtc, w = utf8.DecodeRuneInString(format[i:]) - } - if sawSpace { - // There was space in the format, so there should be space - // in the input. - inputc := s.getRune() - if inputc == eof { - return - } - if !isSpace(inputc) { - // Space in format but not in input. - s.errorString("expected space in input to match format") - } - // Skip spaces but stop at newline. - for inputc != '\n' && isSpace(inputc) { - inputc = s.getRune() - } - if inputc == '\n' { - if !wasNewline { - s.errorString("newline in input does not match format") - } - // We've reached a newline, stop now; don't read further. - return - } - s.UnreadRune() - if wasNewline { - s.errorString("newline in format does not match input") - } - continue - } + + // Literals. inputc := s.mustReadRune() if fmtc != inputc { s.UnreadRune() diff --git a/src/fmt/scan_test.go b/src/fmt/scan_test.go index 9e58a6a5f7..d7019d9439 100644 --- a/src/fmt/scan_test.go +++ b/src/fmt/scan_test.go @@ -349,12 +349,12 @@ var scanfTests = []ScanfTest{ {"X%dX", " X27X ", &intVal, nil}, // input does not match format {"X%dX\n", "X27X", &intVal, 27}, - {"X%dX\n", "X27X ", &intVal, nil}, // newline in format does not match input + {"X%dX \n", "X27X ", &intVal, 27}, {"X%dX\n", "X27X\n", &intVal, 27}, {"X%dX\n", "X27X \n", &intVal, 27}, {"X%dX \n", "X27X", &intVal, 27}, - {"X%dX \n", "X27X ", &intVal, nil}, // newline in format does not match input + {"X%dX \n", "X27X ", &intVal, 27}, {"X%dX \n", "X27X\n", &intVal, 27}, {"X%dX \n", "X27X \n", &intVal, 27}, @@ -363,7 +363,7 @@ var scanfTests = []ScanfTest{ {"X %c", "X!", &runeVal, nil}, // expected space in input to match format {"X %c", "X\n", &runeVal, nil}, // newline in input does not match format {"X %c", "X !", &runeVal, '!'}, - {"X %c", "X \n", &runeVal, nil}, // newline in input does not match format + {"X %c", "X \n", &runeVal, '\n'}, {" X%dX", "X27X", &intVal, nil}, // expected space in input to match format {" X%dX", "X27X ", &intVal, nil}, // expected space in input to match format @@ -381,7 +381,7 @@ var scanfTests = []ScanfTest{ {" X%dX ", " X27X ", &intVal, 27}, {"%d\nX", "27\nX", &intVal, 27}, - {"%dX\n X", "27X\n X", &intVal, nil}, // input does not match format + {"%dX\n X", "27X\n X", &intVal, 27}, } var overflowTests = []ScanTest{ @@ -1230,18 +1230,18 @@ func TestScanfNewlineMatchFormat(t *testing.T) { {"space vs newline no-percent 0001", "1\n2", "1\n 2", 0, true}, {"space vs newline no-percent 0010", "1\n2", "1 \n2", 0, true}, {"space vs newline no-percent 0011", "1\n2", "1 \n 2", 0, true}, - {"space vs newline no-percent 0100", "1\n 2", "1\n2", 0, false}, // fails: space after nl in input but not pattern - {"space vs newline no-percent 0101", "1\n 2", "1\n2 ", 0, false}, // fails: space after nl in input but not pattern - {"space vs newline no-percent 0110", "1\n 2", "1 \n2", 0, false}, // fails: space after nl in input but not pattern - {"space vs newline no-percent 0111", "1\n 2", "1 \n 2", 0, false}, // fails: hard to explain + {"space vs newline no-percent 0100", "1\n 2", "1\n2", 0, false}, // fails: space after nl in input but not pattern + {"space vs newline no-percent 0101", "1\n 2", "1\n2 ", 0, false}, // fails: space after nl in input but not pattern + {"space vs newline no-percent 0110", "1\n 2", "1 \n2", 0, false}, // fails: space after nl in input but not pattern + {"space vs newline no-percent 0111", "1\n 2", "1 \n 2", 0, true}, {"space vs newline no-percent 1000", "1 \n2", "1\n2", 0, true}, {"space vs newline no-percent 1001", "1 \n2", "1\n 2", 0, true}, {"space vs newline no-percent 1010", "1 \n2", "1 \n2", 0, true}, {"space vs newline no-percent 1011", "1 \n2", "1 \n 2", 0, true}, - {"space vs newline no-percent 1100", "1 \n 2", "1\n2", 0, false}, // fails: space after nl in input but not pattern - {"space vs newline no-percent 1101", "1 \n 2", "1\n 2", 0, false}, // fails: hard to explain - {"space vs newline no-percent 1110", "1 \n 2", "1 \n2", 0, false}, // fails: space after nl in input but not pattern - {"space vs newline no-percent 1111", "1 \n 2", "1 \n 2", 0, false}, // fails: hard to explain + {"space vs newline no-percent 1100", "1 \n 2", "1\n2", 0, false}, // fails: space after nl in input but not pattern + {"space vs newline no-percent 1101", "1 \n 2", "1\n 2", 0, true}, + {"space vs newline no-percent 1110", "1 \n 2", "1 \n2", 0, false}, // fails: space after nl in input but not pattern + {"space vs newline no-percent 1111", "1 \n 2", "1 \n 2", 0, true}, } for _, test := range tests { var n int