unified.go

     1  // Copyright 2019 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package diff
     6  
     7  import (
     8  	"fmt"
     9  	"log"
    10  	"regexp"
    11  	"strconv"
    12  	"strings"
    13  )
    14  
// DefaultContextLines is the number of unchanged lines of surrounding
// context that Unified requests around each hunk. Use ToUnified to specify
// a different value.
const DefaultContextLines = 3
    18  
    19  // Unified returns a unified diff of the old and new strings.
    20  // The old and new labels are the names of the old and new files.
    21  // If the strings are equal, it returns the empty string.
    22  func Unified(oldLabel, newLabel, old, new string) string {
    23  	edits := Strings(old, new)
    24  	unified, err := ToUnified(oldLabel, newLabel, old, edits, DefaultContextLines)
    25  	if err != nil {
    26  		// Can't happen: edits are consistent.
    27  		log.Fatalf("internal error in diff.Unified: %v", err)
    28  	}
    29  	return unified
    30  }
    31  
    32  // ToUnified applies the edits to content and returns a unified diff,
    33  // with contextLines lines of (unchanged) context around each diff hunk.
    34  // The old and new labels are the names of the content and result files.
    35  // It returns an error if the edits are inconsistent; see ApplyEdits.
    36  func ToUnified(oldLabel, newLabel, content string, edits []Edit, contextLines int) (string, error) {
    37  	u, err := toUnified(oldLabel, newLabel, content, edits, contextLines)
    38  	if err != nil {
    39  		return "", err
    40  	}
    41  	return u.String(), nil
    42  }
    43  
// unified represents a set of edits as a unified diff.
type unified struct {
	// from is the name of the original file; it is printed on the "---"
	// header line.
	from string
	// to is the name of the modified file; it is printed on the "+++"
	// header line.
	to string
	// hunks is the set of edit hunks needed to transform the file content.
	hunks []*hunk
}
    53  
// hunk represents a contiguous set of line edits to apply.
type hunk struct {
	// fromLine is the one-based line in the original source where the hunk
	// starts.
	fromLine int
	// toLine is the one-based line in the modified source where the hunk
	// starts.
	toLine int
	// lines is the set of line based edits to apply, including any
	// unchanged context lines.
	lines []line
}
    63  
// line represents a single line operation to apply as part of a hunk.
type line struct {
	// kind is the type of line this represents, deletion, insertion or copy.
	kind opKind
	// content is the content of this line, including its trailing newline
	// when it has one.
	// For deletion it is the line being removed, for all others it is the line
	// to put in the output.
	content string
}
    73  
    74  // opKind is used to denote the type of operation a line represents.
    75  type opKind int
    76  
    77  const (
    78  	// opDelete is the operation kind for a line that is present in the input
    79  	// but not in the output.
    80  	opDelete opKind = iota
    81  	// opInsert is the operation kind for a line that is new in the output.
    82  	opInsert
    83  	// opEqual is the operation kind for a line that is the same in the input and
    84  	// output, often used to provide context around edited lines.
    85  	opEqual
    86  )
    87  
    88  // String returns a human readable representation of an OpKind. It is not
    89  // intended for machine processing.
    90  func (k opKind) String() string {
    91  	switch k {
    92  	case opDelete:
    93  		return "delete"
    94  	case opInsert:
    95  		return "insert"
    96  	case opEqual:
    97  		return "equal"
    98  	default:
    99  		panic("unknown operation kind")
   100  	}
   101  }
   102  
   103  // toUnified takes a file contents and a sequence of edits, and calculates
   104  // a unified diff that represents those edits.
   105  func toUnified(fromName, toName string, content string, edits []Edit, contextLines int) (unified, error) {
   106  	gap := contextLines * 2
   107  	u := unified{
   108  		from: fromName,
   109  		to:   toName,
   110  	}
   111  	if len(edits) == 0 {
   112  		return u, nil
   113  	}
   114  	var err error
   115  	edits, err = lineEdits(content, edits) // expand to whole lines
   116  	if err != nil {
   117  		return u, err
   118  	}
   119  	lines, _ := splitLines(content)
   120  	var h *hunk
   121  	last := 0
   122  	toLine := 0
   123  	for _, edit := range edits {
   124  		// Compute the zero-based line numbers of the edit start and end.
   125  		// TODO(adonovan): opt: compute incrementally, avoid O(n^2).
   126  		start := strings.Count(content[:edit.Start], "\n")
   127  		end := strings.Count(content[:edit.End], "\n")
   128  		if edit.End == len(content) && len(content) > 0 && content[len(content)-1] != '\n' {
   129  			end++ // EOF counts as an implicit newline
   130  		}
   131  
   132  		switch {
   133  		case h != nil && start == last:
   134  			//direct extension
   135  		case h != nil && start <= last+gap:
   136  			//within range of previous lines, add the joiners
   137  			addEqualLines(h, lines, last, start)
   138  		default:
   139  			//need to start a new hunk
   140  			if h != nil {
   141  				// add the edge to the previous hunk
   142  				addEqualLines(h, lines, last, last+contextLines)
   143  				u.hunks = append(u.hunks, h)
   144  			}
   145  			toLine += start - last
   146  			h = &hunk{
   147  				fromLine: start + 1,
   148  				toLine:   toLine + 1,
   149  			}
   150  			// add the edge to the new hunk
   151  			delta := addEqualLines(h, lines, start-contextLines, start)
   152  			h.fromLine -= delta
   153  			h.toLine -= delta
   154  		}
   155  		last = start
   156  		for i := start; i < end; i++ {
   157  			h.lines = append(h.lines, line{kind: opDelete, content: lines[i]})
   158  			last++
   159  		}
   160  		if edit.New != "" {
   161  			v, _ := splitLines(edit.New)
   162  			for _, content := range v {
   163  				h.lines = append(h.lines, line{kind: opInsert, content: content})
   164  				toLine++
   165  			}
   166  		}
   167  	}
   168  	if h != nil {
   169  		// add the edge to the final hunk
   170  		addEqualLines(h, lines, last, last+contextLines)
   171  		u.hunks = append(u.hunks, h)
   172  	}
   173  	return u, nil
   174  }
   175  
   176  // split into lines removing a final empty line,
   177  // and also return the offsets of the line beginnings.
   178  func splitLines(text string) ([]string, []int) {
   179  	var lines []string
   180  	offsets := []int{0}
   181  	start := 0
   182  	for i, r := range text {
   183  		if r == '\n' {
   184  			lines = append(lines, text[start:i+1])
   185  			start = i + 1
   186  			offsets = append(offsets, start)
   187  		}
   188  	}
   189  	if start < len(text) {
   190  		lines = append(lines, text[start:])
   191  		offsets = append(offsets, len(text))
   192  	}
   193  	return lines, offsets
   194  }
   195  
   196  func addEqualLines(h *hunk, lines []string, start, end int) int {
   197  	delta := 0
   198  	for i := start; i < end; i++ {
   199  		if i < 0 {
   200  			continue
   201  		}
   202  		if i >= len(lines) {
   203  			return delta
   204  		}
   205  		h.lines = append(h.lines, line{kind: opEqual, content: lines[i]})
   206  		delta++
   207  	}
   208  	return delta
   209  }
   210  
   211  // String converts a unified diff to the standard textual form for that diff.
   212  // The output of this function can be passed to tools like patch.
   213  func (u unified) String() string {
   214  	if len(u.hunks) == 0 {
   215  		return ""
   216  	}
   217  	b := new(strings.Builder)
   218  	fmt.Fprintf(b, "--- %s\n", u.from)
   219  	fmt.Fprintf(b, "+++ %s\n", u.to)
   220  	for _, hunk := range u.hunks {
   221  		fromCount, toCount := 0, 0
   222  		for _, l := range hunk.lines {
   223  			switch l.kind {
   224  			case opDelete:
   225  				fromCount++
   226  			case opInsert:
   227  				toCount++
   228  			default:
   229  				fromCount++
   230  				toCount++
   231  			}
   232  		}
   233  		fmt.Fprint(b, "@@")
   234  		if fromCount > 1 {
   235  			fmt.Fprintf(b, " -%d,%d", hunk.fromLine, fromCount)
   236  		} else if hunk.fromLine == 1 && fromCount == 0 {
   237  			// Match odd GNU diff -u behavior adding to empty file.
   238  			fmt.Fprintf(b, " -0,0")
   239  		} else {
   240  			fmt.Fprintf(b, " -%d", hunk.fromLine)
   241  		}
   242  		if toCount > 1 {
   243  			fmt.Fprintf(b, " +%d,%d", hunk.toLine, toCount)
   244  		} else if hunk.toLine == 1 && toCount == 0 {
   245  			// Match odd GNU diff -u behavior adding to empty file.
   246  			fmt.Fprintf(b, " +0,0")
   247  		} else {
   248  			fmt.Fprintf(b, " +%d", hunk.toLine)
   249  		}
   250  		fmt.Fprint(b, " @@\n")
   251  		for _, l := range hunk.lines {
   252  			switch l.kind {
   253  			case opDelete:
   254  				fmt.Fprintf(b, "-%s", l.content)
   255  			case opInsert:
   256  				fmt.Fprintf(b, "+%s", l.content)
   257  			default:
   258  				fmt.Fprintf(b, " %s", l.content)
   259  			}
   260  			if !strings.HasSuffix(l.content, "\n") {
   261  				fmt.Fprintf(b, "\n\\ No newline at end of file\n")
   262  			}
   263  		}
   264  	}
   265  	return b.String()
   266  }
   267  
   268  // ApplyUnified applies the unified diffs.
   269  func ApplyUnified(udiffs, bef string) (string, error) {
   270  	before := strings.Split(bef, "\n")
   271  	unif := strings.Split(udiffs, "\n")
   272  	var got []string
   273  	left := 0
   274  	// parse and apply the unified diffs
   275  	for _, l := range unif {
   276  		if len(l) == 0 {
   277  			continue // probably the last line (from Split)
   278  		}
   279  		switch l[0] {
   280  		case '@': // The @@ line
   281  			m := atregexp.FindStringSubmatch(l)
   282  			fromLine, err := strconv.Atoi(m[1])
   283  			if err != nil {
   284  				return "", fmt.Errorf("missing line number in %q", l)
   285  			}
   286  			// before is a slice, so0-based; fromLine is 1-based
   287  			for ; left < fromLine-1; left++ {
   288  				got = append(got, before[left])
   289  			}
   290  		case '+': // add this line
   291  			if strings.HasPrefix(l, "+++ ") {
   292  				continue
   293  			}
   294  			got = append(got, l[1:])
   295  		case '-': // delete this line
   296  			if strings.HasPrefix(l, "--- ") {
   297  				continue
   298  			}
   299  			left++
   300  		case ' ':
   301  			return "", fmt.Errorf("unexpected line %q", l)
   302  		default:
   303  			return "", fmt.Errorf("impossible unified %q", udiffs)
   304  		}
   305  	}
   306  	// copy any remaining lines
   307  	for ; left < len(before); left++ {
   308  		got = append(got, before[left])
   309  	}
   310  	return strings.Join(got, "\n"), nil
   311  }
   312  
   313  // The first number in the @@ lines is the line number in the 'before' data
   314  var atregexp = regexp.MustCompile(`@@ -(\d+).* @@`)
   315
View as plain text