Source file tour/concurrency/exercise-web-crawler.go

     1  // +build OMIT
     2  
     3  package main
     4  
     5  import (
     6  	"fmt"
     7  )
     8  
// Fetcher retrieves a single page on behalf of Crawl.
type Fetcher interface {
	// Fetch returns the body of URL and
	// a slice of URLs found on that page.
	// Crawl treats a non-nil err as a failed fetch and does not
	// follow any URLs returned alongside it.
	Fetch(url string) (body string, urls []string, err error)
}
    14  
    15  // Crawl uses fetcher to recursively crawl
    16  // pages starting with url, to a maximum of depth.
    17  func Crawl(url string, depth int, fetcher Fetcher) {
    18  	// TODO: Fetch URLs in parallel.
    19  	// TODO: Don't fetch the same URL twice.
    20  	// This implementation doesn't do either:
    21  	if depth <= 0 {
    22  		return
    23  	}
    24  	body, urls, err := fetcher.Fetch(url)
    25  	if err != nil {
    26  		fmt.Println(err)
    27  		return
    28  	}
    29  	fmt.Printf("found: %s %q\n", url, body)
    30  	for _, u := range urls {
    31  		Crawl(u, depth-1, fetcher)
    32  	}
    33  	return
    34  }
    35  
    36  func main() {
    37  	Crawl("https://golang.org/", 4, fetcher)
    38  }
    39  
// fakeFetcher is a Fetcher that returns canned results.
// It maps each known URL to the result its Fetch method returns for it.
type fakeFetcher map[string]*fakeResult
    42  
// fakeResult is the canned response a fakeFetcher holds for one URL.
type fakeResult struct {
	// body is returned by Fetch as the page body.
	body string
	// urls is returned by Fetch as the URLs found on the page.
	urls []string
}
    47  
    48  func (f fakeFetcher) Fetch(url string) (string, []string, error) {
    49  	if res, ok := f[url]; ok {
    50  		return res.body, res.urls, nil
    51  	}
    52  	return "", nil, fmt.Errorf("not found: %s", url)
    53  }
    54  
    55  // fetcher is a populated fakeFetcher.
    56  var fetcher = fakeFetcher{
    57  	"https://golang.org/": &fakeResult{
    58  		"The Go Programming Language",
    59  		[]string{
    60  			"https://golang.org/pkg/",
    61  			"https://golang.org/cmd/",
    62  		},
    63  	},
    64  	"https://golang.org/pkg/": &fakeResult{
    65  		"Packages",
    66  		[]string{
    67  			"https://golang.org/",
    68  			"https://golang.org/cmd/",
    69  			"https://golang.org/pkg/fmt/",
    70  			"https://golang.org/pkg/os/",
    71  		},
    72  	},
    73  	"https://golang.org/pkg/fmt/": &fakeResult{
    74  		"Package fmt",
    75  		[]string{
    76  			"https://golang.org/",
    77  			"https://golang.org/pkg/",
    78  		},
    79  	},
    80  	"https://golang.org/pkg/os/": &fakeResult{
    81  		"Package os",
    82  		[]string{
    83  			"https://golang.org/",
    84  			"https://golang.org/pkg/",
    85  		},
    86  	},
    87  }
    88  

View as plain text