endless/components/modules/scraper.go
kayos@tcp.direct c24756973e
Fix: go module issue
fixes:
---
go: endless/internal/moonproxy imports
        git.tcp.direct/moony/endless/internal/moonproxy: git.tcp.direct/moony/endless@v0.0.0-20230317092824-035eb62124ec: parsing go.mod:
        module declares its path as: endless
                but was required as: git.tcp.direct/moony/endless
2023-03-18 02:37:33 -07:00

74 lines
1.2 KiB
Go

package modules
import (
"fmt"
"io/ioutil"
"net/http"
"strings"
"time"
"git.tcp.direct/moony/endless/internal/utils"
"github.com/zenthangplus/goccm"
)
// ScrapeUrl downloads the proxy list served at Url and appends every
// non-empty entry to proxies.txt as "<ProxyType>://<entry>".
//
// Url is fetched with a plain HTTP GET. ProxyType is used verbatim as the
// scheme prefix of each written entry (and in the log line).
//
// Nothing is returned: request and read errors are passed to
// utils.HandleError and abort the scrape, and any non-2xx response is
// dropped silently so that error pages are never stored as proxies.
func ScrapeUrl(Url string, ProxyType string) {
	utils.Log(fmt.Sprintf("[+] Scraping [%s] proxies from [%s]", ProxyType, Url))

	// http.Get goes through http.DefaultClient, which never times out; a
	// stalled source would block its caller forever. The zero Transport
	// still resolves to http.DefaultTransport, so connections stay pooled.
	client := http.Client{Timeout: 30 * time.Second}
	resp, err := client.Get(Url)
	if utils.HandleError(err) {
		return
	}
	defer resp.Body.Close()

	// Redirects are already followed by the client, so anything outside
	// 2xx here is an error page rather than a proxy list.
	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		return
	}

	content, err := ioutil.ReadAll(resp.Body)
	if utils.HandleError(err) {
		return
	}

	for _, line := range strings.Split(string(content), "\n") {
		// Trim so CRLF-terminated lists do not leave a trailing "\r" (or
		// stray padding) inside the stored proxy address.
		proxy := strings.TrimSpace(line)
		if proxy == "" {
			continue
		}
		utils.AppendFile("proxies.txt", fmt.Sprintf("%s://%s", ProxyType, proxy))
	}
}
// Scrape reads the list of proxy sources and runs ScrapeUrl for each one,
// with at most utils.Config.Options.ScrapeThreads scrapes in flight.
//
// Each source line is expected to look like "<proxy type>,<url>". Lines
// that do not contain both parts are skipped instead of crashing the run.
// When every scrape has finished, the number of sources processed and the
// elapsed time are logged.
func Scrape() {
	// NOTE(review): "url,csv" looks like a typo for "url.csv" — confirm
	// against the file actually shipped with the project before changing it.
	urlList, err := utils.ReadLines("url,csv")
	if utils.HandleError(err) {
		return
	}

	start := time.Now()
	c := goccm.New(utils.Config.Options.ScrapeThreads)
	scraped := 0

	for _, line := range urlList {
		// Split on the first comma only so URLs that themselves contain
		// commas are kept intact.
		fields := strings.SplitN(line, ",", 2)
		if len(fields) != 2 {
			// Blank or malformed line: indexing fields[1] would panic.
			continue
		}
		proxyType := strings.TrimSpace(fields[0])
		target := strings.TrimSpace(fields[1])
		if proxyType == "" || target == "" {
			continue
		}

		// Reserve a slot only for lines that will really be scraped, so
		// every Wait is paired with exactly one Done.
		c.Wait()
		scraped++
		go func(u string, t string) {
			defer c.Done()
			ScrapeUrl(u, t)
		}(target, proxyType)
	}

	c.WaitAllDone()
	utils.Log(
		fmt.Sprintf(
			"[*] Scraped [%d] urils in [%fs]",
			scraped,
			time.Since(start).Seconds(),
		),
	)
}