1 Star 0 Fork 0

Joran/wtquery

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
package.go 6.74 KB
一键复制 编辑 原始数据 按行查看 历史
Joran 提交于 2023-09-01 12:47 . fix: faster query
package main
import (
"errors"
"fmt"
"io/ioutil"
"log"
"net/http"
"net/url"
"regexp"
"strconv"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
"github.com/tealeg/xlsx"
)
// TableRow is one row of a scraped table: the text of each of its cells,
// in document order.
type TableRow struct {
	// Columns holds one string per cell of the row.
	Columns []string
}
// CellSummary carries the table header labels together with the pagination
// counters read from a result page.
type CellSummary struct {
	// headers are the column titles of the result table.
	headers []string

	// totalRecords is the overall number of records reported by the page.
	totalRecords int

	// currentPage is the page number the counters were read from.
	currentPage int

	// totalPage is the number of pages available for the query.
	totalPage int
}
// GenerateXls writes headers followed by rows into a workbook with a single
// sheet named "sheet1" and saves it as "<xlsName>.xlsx".
//
// headers become the first row (row height 20); every TableRow in rows becomes
// one further row with one cell per column. After a successful save a
// timestamped "<xlsName>.xlsx ok" line is printed to stdout.
//
// Any failure to create the sheet or to save the file is returned to the
// caller; the function never terminates the process itself.
func GenerateXls(headers []string, rows []TableRow, xlsName string) (err error) {
	file := xlsx.NewFile()
	sheet, err := file.AddSheet("sheet1")
	if err != nil {
		return err
	}

	headerRow := sheet.AddRow()
	headerRow.SetHeight(20)
	for _, header := range headers {
		headerRow.AddCell().Value = header
	}

	for _, row := range rows {
		dataRow := sheet.AddRow()
		for _, col := range row.Columns {
			dataRow.AddCell().Value = col
		}
	}

	fileName := xlsName + ".xlsx"
	if err = file.Save(fileName); err != nil {
		// Log (not log.Fatal) so the process keeps running and the caller's
		// own error handling gets a chance to act on the returned error.
		log.Printf("save %s: %v", fileName, err)
		return err
	}

	// Only announce success once the file is really on disk.
	fmt.Println(time.Now().Format("2006-01-02 15:04:05") + " " + fileName + " ok")
	return nil
}
// GetCitysAndCateInfo downloads the price index page and extracts the data
// needed to drive the queries.
//
// Returns:
//   - cityCombs: every ordered pair {a, b} and {b, a} of distinct entries of
//     the city list found under "div.tcdiv_ul ul li".
//   - cateInfo: a map from each "#selCateId option" value attribute to the
//     option's text.
//   - err: non-nil when the request fails, the server answers with a non-200
//     status, or the HTML cannot be parsed. The other results are nil then.
func GetCitysAndCateInfo() (cityCombs [][]string, cateInfo map[string]string, err error) {
	const pageURL = "http://index.0256.cn/pricex.htm"

	// Fixed 3s pause before the request (presumably to avoid hammering the
	// site — confirm before changing).
	time.Sleep(3 * time.Second)

	// A bounded timeout keeps a stalled server from hanging the program.
	client := &http.Client{Timeout: 60 * time.Second}
	resp, err := client.Get(pageURL)
	if err != nil {
		return nil, nil, err
	}
	defer resp.Body.Close()

	// Without this check an error page would silently parse into empty lists.
	if resp.StatusCode != http.StatusOK {
		return nil, nil, fmt.Errorf("GET %s: unexpected status %s", pageURL, resp.Status)
	}

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return nil, nil, err
	}

	var cities []string
	doc.Find("div.tcdiv_ul ul li").Each(func(_ int, s *goquery.Selection) {
		cities = append(cities, s.Text())
	})

	// Emit both directions for every unordered pair of cities.
	for i := 0; i < len(cities); i++ {
		for j := i + 1; j < len(cities); j++ {
			cityCombs = append(cityCombs,
				[]string{cities[i], cities[j]},
				[]string{cities[j], cities[i]},
			)
		}
	}

	cateInfo = make(map[string]string)
	doc.Find("#selCateId option").Each(func(_ int, s *goquery.Selection) {
		cateInfo[s.AttrOr("value", "")] = s.Text()
	})

	return cityCombs, cateInfo, nil
}
// PostPrice sends one price-report query as a form-encoded POST and returns
// the raw HTTP response.
//
// Parameters:
//   - citys: at least two entries; citys[0] is sent as "startLine" and
//     citys[1] as "endLine".
//   - cateId: sent as the "cateId" form field.
//   - pageNumber: sent as the "pageNumber" form field; the page size is fixed
//     at 1000 via the "limit" field.
//
// On success the caller owns resp and must close resp.Body. An error is
// returned (and resp is nil) when citys has fewer than two entries, when the
// request cannot be built, or when the round trip fails. The function sleeps
// three seconds before every request.
func PostPrice(citys []string, cateId string, pageNumber int) (resp *http.Response, err error) {
	// Validate up front: indexing citys[1] on a short slice would panic.
	if len(citys) < 2 {
		return nil, fmt.Errorf("PostPrice: need a start and an end city, got %d value(s)", len(citys))
	}

	formData := url.Values{}
	formData.Set("orgId", "")
	formData.Set("type", "-1")
	formData.Set("marketId", "1")
	formData.Set("cateId", cateId)
	formData.Set("startLine", citys[0])
	formData.Set("endLine", citys[1])
	formData.Set("startTime", "")
	formData.Set("endTime", "")
	formData.Set("limit", "1000")
	formData.Set("pageNumber", strconv.Itoa(pageNumber))

	req, err := http.NewRequest("POST", "http://index.0256.cn/request_report_for_pub_price.action", strings.NewReader(formData.Encode()))
	if err != nil {
		return nil, err
	}
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8")
	req.Header.Set("X-Requested-With", "XMLHttpRequest")
	req.Header.Set("Accept", "text/html, */*; q=0.01")
	req.Header.Set("Origin", "http://index.0256.cn")
	req.Header.Set("Referer", "http://index.0256.cn/pricex.htm")
	req.Header.Set("Connection", "keep-alive")

	// A bounded timeout keeps a stalled server from hanging the whole run.
	// The client is cheap to create: it shares http.DefaultTransport.
	client := &http.Client{Timeout: 60 * time.Second}

	// Fixed pause between requests.
	time.Sleep(3 * time.Second)
	return client.Do(req)
}
// FetchRows extracts every table row found under "table tbody" in doc.
//
// Each <tr> yields one TableRow whose Columns are the whitespace-trimmed
// texts of its <td>/<th> cells, in document order. The returned error is
// always nil.
func FetchRows(doc *goquery.Document) (rows []TableRow, err error) {
	// cellTexts gathers the trimmed text of every td/th cell in one row.
	cellTexts := func(tr *goquery.Selection) []string {
		texts := make([]string, 0)
		tr.Find("td, th").Each(func(_ int, cell *goquery.Selection) {
			texts = append(texts, strings.TrimSpace(cell.Text()))
		})
		return texts
	}

	doc.Find("table tbody").Each(func(_ int, body *goquery.Selection) {
		body.Find("tr").Each(func(_ int, tr *goquery.Selection) {
			rows = append(rows, TableRow{Columns: cellTexts(tr)})
		})
	})
	return rows, err
}
// FetchSummary reads the pagination counters and the table headers from doc.
//
// The counters come from the text of "div.w_page": the record count is the
// text between "共" and "条记录", and current/total page come from the
// "<current>/<total>页" pattern. If several such divs exist, the last one
// wins. The headers are the texts of "table thead tr td".
//
// The returned summary is never nil: when the page has no "div.w_page" (or a
// counter cannot be parsed) the corresponding counters are left at 0. The
// returned error is always nil.
func FetchSummary(doc *goquery.Document) (summary *CellSummary, err error) {
	// Allocate unconditionally so a page without pagination markup yields an
	// empty summary instead of a nil-pointer dereference below.
	summary = &CellSummary{headers: make([]string, 0)}

	doc.Find("div.w_page").Each(func(_ int, s *goquery.Selection) {
		text := s.Text()
		pageInfo := getPageInfo(text)

		// Atoi errors are deliberately ignored: an unparsable counter is
		// treated as 0.
		summary.totalRecords, _ = strconv.Atoi(getTextBetween(text, "共", "条记录"))
		if len(pageInfo) >= 2 {
			summary.currentPage, _ = strconv.Atoi(pageInfo[0])
			summary.totalPage, _ = strconv.Atoi(pageInfo[1])
		}
	})

	doc.Find("table thead tr td").Each(func(_ int, s *goquery.Selection) {
		summary.headers = append(summary.headers, s.Text())
	})
	return summary, nil
}
// QueryPrice runs one price query for a city pair and category and collects
// the rows of every result page.
//
// It requests page 1, reads the summary (headers and pagination counters)
// and the rows from it, and, when the summary reports more than one page,
// appends the rows of the remaining pages via GetRemainRecords.
//
// Returns the collected rows and the summary of the first page. On any
// failure (request error, non-200 status, unparsable HTML, missing summary,
// or an error on a later page) rows and summary are nil and err is set.
func QueryPrice(citys []string, cateId string) (rows []TableRow, summary *CellSummary, err error) {
	resp, err := PostPrice(citys, cateId, 1)
	if err != nil {
		return nil, nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != 200 {
		// Best effort: the body is only used for the diagnostic output.
		body, _ := ioutil.ReadAll(resp.Body)
		fmt.Printf("error status code :%d %s\n", resp.StatusCode, string(body))
		return nil, nil, errors.New("error status code")
	}

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return nil, nil, err
	}

	summary, err = FetchSummary(doc)
	if err != nil {
		return nil, nil, err
	}
	if summary == nil {
		// Guard the dereferences below against a summary-less page.
		return nil, nil, errors.New("no summary found in response")
	}

	rows, err = FetchRows(doc)
	if err != nil {
		return nil, nil, err
	}

	if summary.totalPage > 1 {
		remaining, remErr := GetRemainRecords(citys, cateId, summary)
		if remErr != nil {
			return nil, nil, remErr
		}
		rows = append(rows, remaining...)
	}
	return rows, summary, nil
}
// GetRemainRecords fetches result pages 2..summary.totalPage for the given
// city pair and category and returns their rows concatenated in page order.
//
// It stops at the first failing page and returns that error with nil rows.
// A nil summary is reported as an error.
func GetRemainRecords(citys []string, cateId string, summary *CellSummary) (rows []TableRow, err error) {
	if summary == nil {
		return nil, errors.New("nil summary")
	}
	for page := 2; page <= summary.totalPage; page++ {
		pageRows, pageErr := fetchPageRows(citys, cateId, page)
		if pageErr != nil {
			return nil, pageErr
		}
		rows = append(rows, pageRows...)
	}
	return rows, nil
}

// fetchPageRows requests a single result page and returns its table rows.
// Keeping this in its own function makes the deferred Body.Close run as soon
// as the page has been processed, rather than piling up until every page has
// been fetched.
func fetchPageRows(citys []string, cateId string, pageNumber int) ([]TableRow, error) {
	resp, err := PostPrice(citys, cateId, pageNumber)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != 200 {
		// Best effort: the body is only used for the diagnostic output.
		body, _ := ioutil.ReadAll(resp.Body)
		fmt.Printf("error status code :%d %s\n", resp.StatusCode, string(body))
		return nil, errors.New("error status code")
	}

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return nil, err
	}
	return FetchRows(doc)
}
// getTextBetween returns the part of str that lies between the first
// occurrence of start and the first occurrence of end that follows it.
//
// It returns "" when start is not found, or when end does not occur after
// start. The end marker is searched only in the text after start, so an
// earlier occurrence of end elsewhere in str does not hide a valid match.
func getTextBetween(str, start, end string) string {
	startIndex := strings.Index(str, start)
	if startIndex == -1 {
		return ""
	}
	rest := str[startIndex+len(start):]

	endIndex := strings.Index(rest, end)
	if endIndex == -1 {
		return ""
	}
	return rest[:endIndex]
}
// pageInfoRe matches a "<current>/<total>页" page indicator and captures the
// two numbers. Compiled once at package scope instead of on every call.
var pageInfoRe = regexp.MustCompile(`(\d+)/(\d+)页`)

// getPageInfo extracts the current and total page numbers from str.
//
// It returns a two-element slice {current, total} holding the digits of the
// first "<current>/<total>页" occurrence, or {"", ""} when str contains no
// such indicator.
func getPageInfo(str string) []string {
	matches := pageInfoRe.FindStringSubmatch(str)
	if len(matches) < 3 {
		return []string{"", ""}
	}
	// matches[0] is the whole match; the capture groups follow.
	return matches[1:]
}
// main scrapes the city list and the category list, queries the price report
// for every ordered city pair and every category, and writes each non-empty
// result set to its own workbook named
// "<start>-<end>-<category>-<record count>条.xlsx".
//
// The first error is printed to stdout and ends the run.
func main() {
	cityCombs, cateInfos, err := GetCitysAndCateInfo()
	if err != nil {
		fmt.Println(err)
		return
	}

	for _, cityComb := range cityCombs {
		for cateId, cateInfo := range cateInfos {
			rows, summary, err := QueryPrice(cityComb, cateId)
			if err != nil {
				fmt.Println(err)
				return
			}
			// Nothing to export for this combination.
			if summary == nil || summary.totalRecords <= 0 {
				continue
			}

			name := fmt.Sprintf("%s-%s-%s-%d条", cityComb[0], cityComb[1], cateInfo, summary.totalRecords)
			// ".xlsx" (with the dot) so the start line names the same file
			// as the "ok" line printed once the workbook is saved.
			fmt.Println(time.Now().Format("2006-01-02 15:04:05") + " " + name + ".xlsx start")

			if err := GenerateXls(summary.headers, rows, name); err != nil {
				fmt.Println(err)
				return
			}
		}
	}
}
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/Joran/wtquery.git
git@gitee.com:Joran/wtquery.git
Joran
wtquery
wtquery
master

搜索帮助

0d507c66 1850385 C8b1a773 1850385