记一次失败的网络空间测绘引擎薅羊毛行动

0x00 前言

从「规则列表 - 网络空间测绘,网络空间安全搜索引擎,网络空间搜索引擎,安全态势感知 - FOFA网络空间测绘系统」页面得知 Fofa 累计有 32 万左右的指纹规则集:

image-20230615004257272

于是开始想着咋样能把这些规则薅过来,有一个办法是先获取到每个产品对应的 query,然后对命中的网站进行聚类分析,这样能减少许多工作量,于是开始准备第一步,获取所有产品对应的 query。

0x01 失败的尝试

在网站下方可以看到 Fofa 已经对所有产品进行了一个概括性的分类:

image-20230615004448438

点击某个 Tab 就会跳转到查询页面:

image-20230615004537973

点击查看更多可以获得更多的产品以及其对应的 query:

image-20230615004607835

于是开始抓包分析:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
GET /v1/rules/categories?cid=10579&keyword=&page=1&size=10&ts=1686761038776&sign=pZSYgHM8tSCbGltzIn9D3yLTmAD4JJ0HJAg8TtwHtvljJ1VZOJg9FnXY2zfKM5l%2FkhFqjy5lJik8tsSHAU%2FjhB26h38ys7NmVveZ%2FT6MoVvLm25uRlSsdNL1RcEEd%2F8iIbu3N3g%2BXbEOPR%2Fq11lKNldqiavMuf%2BVvnDDcXmtoSfLKidFxKs5VY6cCKueVKtYmVTc0tf%2F85Wa4J4%2FvchIaDFpcfGfJPAizdBKBQr72o8Ax5nRN4Jw0gTl98%2FZpCE5jLZJJVITN%2Bjjc8lFtcY9%2BhiktVS%2Blr9gpJJxvBSWRyTLLS5sE8ich%2FD6S00IXplZ8Yktn1Ky2Dm%2BFvSsKCKzSg%3D%3D&app_id=9e9fb94330d97833acfbc041ee1a76793f1bc691 HTTP/1.1
Host: api.fofa.info
Sec-Ch-Ua: "Not.A/Brand";v="8", "Chromium";v="114", "Google Chrome";v="114"
Accept: application/json, text/plain, */*
Sec-Ch-Ua-Mobile: ?0
Authorization: <auth>
User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36
Sec-Ch-Ua-Platform: "macOS"
Origin: https://fofa.info
Sec-Fetch-Site: same-site
Sec-Fetch-Mode: cors
Sec-Fetch-Dest: empty
Referer: https://fofa.info/
Accept-Encoding: gzip, deflate
Accept-Language: en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7
Connection: close


整个包由两个部分组成,请求的 Query 以及身份认证用的 Authorization,其中 Query 里的 sign 用于签名校验,因此还需要逆一下签名算法才能自己构造请求发包。

前端 js 只经过压缩没有混淆,可以比较轻松的找到加密代码:

image-20230615004822927

sortFun 用于生成请求签名(sign),代码如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
// Builds the request signature. `e` is the parameter object; the optional
// second argument (read through `arguments[1]`, defaulting to "") is handed
// to `r.sign` as the signing key. Returns the base64-encoded "RSA-SHA256"
// signature of the concatenated key/value string.
function(e) {
var t = arguments.length > 1 && void 0 !== arguments[1] ? arguments[1] : ""
, n = {}
, o = {};
// Keep only the entries whose value stringifies to a non-empty string.
for (var a in e)
String(e[a]) && (n[a] = e[a]);
// Copy the surviving entries into `o`, visiting the keys in sort() order.
Object.keys(n).sort().map((function(e) {
o[e] = n[e]
}
));
// Concatenate every key immediately followed by its value, without separators.
var i = "";
Object.keys(o).map((function(e) {
i += "".concat(e).concat(o[e])
}
));
// NOTE(review): `Zo` is defined outside this snippet; it presumably exposes a
// Node-style crypto API (createSign/update/sign) — confirm against the bundle.
var r = Zo.createSign("RSA-SHA256");
return r.update(i),
r.sign(t, "base64")
}

签名逻辑也比较清晰:先去掉值为空的参数,把剩余参数按 key 排序后拼接成 key+value 形式的字符串,再用 RSA-SHA256 对其签名,最后把签名结果做 base64 编码(注意这里是签名和编码,并不是加密)。按照这个逻辑可以写一个 Go 的签名代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
// genSign returns the base64 (standard alphabet, padded) encoding of the
// RSASSA-PKCS1-v1_5 signature over the SHA-256 digest of s, signed with the
// RSA private key embedded in the function body.
//
// The return type leaves no room for an error value and the key is a
// compile-time literal, so a failure here can only mean the literal was
// corrupted. In that case genSign panics with a message naming the failing
// step instead of dereferencing a nil block or key.
func genSign(s []byte) string {
	hashed := sha256.Sum256(s)
	privateKeyPEM := []byte("-----BEGIN RSA PRIVATE KEY-----\r\nMIIEogIBAAKCAQEAv0xjefuBTF6Ox940ZqLLUFFBDtTcB9dAfDjWgyZ2A55K+VdG\r\nc1L5LqJWuyRkhYGFTlI4K5hRiExvjXuwIEed1norp5cKdeTLJwmvPyFgaEh7Ow19\r\nTu9sTR5hHxThjT8ieArB2kNAdp8Xoo7O8KihmBmtbJ1umRv2XxG+mm2ByPZFlTdW\r\nRFU38oCPkGKlrl/RzOJKRYMv10s1MWBPY6oYkRiOX/EsAUVae6zKRqNR2Q4HzJV8\r\ngOYMPvqkau8hwN8i6r0z0jkDGCRJSW9djWk3Byi3R2oSdZ0IoS+91MFtKvWYdnNH\r\n2Ubhlnu1P+wbeuIFdp2u7ZQOtgPX0mtQ263e5QIDAQABAoIBAD67GwfeTMkxXNr3\r\n5/EcQ1XEP3RQoxLDKHdT4CxDyYFoQCfB0e1xcRs0ywI1be1FyuQjHB5Xpazve8lG\r\nnTwIoB68E2KyqhB9BY14pIosNMQduKNlygi/hKFJbAnYPBqocHIy/NzJHvOHOiXp\r\ndL0AX3VUPkWW3rTAsar9U6aqcFvorMJQ2NPjijcXA0p1MlZAZKODO2wqidfQ487h\r\nxy0ZkriYVi419j83a1cCK0QocXiUUeQM6zRNgQv7LCmrFo2X4JEzlujEveqvsDC4\r\nMBRgkK2lNH+AFuRwOEr4PIlk9rrpHA4O1V13P3hJpH5gxs5oLLM1CWWG9YWLL44G\r\nzD9Tm8ECgYEA8NStMXyAmHLYmd2h0u5jpNGbegf96z9s/RnCVbNHmIqh/pbXizcv\r\nmMeLR7a0BLs9eiCpjNf9hob/JCJTms6SmqJ5NyRMJtZghF6YJuCSO1MTxkI/6RUw\r\nmrygQTiF8RyVUlEoNJyhZCVWqCYjctAisEDaBRnUTpNn0mLvEXgf1pUCgYEAy1kE\r\nd0YqGh/z4c/D09crQMrR/lvTOD+LRMf9lH+SkScT0GzdNIT5yuscRwKsnE6SpC5G\r\nySJFVhCnCBsQqq+ohsrXt8a99G7ePTMSAGK3QtC7QS3liDmvPBk6mJiLrKiRAZos\r\nvgPg7nTP8VuF0ZIKzkdWbGoMyNxVFZXovQ8BYxECgYBvCR9xGX4Qy6KiDlV18wNu\r\nElYkxVqFBBE0AJRg/u+bnQ9jWhi2zxLa1eWZgtss80c876I8lbkGNWedOVZioatm\r\nMFLC4bFalqyZWyO7iP7i60LKvfDJfkOSlDUu3OikahFOiqyG1VBz4+M4U500alIU\r\nAVKD14zTTZMopQSkgUXsoQKBgHd8RgiD3Qde0SJVv97BZzP6OWw5rqI1jHMNBK72\r\nSzwpdxYYcd6DaHfYsNP0+VIbRUVdv9A95/oLbOpxZNi2wNL7a8gb6tAvOT1Cvggl\r\n+UM0fWNuQZpLMvGgbXLu59u7bQFBA5tfkhLr5qgOvFIJe3n8JwcrRXndJc26OXil\r\n0Y3RAoGAJOqYN2CD4vOs6CHdnQvyn7ICc41ila/H49fjsiJ70RUD1aD8nYuosOnj\r\nwbG6+eWekyLZ1RVEw3eRF+aMOEFNaK6xKjXGMhuWj3A9xVw9Fauv8a2KBU42Vmcd\r\nt4HRyaBPCQQsIoErdChZj8g7DdxWheuiKoN4gbfK4W1APCcuhUA=\r\n-----END RSA PRIVATE KEY-----")

	// pem.Decode reports failure by returning a nil block, not an error.
	block, _ := pem.Decode(privateKeyPEM)
	if block == nil {
		panic("genSign: embedded private key is not valid PEM")
	}
	privateKey, err := x509.ParsePKCS1PrivateKey(block.Bytes)
	if err != nil {
		panic("genSign: parsing embedded PKCS#1 private key: " + err.Error())
	}

	signature, err := rsa.SignPKCS1v15(rand.Reader, privateKey, crypto.SHA256, hashed[:])
	if err != nil {
		panic("genSign: signing digest: " + err.Error())
	}
	return base64.StdEncoding.EncodeToString(signature)
}

// genTimestampStr returns the current time as a decimal string holding the
// number of milliseconds elapsed since the Unix epoch.
func genTimestampStr() string {
	nanos := time.Now().UnixNano()
	return fmt.Sprint(nanos / int64(time.Millisecond))
}

// generateData builds the string that is later signed: every parameter whose
// value is non-empty contributes its key immediately followed by its value,
// and the pairs are emitted in ascending (byte-wise) key order with no
// separators. An empty or nil map yields "".
func generateData(params map[string]string) string {
	// Parameters with empty values are left out of the signed string.
	names := make([]string, 0, len(params))
	for name, value := range params {
		if value == "" {
			continue
		}
		names = append(names, name)
	}
	// Map iteration order is random; sorting makes the output deterministic.
	sort.Strings(names)

	var out bytes.Buffer
	for _, name := range names {
		fmt.Fprintf(&out, "%s%s", name, params[name])
	}
	return out.String()
}

// main signs one sample parameter set (with a fresh "ts" value) and prints
// the resulting base64 signature to standard output.
func main() {
	params := map[string]string{
		"cid":     "10579",
		"keyword": "",
		"page":    "1",
		"size":    "10",
		"ts":      genTimestampStr(),
	}
	payload := generateData(params)
	fmt.Println(genSign([]byte(payload)))
}

解决了 sign 的问题之后就可以尝试爬虫了,写了个比较简单的体验逻辑,没有做递归:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
package main

import (
"bytes"
"crypto"
"crypto/rand"
"crypto/rsa"
"crypto/sha256"
"crypto/x509"
"encoding/base64"
"encoding/json"
"encoding/pem"
"fmt"
"io/ioutil"
"net/http"
"os"
"sort"
"time"
)

// genSign returns the base64 (standard alphabet, padded) encoding of the
// RSASSA-PKCS1-v1_5 signature over the SHA-256 digest of s, signed with the
// RSA private key embedded in the function body.
//
// The return type leaves no room for an error value and the key is a
// compile-time literal, so a failure here can only mean the literal was
// corrupted. In that case genSign panics with a message naming the failing
// step instead of dereferencing a nil block or key.
func genSign(s []byte) string {
	hashed := sha256.Sum256(s)
	privateKeyPEM := []byte("-----BEGIN RSA PRIVATE KEY-----\r\nMIIEogIBAAKCAQEAv0xjefuBTF6Ox940ZqLLUFFBDtTcB9dAfDjWgyZ2A55K+VdG\r\nc1L5LqJWuyRkhYGFTlI4K5hRiExvjXuwIEed1norp5cKdeTLJwmvPyFgaEh7Ow19\r\nTu9sTR5hHxThjT8ieArB2kNAdp8Xoo7O8KihmBmtbJ1umRv2XxG+mm2ByPZFlTdW\r\nRFU38oCPkGKlrl/RzOJKRYMv10s1MWBPY6oYkRiOX/EsAUVae6zKRqNR2Q4HzJV8\r\ngOYMPvqkau8hwN8i6r0z0jkDGCRJSW9djWk3Byi3R2oSdZ0IoS+91MFtKvWYdnNH\r\n2Ubhlnu1P+wbeuIFdp2u7ZQOtgPX0mtQ263e5QIDAQABAoIBAD67GwfeTMkxXNr3\r\n5/EcQ1XEP3RQoxLDKHdT4CxDyYFoQCfB0e1xcRs0ywI1be1FyuQjHB5Xpazve8lG\r\nnTwIoB68E2KyqhB9BY14pIosNMQduKNlygi/hKFJbAnYPBqocHIy/NzJHvOHOiXp\r\ndL0AX3VUPkWW3rTAsar9U6aqcFvorMJQ2NPjijcXA0p1MlZAZKODO2wqidfQ487h\r\nxy0ZkriYVi419j83a1cCK0QocXiUUeQM6zRNgQv7LCmrFo2X4JEzlujEveqvsDC4\r\nMBRgkK2lNH+AFuRwOEr4PIlk9rrpHA4O1V13P3hJpH5gxs5oLLM1CWWG9YWLL44G\r\nzD9Tm8ECgYEA8NStMXyAmHLYmd2h0u5jpNGbegf96z9s/RnCVbNHmIqh/pbXizcv\r\nmMeLR7a0BLs9eiCpjNf9hob/JCJTms6SmqJ5NyRMJtZghF6YJuCSO1MTxkI/6RUw\r\nmrygQTiF8RyVUlEoNJyhZCVWqCYjctAisEDaBRnUTpNn0mLvEXgf1pUCgYEAy1kE\r\nd0YqGh/z4c/D09crQMrR/lvTOD+LRMf9lH+SkScT0GzdNIT5yuscRwKsnE6SpC5G\r\nySJFVhCnCBsQqq+ohsrXt8a99G7ePTMSAGK3QtC7QS3liDmvPBk6mJiLrKiRAZos\r\nvgPg7nTP8VuF0ZIKzkdWbGoMyNxVFZXovQ8BYxECgYBvCR9xGX4Qy6KiDlV18wNu\r\nElYkxVqFBBE0AJRg/u+bnQ9jWhi2zxLa1eWZgtss80c876I8lbkGNWedOVZioatm\r\nMFLC4bFalqyZWyO7iP7i60LKvfDJfkOSlDUu3OikahFOiqyG1VBz4+M4U500alIU\r\nAVKD14zTTZMopQSkgUXsoQKBgHd8RgiD3Qde0SJVv97BZzP6OWw5rqI1jHMNBK72\r\nSzwpdxYYcd6DaHfYsNP0+VIbRUVdv9A95/oLbOpxZNi2wNL7a8gb6tAvOT1Cvggl\r\n+UM0fWNuQZpLMvGgbXLu59u7bQFBA5tfkhLr5qgOvFIJe3n8JwcrRXndJc26OXil\r\n0Y3RAoGAJOqYN2CD4vOs6CHdnQvyn7ICc41ila/H49fjsiJ70RUD1aD8nYuosOnj\r\nwbG6+eWekyLZ1RVEw3eRF+aMOEFNaK6xKjXGMhuWj3A9xVw9Fauv8a2KBU42Vmcd\r\nt4HRyaBPCQQsIoErdChZj8g7DdxWheuiKoN4gbfK4W1APCcuhUA=\r\n-----END RSA PRIVATE KEY-----")

	// pem.Decode reports failure by returning a nil block, not an error.
	block, _ := pem.Decode(privateKeyPEM)
	if block == nil {
		panic("genSign: embedded private key is not valid PEM")
	}
	privateKey, err := x509.ParsePKCS1PrivateKey(block.Bytes)
	if err != nil {
		panic("genSign: parsing embedded PKCS#1 private key: " + err.Error())
	}

	signature, err := rsa.SignPKCS1v15(rand.Reader, privateKey, crypto.SHA256, hashed[:])
	if err != nil {
		panic("genSign: signing digest: " + err.Error())
	}
	return base64.StdEncoding.EncodeToString(signature)
}

// genTimestampStr returns the current time as a decimal string holding the
// number of milliseconds elapsed since the Unix epoch; main uses it as the
// value of the "ts" request parameter.
func genTimestampStr() string {
	nanos := time.Now().UnixNano()
	return fmt.Sprint(nanos / int64(time.Millisecond))
}

// generateData builds the string that is later signed: every parameter whose
// value is non-empty contributes its key immediately followed by its value,
// and the pairs are emitted in ascending (byte-wise) key order with no
// separators. An empty or nil map yields "".
func generateData(params map[string]string) string {
	// Parameters with empty values are left out of the signed string.
	names := make([]string, 0, len(params))
	for name, value := range params {
		if value == "" {
			continue
		}
		names = append(names, name)
	}
	// Map iteration order is random; sorting makes the output deterministic.
	sort.Strings(names)

	var out bytes.Buffer
	for _, name := range names {
		fmt.Fprintf(&out, "%s%s", name, params[name])
	}
	return out.String()
}

// Rule is a single entry of a category's "rules" array: a name and its
// associated code string.
type Rule struct {
	Name string `json:"name"`
	Code string `json:"code"`
}

// Category is one element of the "categories" array, carrying its own id,
// name, total and list of rules.
type Category struct {
	ID    int    `json:"id"`
	Name  string `json:"name"`
	Total int    `json:"total"`
	Rules []Rule `json:"rules"`
}

// Page mirrors the "page" object of the payload (num, size, total).
type Page struct {
	Num   int `json:"num"`
	Size  int `json:"size"`
	Total int `json:"total"`
}

// ResponseData mirrors the "data" object: the categories on this page, a
// total and the paging information.
type ResponseData struct {
	Categories []Category `json:"categories"`
	Total      int        `json:"total"`
	Page       Page       `json:"page"`
}

// Response is the JSON envelope decoded from the /v1/rules/categories
// endpoint: a numeric code, a message and the data payload.
type Response struct {
	Code    int          `json:"code"`
	Message string       `json:"message"`
	Data    ResponseData `json:"data"`
}

// SaveCategoriesData appends every rule found in response to rules.txt in the
// current working directory, one per line, formatted as "<name>:::<code>".
// The file is created if it does not exist.
//
// The file is opened once per call; the previous version reopened it for
// every rule and deferred the Close inside the loop, so handles accumulated
// until the function returned. The function keeps its best-effort contract
// (no return value), but failures are now reported on standard error instead
// of being silently dropped, and writing stops at the first error.
func SaveCategoriesData(response Response) {
	f, err := os.OpenFile("rules.txt", os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
	if err != nil {
		fmt.Fprintln(os.Stderr, "SaveCategoriesData: opening rules.txt:", err)
		return
	}
	defer func() {
		// A failed Close on a written file can mean lost data, so surface it.
		if err := f.Close(); err != nil {
			fmt.Fprintln(os.Stderr, "SaveCategoriesData: closing rules.txt:", err)
		}
	}()

	for _, category := range response.Data.Categories {
		for _, rule := range category.Rules {
			if _, err := f.WriteString(rule.Name + ":::" + rule.Code + "\n"); err != nil {
				fmt.Fprintln(os.Stderr, "SaveCategoriesData: writing rules.txt:", err)
				return
			}
		}
	}
}

func main() {
auth := "<auth>"

arguments := map[string]string{"cid": "10579", "keyword": "", "page": "1", "size": "10", "ts": genTimestampStr()}
firstReq, _ := http.NewRequest("GET", "https://api.fofa.info/v1/rules/categories?cid="+arguments["cid"]+"&keyword=&page=1&size=10&ts="+arguments["ts"]+"&sign="+genSign([]byte(generateData(arguments)))+"&app_id=9e9fb94330d97833acfbc041ee1a76793f1bc691", nil)
firstReq.Header.Add("Authorization", auth)
httpResp, err := http.DefaultClient.Do(firstReq)
if err != nil {
panic(err)
}

var resp Response
err = json.NewDecoder(httpResp.Body).Decode(&resp)
if err != nil {
panic(err)
}

totalPage := resp.Data.Page.Total/resp.Data.Page.Size + 1
for i := 101; i <= totalPage; i++ {
fmt.Println("正在爬取第" + fmt.Sprintf("%d", i) + "页数据,总页数:" + fmt.Sprintf("%d", totalPage))
arguments["page"] = fmt.Sprintf("%d", i)
arguments["ts"] = genTimestampStr()
req, _ := http.NewRequest("GET", "https://api.fofa.info/v1/rules/categories?cid="+arguments["cid"]+"&keyword=&page="+arguments["page"]+"&size=10&ts="+arguments["ts"]+"&sign="+genSign([]byte(generateData(arguments)))+"&app_id=9e9fb94330d97833acfbc041ee1a76793f1bc691", nil)
req.Header.Add("Authorization", auth)
httpResp, err := http.DefaultClient.Do(req)
if err != nil {
panic(err)
}
content, _ := ioutil.ReadAll(httpResp.Body)
fmt.Println(string(content))

err = json.NewDecoder(httpResp.Body).Decode(&resp)
if err != nil {
panic(err)
}
SaveCategoriesData(resp)
}
}

爬虫也遇到了一些问题,比如会说频率太快,所以最后在每次请求前都加了:time.Sleep(1 * time.Second)

然后出现了第二个问题:

image-20230615005300983

在今年早些时候,FoFa 给每个账号都添加了每日请求限制,这个问题无解,只能通过添加更多的账号来爬更多的数据(单日);过了十二点,我继续尝试,发现了另外的问题:

image-20230615005428148

当请求的 page 超过 100 时就会报权限不足的错误,这个问题更是无解,直接给这次的 “薅羊毛行动” 判了死刑,至此这次 “薅羊毛行动” 也落下了失败的帷幕。