diff options
| author | Shulhan <ms@kilabit.info> | 2025-01-12 01:49:28 +0700 |
|---|---|---|
| committer | Shulhan <ms@kilabit.info> | 2025-01-12 01:49:28 +0700 |
| commit | 7dc39f66a28bf8493087dd0a102c02f5a064913e (patch) | |
| tree | 26dcfa11af6c03f60f89b93adb9230b18ea4574d | |
| parent | 95c8ba5699efa80fc19f7e210bcf4d1e189244e2 (diff) | |
| download | kbbi-7dc39f66a28bf8493087dd0a102c02f5a064913e.tar.xz | |
all: use login with two attempts
The first attempt detects whether we have been blocked by Cloudflare
and stores the response cookies.
The second attempt sends the request again with those cookies.
Hopefully, this solves the login request being blocked.
While at it, add debug code to dump the HTTP request headers
on preLogin and Lookup.
| -rw-r--r-- | client.go | 102 |
1 files changed, 64 insertions, 38 deletions
@@ -12,6 +12,7 @@ import ( "log" "net/http" "net/http/cookiejar" + "net/http/httputil" "net/url" "os" "path/filepath" @@ -64,6 +65,9 @@ const ( maxPageNumber = 501 ) +var errBlocked = errors.New(`you have been blocked`) +var errTokenNotFound = errors.New(`token login not found`) + // Client for official KBBI web using HTTP. type Client struct { httpc *libhttp.Client @@ -81,15 +85,6 @@ func NewClient(opts ClientOptions) (cl *Client, err error) { return nil, fmt.Errorf("New: %w", err) } - jarOpt := &cookiejar.Options{ - PublicSuffixList: publicsuffix.List, - } - - jar, err := cookiejar.New(jarOpt) - if err != nil { - return nil, fmt.Errorf("New: %w", err) - } - var clientOpts = libhttp.ClientOptions{ ServerURL: kbbiUrlBase, Timeout: defTimeout, @@ -101,10 +96,16 @@ func NewClient(opts ClientOptions) (cl *Client, err error) { opts: opts, } - cl.httpc.Jar = jar - var transport *http.Transport = cl.httpc.Transport() + var jarOpt = &cookiejar.Options{ + PublicSuffixList: publicsuffix.List, + } + cl.httpc.Jar, err = cookiejar.New(jarOpt) + if err != nil { + return nil, fmt.Errorf("New: %w", err) + } // This fix connection being blocked by CloudFlare. + var transport *http.Transport = cl.httpc.Transport() transport.TLSClientConfig.MinVersion = tls.VersionTLS13 err = cl.loadCookies() @@ -112,15 +113,15 @@ func NewClient(opts ClientOptions) (cl *Client, err error) { return nil, fmt.Errorf("New: %w", err) } - if cl.cookies != nil { - jar.SetCookies(cookieURL, cl.cookies) - } + cl.httpc.Jar.SetCookies(cookieURL, cl.cookies) return cl, nil } // Lookup lookup definition of one or more words. 
func (cl *Client) Lookup(ins []string) (res LookupResponse, err error) { + var logp = `Lookup` + res = make(LookupResponse, len(ins)) for _, in := range ins { @@ -142,6 +143,14 @@ func (cl *Client) Lookup(ins []string) (res LookupResponse, err error) { continue } + if cl.opts.Debug == 1 { + var rawRequest []byte + rawRequest, err = httputil.DumpRequest(resp.HTTPResponse.Request, false) + if err == nil { + log.Printf(`%s: HTTP raw request: %s`, logp, rawRequest) + } + } + kata, err = cl.parseHTMLEntri(in, resp.Body) if err != nil { kata.err = err @@ -165,7 +174,7 @@ func (cl *Client) parseHTMLEntri(in string, htmlBody []byte) (word *Word, err er if cl.opts.Debug == 1 { var htmlText = html.Sanitize(htmlBody) - log.Printf("%s:\n%s", logp, htmlText) + log.Printf(`%s: HTML response text: %s`, logp, htmlText) } iter, err := html.Parse(bytes.NewReader(htmlBody)) @@ -305,22 +314,17 @@ func (cl *Client) Login(email, pass string) (err error) { } if resp.HTTPResponse.StatusCode >= http.StatusBadRequest { + var htmlText = html.Sanitize(resp.Body) return fmt.Errorf(`%s: %d %s`, logp, - resp.HTTPResponse.StatusCode, resp.Body) + resp.HTTPResponse.StatusCode, htmlText) } cl.cookies = cl.httpc.Jar.Cookies(cl.cookieURL) - cl.setCookies() cl.saveCookies() return nil } -// setCookies for HTTP request that need an authentication. -func (cl *Client) setCookies() { - cl.httpc.Jar.SetCookies(cl.cookieURL, cl.cookies) -} - func (cl *Client) parseHTMLRootWords(htmlBody []byte) ( rootWords Words, err error, ) { @@ -355,14 +359,15 @@ func (cl *Client) parseHTMLLogin(htmlBody []byte) ( ) { var logp = `parseHTMLLogin` - if cl.opts.Debug == 1 { - var htmlText = html.Sanitize(htmlBody) - log.Printf("%s:\n%s", logp, htmlText) + var htmlText = html.Sanitize(htmlBody) + if bytes.Contains(htmlText, []byte(errBlocked.Error())) { + // We have been blocked by Cloudflare. 
+ return ``, fmt.Errorf(`%s: %w`, logp, errBlocked) } iter, err := html.Parse(bytes.NewReader(htmlBody)) if err != nil { - return "", err + return ``, fmt.Errorf(`%s: %w`, logp, err) } for node := iter.Next(); node != nil; node = iter.Next() { @@ -384,24 +389,45 @@ func (cl *Client) parseHTMLLogin(htmlBody []byte) ( } } - return ``, errors.New(`token login not found`) + return ``, fmt.Errorf(`%s: %w`, logp, errTokenNotFound) } // preLogin initialize the client to get the first cookie. func (cl *Client) preLogin() (token string, err error) { - var req = libhttp.ClientRequest{ - Method: http.MethodGet, - Path: kbbiUrlLogin, - } - var resp *libhttp.ClientResponse - resp, err = cl.httpc.Get(req) - if err != nil { - return "", err - } + var logp = `preLogin` + var attempt = 0 + + for attempt <= 1 { + var req = libhttp.ClientRequest{ + Method: http.MethodGet, + Path: kbbiUrlLogin, + } - token, err = cl.parseHTMLLogin(resp.Body) + var resp *libhttp.ClientResponse + resp, err = cl.httpc.Get(req) + if err != nil { + return ``, fmt.Errorf(`%s: %w`, logp, err) + } + + if cl.opts.Debug == 1 { + var rawRequest []byte + rawRequest, err = httputil.DumpRequest(resp.HTTPResponse.Request, false) + if err == nil { + log.Printf(`%s: HTTP request: %s`, logp, rawRequest) + } + } + + token, err = cl.parseHTMLLogin(resp.Body) + if err != nil { + if errors.Is(err, errBlocked) { + attempt++ + continue + } + return ``, fmt.Errorf(`%s: %w`, logp, err) + } + } if err != nil { - return "", err + return ``, fmt.Errorf(`%s: %w`, logp, err) } return token, nil |
