Project not available for free download! If you are interested in purchasing the software part of the project with the functionality described on the given page, contact: martinius96@gmail.com
ESP8266 WiFi Web scraping HTTPClient WiFiClient

Web scraper built on the ESP8266 WiFi chip

  • The system, developed for the ESP8266, reads data from the Web and retrieves the required information from a site.
  • In practice, you can get phone numbers, email addresses, contacts, business addresses, and similar information of interest from a given URL.
  • The ESP8266 crawls the site, collects the data, and archives it in a prepared MySQL database, from which the data can be exported to, for example, .csv or .xml format and used for other tasks.
  • Data is converted to a consistent format, for example a phone number written with spaces between its groups of three digits.
  • The ESP8266 board also supports HTTPS, so it can connect to encrypted websites and collect data from them securely.
  • Uniqueness of the data (removal of duplicates) can be handled either on the ESP8266 side or on the web server to which the collected data is sent for post-processing.
  • The board primarily goes through the entire HTML source code of the site, or through the site's .js libraries.
  • In addition to the primary data, secondary data can also be downloaded from the Web, such as image addresses, image resolutions, JSON data, or even an RSS feed.
  • Acquired data can be incorporated into your web application or mobile app in real-time.
  • Thanks to HTTP header authentication, it is also possible to download data that is only available behind the site's login.
  • #include <ESP8266WiFi.h>
    #include <WiFiClientSecure.h> //only for 2.3.0. core
    #include <ESP8266HTTPClient.h>
    // Credentials of the WiFi network the board joins (placeholders, replace with real values).
    const char * ssid = "my_ssid";
    const char * password = "my_pass";
    // HTTPS server that is scraped and the TLS port it listens on.
    const char * host = "www.cielovyweb.sk";
    const int httpsPort = 443;
    // Fingerprint of the server certificate: 20 hex byte pairs separated by single spaces.
    // The literal must contain only hex digits and spaces; the previous version
    // started with invisible Unicode marks and had no closing quote.
    // NOTE(review): presumably has to be refreshed when the server certificate is renewed - confirm.
    const char * fingerprint = "a6 02 4d e1 32 b0 0b fe 56 85 0f 84 03 ec b2 18 23 09 f0 63";
    // Buffer for the response line currently being read in index(); every received
    // character is appended to it and it is reset to "" after a '\n' has been handled.
    String inData;
    /**
     * One-time start-up routine.
     *
     * Opens the serial console, applies a static IP configuration, joins the
     * WiFi network given by `ssid` / `password`, reports the assigned address
     * and then runs index() once.
     */
    void setup() {
      Serial.begin(500000);
      Serial.println();

      // Static addressing: board address and gateway first, the address is
      // reported on the console before the subnet mask is declared.
      IPAddress ip(192, 168, 2, 50);
      IPAddress gateway(192, 168, 2, 20);
      Serial.print(F("Setting static ip to : "));
      Serial.println(ip);
      IPAddress subnet(255, 255, 255, 0); // subnet mask of the local network
      WiFi.config(ip, gateway, subnet);

      // Join the WiFi network using the configured password.
      Serial.println("pripajam na ");
      Serial.println(ssid);
      WiFi.begin(ssid, password);

      // Block until the connection is up, printing a dot every 500 ms.
      for (;;) {
        if (WiFi.status() == WL_CONNECTED) {
          break;
        }
        delay(500);
        Serial.println(".");
      }

      // Connected: report the address the board ended up with.
      Serial.println("");
      Serial.println("WiFi pripojene");
      Serial.println("IP adresa: ");
      Serial.println(WiFi.localIP());

      index();
    }
    void index() {
      WiFiClientSecure client; //HTTPS client
      if (client.verify(fingerprint, host)) {} else {}
      if (client.connect(host, httpsPort)) {
        Serial.println("Connected to server!");
        String url = "/";
        client.print(String("GET ") + url + " HTTP/1.1\r\n" + "Host: " + host + "\r\n" + "User-Agent: NodeMCU\r\n" + "Connection: close\r\n\r\n");
    
    
        // if there are incoming bytes available
        // from the server, read them and print them:
        while (client.available()) {
          char thisChar = client.read();
          inData += thisChar;
    
    
          // echo to the server what's been received to confirm we have the string
          if (thisChar == '\n')
          {
            Serial.print("\nreceived:");
            Serial.print(inData);
            HTTPClient http;
            http.begin("http://www.arduino.php5.sk/connect.php");
            http.addHeader("Content-Type", "application/x-www-form-urlencoded");
            http.POST("c=" + inData);
            http.writeToStream(&Serial);
            http.end();
            inData = "";
          }
        }
        client.stop();
        Serial.println("Vsetko");
      } else {
        Serial.println("Nepripojene");
      }
    }
    // Main loop required by the Arduino runtime. Intentionally empty: the
    // scraping run is started once from setup(), nothing is repeated here.
    void loop() {}