diff --git a/webCrawlerGoogle/Form1.Designer.cs b/webCrawlerGoogle/Form1.Designer.cs index f1ec423..e119349 100644 --- a/webCrawlerGoogle/Form1.Designer.cs +++ b/webCrawlerGoogle/Form1.Designer.cs @@ -28,64 +28,65 @@ /// private void InitializeComponent() { - this.txtCategory = new System.Windows.Forms.TextBox(); - this.txtCity = new System.Windows.Forms.TextBox(); - this.btnSearch = new System.Windows.Forms.Button(); - this.lstResults = new System.Windows.Forms.ListBox(); - this.SuspendLayout(); - + txtCategory = new TextBox(); + txtCity = new TextBox(); + btnSearch = new Button(); + lstResults = new ListBox(); + SuspendLayout(); // // txtCategory // - this.txtCategory.Location = new System.Drawing.Point(12, 12); - this.txtCategory.Name = "txtCategory"; - this.txtCategory.Size = new System.Drawing.Size(260, 20); - this.txtCategory.TabIndex = 0; - this.txtCategory.PlaceholderText = "Rubrik (z.B. Restaurant)"; - + txtCategory.Location = new Point(14, 14); + txtCategory.Margin = new Padding(4, 3, 4, 3); + txtCategory.Name = "txtCategory"; + txtCategory.PlaceholderText = "Rubrik (z.B. Restaurant)"; + txtCategory.Size = new Size(303, 23); + txtCategory.TabIndex = 0; // // txtCity // - this.txtCity.Location = new System.Drawing.Point(12, 38); - this.txtCity.Name = "txtCity"; - this.txtCity.Size = new System.Drawing.Size(260, 20); - this.txtCity.TabIndex = 1; - this.txtCity.PlaceholderText = "Stadt (z.B. Berlin)"; - + txtCity.Location = new Point(14, 44); + txtCity.Margin = new Padding(4, 3, 4, 3); + txtCity.Name = "txtCity"; + txtCity.PlaceholderText = "Stadt (z.B. Berlin)"; + txtCity.Size = new Size(303, 23); + txtCity.TabIndex = 1; // // btnSearch // - this.btnSearch.Location = new System.Drawing.Point(12, 64); - this.btnSearch.Name = "btnSearch"; - this.btnSearch.Size = new System.Drawing.Size(260, 23); - this.btnSearch.TabIndex = 2; - this.btnSearch.Text = "Suchen"; - this.btnSearch.UseVisualStyleBackColor = true; - this.btnSearch.Click += new System.EventHandler(this.btnSearch_Click); - + btnSearch.Location = new Point(14, 74); + btnSearch.Margin = new Padding(4, 3, 4, 3); + btnSearch.Name = "btnSearch"; + btnSearch.Size = new Size(303, 27); + btnSearch.TabIndex = 2; + btnSearch.Text = "Suchen"; + btnSearch.UseVisualStyleBackColor = true; + btnSearch.Click += btnSearch_Click; // // lstResults // - this.lstResults.FormattingEnabled = true; - this.lstResults.Location = new System.Drawing.Point(12, 93); - this.lstResults.Name = "lstResults"; - this.lstResults.Size = new System.Drawing.Size(260, 147); - this.lstResults.TabIndex = 3; - + lstResults.FormattingEnabled = true; + lstResults.ItemHeight = 15; + lstResults.Location = new Point(14, 107); + lstResults.Margin = new Padding(4, 3, 4, 3); + lstResults.Name = "lstResults"; + lstResults.Size = new Size(303, 169); + lstResults.TabIndex = 3; // // Form1 // - this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F); - this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font; - this.ClientSize = new System.Drawing.Size(284, 261); - this.Controls.Add(this.lstResults); - this.Controls.Add(this.btnSearch); - this.Controls.Add(this.txtCity); - this.Controls.Add(this.txtCategory); - this.Name = "Form1"; - this.Text = "Google Business Suche"; - this.ResumeLayout(false); - this.PerformLayout(); + AutoScaleDimensions = new SizeF(7F, 15F); + AutoScaleMode = AutoScaleMode.Font; + ClientSize = new Size(331, 301); + Controls.Add(lstResults); + Controls.Add(btnSearch); + Controls.Add(txtCity); + Controls.Add(txtCategory); + Margin = new Padding(4, 3, 4, 3); + Name = "Form1"; + Text = "Google Business Suche"; + ResumeLayout(false); + PerformLayout(); } #endregion diff --git a/webCrawlerGoogle/Form1.cs b/webCrawlerGoogle/Form1.cs index bd94e70..fb4271a 100644 --- a/webCrawlerGoogle/Form1.cs +++ b/webCrawlerGoogle/Form1.cs @@ -1,13 +1,19 @@ using MySql.Data.MySqlClient; -using Newtonsoft.Json.Linq; -using static System.Windows.Forms.VisualStyles.VisualStyleElement.ListView; +using HtmlAgilityPack; +using System; +using System.Net.Http; +using System.Threading.Tasks; +using System.Diagnostics; namespace webCrawlerGoogle { public partial class Form1 : Form { - private const string ApiKey = "AIzaSyCjdysuuyc2bs4ikqT8xyIpPJHiDZ4CEo4"; // Füge hier deinen API-Schlüssel ein - private string connectionString = "Server=192.168.178.201;Database=domainchecker;User ID=root;Password=1td5rugut8;"; + private static string connectionString = "Server=192.168.178.201;Database=domainchecker;User ID=root;Password=1td5rugut8;"; + private static int searchCounter = 0; + private static int maxSearchesBeforeVPNChange = 20; // Anzahl der Suchanfragen, bevor der VPN gewechselt wird + private static int pauseAfterEachSearchMs = 2000; // Pause nach jeder Suchanfrage (2 Sekunden) + private static int longPauseAfterBatchMs = 60000; // Längere Pause nach einer Reihe von Suchanfragen (1 Minute) public Form1() { @@ -16,86 +22,177 @@ namespace webCrawlerGoogle private async void btnSearch_Click(object sender, EventArgs e) { - string category = txtCategory.Text; - string city = txtCity.Text; + var searchParams = LoadSearchParamsFromDatabase(); - if (string.IsNullOrEmpty(category) || string.IsNullOrEmpty(city)) + foreach (var param in searchParams) { - MessageBox.Show("Bitte sowohl eine Rubrik als auch eine Stadt eingeben."); - return; - } + string category = param.Rubrik; + string city = param.Stadt; - string requestUri = $"https://maps.googleapis.com/maps/api/place/textsearch/json?query={category}+in+{city}&key={ApiKey}"; + AppendToOutput($"Suche nach: {category} in {city}"); + await ScrapeGoogleResults(category, city); - using (HttpClient client = new HttpClient()) - { - HttpResponseMessage response = await client.GetAsync(requestUri); - if (response.IsSuccessStatusCode) + searchCounter++; + if (searchCounter >= maxSearchesBeforeVPNChange) { - string jsonResult = await response.Content.ReadAsStringAsync(); - JObject result = JObject.Parse(jsonResult); + ChangeVPN(); + searchCounter = 0; - lstResults.Items.Clear(); + AppendToOutput("Längere Pause nach VPN-Wechsel..."); + await Task.Delay(longPauseAfterBatchMs); + } + else + { + AppendToOutput("Kurze Pause nach Suchanfrage..."); + await Task.Delay(pauseAfterEachSearchMs); + } + } + } - foreach (var place in result["results"]) + private async Task ScrapeGoogleResults(string category, string city) + { + string query = $"{category} in {city}"; + string searchUrl = $"https://www.google.com/search?q={Uri.EscapeDataString(query)}"; + + using (var httpClient = new HttpClient()) + { + var html = await httpClient.GetStringAsync(searchUrl); + + var doc = new HtmlAgilityPack.HtmlDocument(); + doc.LoadHtml(html); + + var resultNodes = doc.DocumentNode.SelectNodes("//div[@class='BNeawe UPmit AP7Wnd']/a"); + + if (resultNodes != null) + { + foreach (var node in resultNodes) { - string website = await GetPlaceDetails((string)place["place_id"]); - if (!string.IsNullOrEmpty(website)) + var url = node.GetAttributeValue("href", string.Empty); + if (!string.IsNullOrEmpty(url)) { - lstResults.Items.Add(website); - SaveToDatabase(website); + AppendToOutput($"Gefundene URL: {url}"); + // Hier kannst du die URL in der Datenbank speichern oder weiterverarbeiten } } } else { - MessageBox.Show("Fehler bei der Anfrage: " + response.StatusCode); + AppendToOutput("Keine Ergebnisse gefunden."); } } } - private async Task GetPlaceDetails(string placeId) + private void ChangeVPN() { - string requestUri = $"https://maps.googleapis.com/maps/api/place/details/json?place_id={placeId}&fields=website&key={ApiKey}"; + AppendToOutput("Wechsel des VPN-Servers..."); - using (HttpClient client = new HttpClient()) + // VPN trennen + ExecuteCommand("nordvpn disconnect"); + + // Wähle einen zufälligen VPN-Server aus einer Liste oder verbinde mit dem nächsten verfügbaren Server + ExecuteCommand("nordvpn connect"); + + AppendToOutput("VPN-Server gewechselt."); + } + + private void ExecuteCommand(string command) + { + var processInfo = new ProcessStartInfo("cmd.exe", "/c " + command) { - HttpResponseMessage response = await client.GetAsync(requestUri); - if (response.IsSuccessStatusCode) - { - string jsonResult = await response.Content.ReadAsStringAsync(); - JObject result = JObject.Parse(jsonResult); + CreateNoWindow = true, + UseShellExecute = false, + RedirectStandardOutput = true, + RedirectStandardError = true + }; - return (string)result["result"]["website"]; + using (var process = Process.Start(processInfo)) + { + process.WaitForExit(); + var output = process.StandardOutput.ReadToEnd(); + var error = process.StandardError.ReadToEnd(); + + if (!string.IsNullOrEmpty(output)) + { + AppendToOutput(output); } - return null; + if (!string.IsNullOrEmpty(error)) + { + AppendToOutput("Fehler: " + error); + } } } - private void SaveToDatabase(string website) + private void AppendToOutput(string text) { + if (InvokeRequired) + { + Invoke(new Action(() => AppendToOutput(text))); + return; + } + + lstResults.Items.Add(text + Environment.NewLine); + } + + private List LoadSearchParamsFromDatabase() + { + var searchParams = new List(); + using (MySqlConnection conn = new MySqlConnection(connectionString)) { conn.Open(); - string queryCheck = "SELECT COUNT(*) FROM webCrawler WHERE webseite = @website"; - using (MySqlCommand cmdCheck = new MySqlCommand(queryCheck, conn)) + + // Städte aus der Tabelle Staedte laden + var staedte = new List(); + string queryStaedte = "SELECT staedte FROM staedte"; + using (MySqlCommand cmdStaedte = new MySqlCommand(queryStaedte, conn)) { - cmdCheck.Parameters.AddWithValue("@website", website); - int count = Convert.ToInt32(cmdCheck.ExecuteScalar()); - - if (count == 0) + using (MySqlDataReader readerStaedte = cmdStaedte.ExecuteReader()) { - string queryInsert = "INSERT INTO webCrawler (webseite) VALUES (@website)"; - using (MySqlCommand cmdInsert = new MySqlCommand(queryInsert, conn)) + while (readerStaedte.Read()) { - cmdInsert.Parameters.AddWithValue("@website", website); - - cmdInsert.ExecuteNonQuery(); + staedte.Add(readerStaedte.GetString("staedte")); } } } + + // Rubriken aus der Tabelle Rubriken laden + var rubriken = new List(); + string queryRubriken = "SELECT rubriken FROM rubriken"; + using (MySqlCommand cmdRubriken = new MySqlCommand(queryRubriken, conn)) + { + using (MySqlDataReader readerRubriken = cmdRubriken.ExecuteReader()) + { + while (readerRubriken.Read()) + { + rubriken.Add(readerRubriken.GetString("rubriken")); + } + } + } + + // Kombiniere jede Rubrik mit jeder Stadt und füge sie zur Liste hinzu + foreach (var stadt in staedte) + { + foreach (var rubrik in rubriken) + { + searchParams.Add(new SearchParam + { + Rubrik = rubrik, + Stadt = stadt + }); + } + } } + + return searchParams; } } + + public class SearchParam + { + public string Rubrik { get; set; } + public string Stadt { get; set; } + } } + + diff --git a/webCrawlerGoogle/Sicherung.txt b/webCrawlerGoogle/Sicherung.txt new file mode 100644 index 0000000..5c6e0e3 --- /dev/null +++ b/webCrawlerGoogle/Sicherung.txt @@ -0,0 +1,191 @@ + private const string ApiKey = "GELÖSCHT"; //"AIzaSyCjdysuuyc2bs4ikqT8xyIpPJHiDZ4CEo4"; // Füge hier deinen API-Schlüssel ein + private string connectionString = "Server=192.168.178.201;Database=domainchecker;User ID=root;Password=1td5rugut8;"; + + public Form1() + { + InitializeComponent(); + } + + private async void btnSearch_Click(object sender, EventArgs e) + { + var searchParams = LoadSearchParamsFromDatabase(); + + foreach (var param in searchParams) + { + string category = param.Rubrik; + string city = param.Stadt; + + string requestUri = $"https://maps.googleapis.com/maps/api/place/textsearch/json?query={category}+in+{city}&key={ApiKey}"; + await FetchResults(requestUri, city, category); + + } + Console.Beep(); + MessageBox.Show("Fertig!"); + } + + + private async Task FetchResults(string requestUri, string stadt, string rubrik) + { + using (HttpClient client = new HttpClient()) + { + bool hasNextPage = true; + + while (hasNextPage) + { + HttpResponseMessage response = await client.GetAsync(requestUri); + if (response.IsSuccessStatusCode) + { + string jsonResult = await response.Content.ReadAsStringAsync(); + JObject result = JObject.Parse(jsonResult); + + foreach (var place in result["results"]) + { + string placeId = (string)place["place_id"]; + string website = await GetPlaceDetails(placeId); + if (!string.IsNullOrEmpty(website)) + { + string cleanedUrl = CleanUrl(website); + lstResults.Items.Add(cleanedUrl); + SaveToDatabase(cleanedUrl, stadt, rubrik); + } + } + + if (result["next_page_token"] != null) + { + string nextPageToken = (string)result["next_page_token"]; + await Task.Delay(2000); // 2 Sekunden warten + requestUri = $"https://maps.googleapis.com/maps/api/place/textsearch/json?pagetoken={nextPageToken}&key={ApiKey}"; + } + else + { + hasNextPage = false; + } + } + else + { + MessageBox.Show("Fehler bei der Anfrage: " + response.StatusCode); + hasNextPage = false; + } + } + } + } + + + private async Task GetPlaceDetails(string placeId) + { + string requestUri = $"https://maps.googleapis.com/maps/api/place/details/json?place_id={placeId}&fields=website&key={ApiKey}"; + + using (HttpClient client = new HttpClient()) + { + HttpResponseMessage response = await client.GetAsync(requestUri); + if (response.IsSuccessStatusCode) + { + string jsonResult = await response.Content.ReadAsStringAsync(); + JObject result = JObject.Parse(jsonResult); + + return (string)result["result"]["website"]; + } + + return null; + } + } + + private void SaveToDatabase(string website, string stadt, string rubrik) + { + using (MySqlConnection conn = new MySqlConnection(connectionString)) + { + conn.Open(); + string queryCheck = "SELECT COUNT(*) FROM webCrawler WHERE webseite = @website"; + using (MySqlCommand cmdCheck = new MySqlCommand(queryCheck, conn)) + { + cmdCheck.Parameters.AddWithValue("@website", website); + int count = Convert.ToInt32(cmdCheck.ExecuteScalar()); + + if (count == 0) + { + string queryInsert = "INSERT INTO webCrawler (webseite, stadt, rubrik) VALUES (@website, @stadt, @rubrik)"; + using (MySqlCommand cmdInsert = new MySqlCommand(queryInsert, conn)) + { + cmdInsert.Parameters.AddWithValue("@website", website); + cmdInsert.Parameters.AddWithValue("@stadt", stadt); + cmdInsert.Parameters.AddWithValue("@rubrik", rubrik); + + cmdInsert.ExecuteNonQuery(); + } + } + } + } + } + public static string CleanUrl(string url) + { + // Entferne das Protokoll (http, https) und 'www.' + Uri uri = new Uri(url); + string host = uri.Host; + + // Prüfen, ob "www." am Anfang des Hosts ist und entfernen + if (host.StartsWith("www.")) + { + host = host.Substring(4); + } + + return host; + } + private List LoadSearchParamsFromDatabase() + { + var searchParams = new List(); + + using (MySqlConnection conn = new MySqlConnection(connectionString)) + { + conn.Open(); + + // Zuerst alle Städte aus der Tabelle Staedte laden + var staedte = new List(); + string queryStaedte = "SELECT staedte FROM staedte"; + using (MySqlCommand cmdStaedte = new MySqlCommand(queryStaedte, conn)) + { + using (MySqlDataReader readerStaedte = cmdStaedte.ExecuteReader()) + { + while (readerStaedte.Read()) + { + staedte.Add(readerStaedte.GetString("staedte")); + } + } + } + + // Dann alle Rubriken aus der Tabelle Rubriken laden + var rubriken = new List(); + string queryRubriken = "SELECT rubriken FROM rubriken"; + using (MySqlCommand cmdRubriken = new MySqlCommand(queryRubriken, conn)) + { + using (MySqlDataReader readerRubriken = cmdRubriken.ExecuteReader()) + { + while (readerRubriken.Read()) + { + rubriken.Add(readerRubriken.GetString("rubriken")); + } + } + } + + // Kombiniere jede Rubrik mit jeder Stadt und füge sie zur Liste hinzu + foreach (var stadt in staedte) + { + foreach (var rubrik in rubriken) + { + searchParams.Add(new SearchParam + { + Rubrik = rubrik, + Stadt = stadt + }); + } + } + } + + return searchParams; + } + } + + public class SearchParam + { + public string Rubrik { get; set; } + public string Stadt { get; set; } + } \ No newline at end of file diff --git a/webCrawlerGoogle/webCrawlerGoogle.csproj b/webCrawlerGoogle/webCrawlerGoogle.csproj index 9addb57..b2df46c 100644 --- a/webCrawlerGoogle/webCrawlerGoogle.csproj +++ b/webCrawlerGoogle/webCrawlerGoogle.csproj @@ -9,6 +9,7 @@ +