Spaces:
Sleeping
Sleeping
| from selenium import webdriver | |
| from selenium.common.exceptions import WebDriverException | |
| from PIL import Image | |
| from io import BytesIO | |
| import time, requests | |
| from bs4 import BeautifulSoup | |
| from selenium.webdriver.common.by import By | |
| from selenium.webdriver.support.ui import WebDriverWait | |
| from selenium.webdriver.support import expected_conditions as EC | |
def take_webdata(url):
    """Load *url* in headless Chrome and return a screenshot plus the page title.

    Args:
        url: Address of the page to open.

    Returns:
        A ``(image, page_title)`` tuple. ``image`` is the PIL image decoded
        from the PNG screenshot. If the browser raises ``WebDriverException``,
        a 1x1 RGB placeholder image is returned instead, together with the
        title read so far (``""`` when the failure happened before the title
        was available).
    """
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')

    # Bind these before the try block: if Chrome fails to start (or the page
    # fails to load), the except/finally clauses below would otherwise hit an
    # unassigned local and mask the real error with UnboundLocalError.
    wd = None
    page_title = ""
    try:
        wd = webdriver.Chrome(options=options)
        wd.set_window_size(1080, 720)  # Adjust the window size here
        wd.get(url)
        # NOTE: an implicit wait only applies to element lookups; it does not
        # pause here. Kept so the driver configuration stays as before.
        wd.implicitly_wait(5)
        page_title = wd.title
        screenshot = wd.get_screenshot_as_png()
    except WebDriverException as e:
        print("take_webdata failed:", e)
        return Image.new('RGB', (1, 1)), page_title
    finally:
        # Always release the browser process, but only if one was started.
        if wd is not None:
            wd.quit()
    return Image.open(BytesIO(screenshot)), page_title
def _scrape_detail_rows(driver, container_id, label):
    """Collect pokok/denda/total dicts from the ``.row`` elements of one panel.

    Best effort: on any error the message is printed and the rows gathered so
    far are returned.
    """
    details = []
    try:
        # The first .row inside the container is the header, so skip it.
        rows = driver.find_elements(By.CSS_SELECTOR, f"#{container_id} .row")[1:]
        for row in rows:
            print(f"[ROW {label}]", row.text)
            # Read each column's text once; every .text access goes through
            # the WebDriver.
            texts = [p.text for p in row.find_elements(By.TAG_NAME, "p")]
            print(f"[COLS {label}]", texts)
            if len(texts) >= 3:
                details.append({
                    "pokok": texts[0].strip(),
                    "denda": texts[1].strip(),
                    "total": texts[2].strip(),
                })
    except Exception as e:
        print(f"Gagal parsing {container_id}:", e)
    return details


def scrape_vehicle(driver):
    """Scrape vehicle data and bill details from the page loaded in *driver*.

    Each section is parsed independently and on a best-effort basis: a failure
    in one section is printed and leaves that section with whatever was
    collected before the error.

    Args:
        driver: Selenium WebDriver already showing the result page.

    Returns:
        A 4-tuple ``(data_kendaraan, total_tagihan, rincians_pkb,
        rincians_swd)``:

        * ``data_kendaraan`` - dict built from ``table tr`` rows having at
          least three ``td`` cells; the first cell (lower-cased, dots removed,
          spaces replaced by underscores) is the key, the third the value.
        * ``total_tagihan`` - list of dicts with ``pokok``, ``denda``,
          ``total`` and ``jenis`` taken from ``div.row`` elements.
        * ``rincians_pkb`` / ``rincians_swd`` - lists of dicts with ``pokok``,
          ``denda`` and ``total`` from ``#det_pkb`` / ``#det_swd``.
    """
    data_kendaraan = {}
    try:
        for row in driver.find_elements(By.CSS_SELECTOR, "table tr"):
            cols = row.find_elements(By.TAG_NAME, "td")
            if len(cols) >= 3:
                key = cols[0].text.strip().lower().replace(".", "").replace(" ", "_")
                data_kendaraan[key] = cols[2].text.strip()
    except Exception as e:
        print("Gagal parsing tabel:", e)

    total_tagihan = []
    try:
        for row in driver.find_elements(By.CSS_SELECTOR, "div.row"):
            # Fetch the rendered text once instead of once per keyword check.
            row_text = row.text
            print("[ROW TOTAL]", row_text)
            # Rows mentioning these words are treated as headers and skipped.
            # NOTE(review): get_vehicle_info_bs4 matches upper-case keywords
            # and also skips rows inside det_pkb/det_swd; this function does
            # neither - confirm against the live page which one is intended.
            if any(word in row_text for word in ("Pokok", "Denda", "Total")):
                continue
            texts = [p.text for p in row.find_elements(By.TAG_NAME, "p")]
            print("[COLS TOTAL]", texts)
            if len(texts) >= 4:
                total_tagihan.append({
                    "pokok": texts[0].strip(),
                    "denda": texts[1].strip(),
                    "total": texts[2].strip(),
                    "jenis": texts[3].strip(),
                })
    except Exception as e:
        print("Gagal parsing total tagihan:", e)

    rincians_pkb = _scrape_detail_rows(driver, "det_pkb", "PKB")
    rincians_swd = _scrape_detail_rows(driver, "det_swd", "SWD")

    return data_kendaraan, total_tagihan, rincians_pkb, rincians_swd
def _parse_detail_rows_bs4(soup, container_id):
    """Return pokok/denda/total dicts for the rows inside ``div#container_id``.

    Returns an empty list when the container is not present in *soup*.
    """
    container = soup.find("div", id=container_id)
    if container is None:
        return []
    details = []
    # The first div.row inside the container is the header, so skip it.
    for row in container.find_all("div", class_="row")[1:]:
        cols = row.find_all("p")
        if len(cols) >= 3:
            details.append({
                "pokok": cols[0].get_text(strip=True),
                "denda": cols[1].get_text(strip=True),
                "total": cols[2].get_text(strip=True),
            })
    return details


def get_vehicle_info_bs4(nopol: str, timeout: float = 30):
    """Fetch and parse vehicle tax information for a plate number over HTTP.

    Requests ``infopkb.php`` with ``no_polisi=nopol`` and parses the returned
    HTML with BeautifulSoup.

    Args:
        nopol: Plate number sent as the ``no_polisi`` query parameter.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout the call could block indefinitely.

    Returns:
        A 4-tuple ``(data_kendaraan, total_tagihan, rincians_pkb,
        rincians_swd)``:

        * ``data_kendaraan`` - dict built from rows of the first ``table``
          having at least three ``td`` cells; the first cell (lower-cased,
          dots removed, spaces replaced by underscores) is the key, the third
          the value.
        * ``total_tagihan`` - list of dicts with ``pokok``, ``denda``,
          ``total`` and ``jenis`` from ``div.row`` elements outside
          ``det_pkb`` and ``det_swd``.
        * ``rincians_pkb`` / ``rincians_swd`` - lists of dicts with ``pokok``,
          ``denda`` and ``total`` from ``div#det_pkb`` / ``div#det_swd``.

    Raises:
        requests.exceptions.RequestException: Propagated from ``requests.get``
            (connection failure, timeout, ...).
    """
    content = requests.get(
        "https://www.jambisamsat.net/infopkb.php",
        params={"no_polisi": nopol},
        timeout=timeout,
    ).content
    soup = BeautifulSoup(content, "html.parser")

    # === 1. Vehicle data ===
    data_kendaraan = {}
    table = soup.find("table")
    if table is not None:
        for row in table.find_all("tr"):
            cols = row.find_all("td")
            if len(cols) >= 3:
                key = cols[0].get_text(strip=True).lower().replace(".", "").replace(" ", "_")
                data_kendaraan[key] = cols[2].get_text(strip=True)

    # === 2. Bill totals (div.row outside det_pkb and det_swd) ===
    total_tagihan = []
    for row in soup.find_all("div", class_="row"):
        if row.find_parent(id="det_pkb") or row.find_parent(id="det_swd"):
            continue
        # Compute the row text once rather than once per keyword check.
        row_text = row.text
        # Rows containing these words are treated as headers and skipped.
        if any(word in row_text for word in ("POKOK", "DENDA", "TOTAL")):
            continue
        ps = row.find_all("p")
        if len(ps) >= 4:
            total_tagihan.append({
                "pokok": ps[0].get_text(strip=True),
                "denda": ps[1].get_text(strip=True),
                "total": ps[2].get_text(strip=True),
                "jenis": ps[3].get_text(strip=True),
            })

    # === 3. PKB details / 4. SWDKLLJ details ===
    rincians_pkb = _parse_detail_rows_bs4(soup, "det_pkb")
    rincians_swd = _parse_detail_rows_bs4(soup, "det_swd")

    return data_kendaraan, total_tagihan, rincians_pkb, rincians_swd
def get_vehicle_info(driver, plate_number: str):
    """Submit *plate_number* on the info form and scrape the result page.

    Opens ``infopkb.html``, fills the ``no_polisi`` field, submits the form,
    waits for ``infopkb.php`` to load, expands the PKB and SWD detail panels
    and hands the page to ``scrape_vehicle``.

    The driver is always quit before this function returns, so the caller
    must not reuse it afterwards.

    Args:
        driver: Selenium WebDriver to drive; it is closed by this call.
        plate_number: Plate number typed into the ``no_polisi`` field.

    Returns:
        The 4-tuple ``(data_kendaraan, total_tagihan, rincians_pkb,
        rincians_swd)`` produced by ``scrape_vehicle``. When a
        ``WebDriverException`` occurs (including wait timeouts), the error is
        printed and ``({}, [], [], [])`` is returned so the result keeps the
        same shape.
    """
    try:
        driver.get("https://www.jambisamsat.net/infopkb.html")
        time.sleep(1)
        print("[GET URL]")

        # The wait returns the located element, so no second lookup is needed.
        input_field = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, "no_polisi"))
        )
        input_field.clear()
        input_field.send_keys(plate_number)

        submit_button = driver.find_element(
            By.CSS_SELECTOR, 'button.btn.btn-primary[type="submit"]'
        )
        submit_button.click()
        print("BUTTON CLICKED")

        # Wait for the result page to load.
        WebDriverWait(driver, 10).until(EC.url_contains("infopkb.php"))
        print("PHP LOADED")

        # NOTE: this sets the implicit element-lookup timeout; it is not a
        # pause. Kept because it also affects the lookups in scrape_vehicle.
        driver.implicitly_wait(3)

        # Grow the window to the full page height.
        scroll_height = driver.execute_script("return document.body.scrollHeight")
        driver.set_window_size(1920, scroll_height + 200)

        # Expand both detail panels before scraping them.
        for button_id in ("show_det_pkb", "show_det_swd"):
            WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.ID, button_id))
            ).click()
        print("SCRIPT EXECUTED")
        time.sleep(1)

        return scrape_vehicle(driver)
    except WebDriverException as e:
        # Previously this handler referenced an undefined name and returned a
        # differently shaped value; report the error and keep the 4-tuple.
        print("get_vehicle_info failed:", e)
        return {}, [], [], []
    finally:
        driver.quit()