
    :i                     h
   d dl mZmZ d dlZd dlmZmZmZmZm	Z	m
Z
mZmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZmZmZmZmZmZ d d	lmZ d d
lmZ d dl m!Z! d dl"m"Z"  e       Z# ejH                  e%      Z&de'de(de'de(fdZ)e#jU                  de       eejV                         edd       eddd       edd      d eejX                        fdede'de'de-d ee-   f
d!       Z.e#j_                  de0e	jb                  "       ed#d$%       ed&d'd(d)*       ed#d+%       eejV                         eejX                        fd,ed-e
d.e(d/e'de(defd0       Z2e#j_                  d1ee	jb                  "       ed2d3%       eejV                         eejX                        fd4edefd5       Z3e#j_                  d6ee	jb                  "       eejV                         eejX                        fd7edefd8       Z4e#jk                  d9e0       eejV                         eejX                        fd:e'd,edefd;       Z6e#jo                  d9e0       eejV                         eejX                        fd:e'defd<       Z8e#jU                  d9e       eejV                         eejX                        fd:e'ded=efd>       Z9e#j_                  d?e       ed#d@%       ed&d'd(dA*       ed#dB%       eejV                         eejX                        fd:e'd.e(d/e'de(def
dC       Z:e#j_                  dDe0e	jv                  "       ed#d@%       ed&d'd(dA*       ed#dB%       eejV                         eejX                        fd.e(d/e'de(defdE       Z<e#j_                  dFe0       ed#d@%       ed&d'd(dA*       ed#dG%       eejV                         eejX                        fd:e'd.e(d/e'de(def
dH       Z=e#j_                  dIe0e	jv                  "       ed#d@%       ed&d'd(dA*       ed#dG%       eejV                         eejX                        fd.e(d/e'de(defdJ       Z>e#j_                  dKe0       eejV                         eejX                        fd:e'defdL       Z?de'fdMZ@e#j_                  dNe0       eejV                         eejX                        fd:e'd-e
defdO       ZAe#jU                  dPe0       eejV                         eejX                        fd:e'defdQ       ZBy)R    )ListOptionalN)	APIRouterDependsHTTPExceptionQuerystatusBackgroundTasks
UploadFileFile)AsyncSession)select)deps)Product)ProductCreateProductUpdateProductResponseProductListResponseBulkProductUploadBulkUploadResponse)ScrapingResponse)ProductService)ScraperServicedatetime
product_idheadless_modetimeout_secondsenable_discoveryc                   K   	 ddl m} ddlm} t        j	                  d|          |       4 d{   }|j                  t        t              j                  t        j                  | k(               d{   }|j                         j                         }|s+t        j                  d|  d       	 ddd      d{    y|j                  }	|j                  }
t        |j                         }t        j	                  d|	 d	|  d
       t        j	                  d       g }g }ddddddddd}t        j	                  d       	 t#        j$                  |j                  |j                  |j                   ||       d{   \  }}|j'                  |       |j'                  |       t)        |      |d   d<   t)        |      |d   d<   d|d   d<   t        j	                  dt)        |       dt)        |       d       t        j	                  d       	 t#        j.                  || |||       d{   }|j1                  dg       }|j1                  dg       }|j'                  |       |j'                  |       t)        |      |d   d<   t)        |      |d   d<   d|d   d<   t        j	                  d t)        |       dt)        |       d       t3               }g }|D ]v  }|j1                  d"      |j1                  d#      |j1                  d$      t5        |j1                  d%d      d&      f}||vsU|j7                  |       |j9                  |       x  |j:                         j=                         }||_        |jA                          d{    t        j	                  d'd(        t        j	                  d)|         t        j	                  d(        t        j	                  d*|	        t        j	                  d+t)        |              t        j	                  d,t)        |              t        j	                  d-|        t        j	                  d.|d   d    d/|d   d    d|d   d    d0       t        j	                  d1|d   d    d/|d   d    d|d   d    d0       t        j	                  d( d'       ddd      d{    y7 7 s7 ,7 i# t*        $ rB}dt-        |       |d   d<   t        j                  dt-        |       d       Y d}~0d}~ww xY w7 # t*        $ rB}dt-        |       |d   d<   t        j                  d!t-        |       d       Y d}~d}~ww xY w7 7 # 1 d{  7  sw Y   yxY w# t*        $ r0}t        j                  d2|  d3t-        |       d4       Y d}~yd}~ww xY ww)5z
    Background task to perform scraping after product creation.
    This runs asynchronously without blocking the API response.
    Executes UNIFIED SCRAPING: Google Search + Shopping Light (same as /run-scraping endpoint)
    r   AsyncSessionLocalr   uG   🚀 [Background] Starting UNIFIED first-time scraping for product ID: Nu   ❌ [Background] Product z
 not foundz[Background] Product:  (ID: )zC[Background] Using UNIFIED approach: Google Search + Shopping Lightpendingresults
violationsr	   google_searchshopping_lightu?   [Background] [STEP 1/2] 🔍 Google Search (Organic Results)...product_namebarcodemspproductdbr*   r'   r(   	completedr	   u    [Background] ✅ Google Search: 
 results,  violations	failed - u*   [Background] ⚠️ Google Search failed: Fexc_infouJ   [Background] [STEP 2/2] 🛒 Google Shopping Light + Registered Vendors...r   r   r   r   r+   u!   [Background] ✅ Shopping Light: u+   [Background] ⚠️ Shopping Light failed: vendor_namer-   r/   scraped_price   
P================================================================================u<   ✅ [Background] UNIFIED SCRAPING COMPLETED for product ID: u      • Product:       • Total Results: u(      • Total Violations (deduplicated): u      • Execution Time: u      • Google Search: z (z violations)u      • Shopping Light: u;   ⚠️ [Background] UNIFIED scraping failed for product ID : T)!app.db.sessionr"   r   loggerinfoexecuter   r   whereidscalarsfirsterrorr-   r.   floatr/   r   search_google_serpextendlen	Exceptionstrscrape_product_serpgetsetroundaddappendutcnow	isoformatlast_execution_timecommit)r   r   r   r   r"   r   r1   resultr0   product_name_cachedproduct_barcode_cachedproduct_msp_cachedall_resultsall_violationssearch_summarygoogle_resultsgoogle_violations
google_errshopping_resultshopping_resultsshopping_violationsshopping_errseen_violationsdeduped_violationsvviolation_keyexecution_time_stres                               =/var/www/html/marco-python-backend/app/api/routes/products.py_perform_background_scrapingrn      s|    oz4%]^h]ijk$& f	' f	'"::fWo&;&;GJJ*<T&UVVFnn&,,.G8JOPf	' f	' f	' #*"6"6%,__"!&w{{!3KK01D0EVJ<WXYZKK]_KN-.a9!U./qI"VN KKY[m:H:[:[!(!5!5#OO#; 51 1 "">2%%&78=@=P/	:@CDU@V/=<G/9>s>?R>SS]^abs^t]u  vA  B  C KKdfp(6(J(J)%5"/$3) # $3#6#6y"#E &5&9&9,&K#""#34%%&9:>ABR>S/0;ADEXAY/0>=H/0:?DT@U?VV`adexay`z  {F  G  H "eO!## 	1EE-(EE.)EE%L!%%3Q7	! !7#''6&--a0	1 "1!2!<!<!> +=G'))+KK"VH&KKVWaVbcdKK6($KK*+>*?@AKK0[1A0BCDKKB3GYCZB[\]KK12D1EFGKK01PQY1Z0[[]^lm|^}  H  _I  ^J  JT  Uc  ds  Ut  uA  UB  TC  CO  P  QKK1.AQ2RS[2\1]]_`no  aA  BK  aL  `M  MW  Xf  gw  Xx  yE  XF  WG  GS  T  UKK6("&Mf	' f	' f	'Vf	'65  m>GJGX<Y/9I#j/IZ[fkllm#&  p?H\IZH[=\/0:J3|K\J]^inoop. wf	' f	' f	' f	'P  zRS]R^^`adefag`hitxyyzsF  W*/V. S)V. AV9S,:>V8V. S/V. W*	BV;S5	S2
BS5V!U>U?B"U!A"VAV!V"D6VV. #V$V. (W*)V. ,V/V. 2S55	U >7T;5V;U  VU	V7VVVVV. V+V" V+'V. *W*+V. .	W'7&W"W*"W''W*/)response_model   )ge
   d   )rr   ler-   z&^(product_name|msp|last_scraped_date)$)regexr1   pagelimitsortBysearchc                 v   K   t        j                  | ||||       d{   \  }}t        ||||      S 7 w)zN
    Retrieve a list of products with pagination, sorting, and searching.
    )rw   rx   sort_byrz   N)itemstotalrw   rx   )r   get_productsr   )r1   rw   rx   ry   rz   current_userproductsr~   s           rm   r   r      sD      +77
UF6 OHe XUUSSs   979)rp   status_codeTz1Run browser in headless mode for initial scraping)description      <   z8Timeout in seconds for page load during initial scraping)rr   ru   r   zADiscover alternative vendors via SERP API during initial scraping
product_inbackground_tasksheadlesstimeoutc                 X  K   t        j                  ||        d{   }t        j                  d|j                   d|j
                   d       |j                  t        |j
                  |||       t        j                  d|j
                          d|j
                  dd	d
S 7 w)a  
    Add a new product and automatically trigger scraping in the background.
    
    **Returns immediately** (HTTP 201) without waiting for scraping to complete.
    Scraping happens asynchronously in the background using SERP API.
    
    Parameters:
    - product_in: Product creation data
    - headless: Whether to run browser in headless mode (default: True)
    - timeout: Page load timeout in seconds (default: 15)
    - enable_discovery: Discover alternative vendors via SERP API (default: True)
    
    **Response (Immediate):**
    - message: Product creation success
    - product_id: ID of created product
    - scraping_status: "in_progress" - indicates scraping is running in background
    
    **Note:** Scraping continues in background. You can check results via GET /products/{id} 
    or monitor logs for completion message: "First-time scraping completed for product ID: {id}"
    Nu   ✅ Product created: r#   r$   )r   r   r   r   u3   📌 Background scraping scheduled for product ID: zCProduct added successfully! Scraping is starting in the background.in_progresszuYou can start working with the product while scraping happens. Check logs or GET /products/{id} for scraping results.)messager   scraping_statusnote)r   create_productrA   rB   r-   rE   add_taskrn   )r   r   r   r   r   r1   r   r0   s           rm   add_productr      s     > #11"jAAG
KK'(<(<'=VGJJ<qQR $::)   KKEgjj\RS Yjj( H	  Bs   B*B(BB*z/bulk-upload/excel.z,Excel file (.xlsx or .xls) with product datafilec           
      x  K   	 ddl }ddl}| j                          d{   }|j                  |      }|j	                  |d      }|j
                  }t        j                  d| j                          t        j                  d|j                          t        j                  d|j                          g }	d}
d	}d
}d}t        d|j                  dz         D ]  }	 |j                  ||
      j                  }|j                  ||      j                  }|j                  ||      j                  }|j                  ||      j                  }t        ||||g      s|rt        t!        |            nd}|rt#        t        t!        |                  nd}|rt!        |      nd}|rt#        |      j%                         nd}t'        ||||d      }|	j)                  |        |	st1        t2        j4                  d      t        j                  dt7        |	       d       t9        j:                  ||	       d{   }t        j                  dd        t        j                  d       t        j                  d|d           t        j                  d|d           t        j                  d|d           t        j                  d|d            |d!   r:t        j=                  d"       |d!   dd D ]  }t        j=                  d#|         t        j                  d d       t?        |d   |d   |d   |d    |d!   |d$   %      S 7 @# t*        $ r/}t        j-                  d| dt#        |              Y d}~d}~wt.        $ r/}t        j-                  d| dt#        |              Y d}~d}~ww xY w7 # t0        $ r  t.        $ rO}t        jA                  d&t#        |       d'       t1        t2        jB                  d(t#        |             d}~ww xY ww))a#  
    Bulk upload products from an Excel file.
    
    **File Format Requirements:**
    - Sheet name can be anything (uses first sheet)
    - Header row should be at row 4 with columns: Reference, Name, Barcode / EAN, PVP, Price unit, Pack 2, Pack 3, Pack 4, Pack 5, Pack 6, Pack 12
    - Data rows start from row 5
    - Columns mapping:
        - A: Reference ID (int)
        - B: Product Name (string)
        - C: Barcode (14 digits)
        - D: MSP/PVP (float)
        - E-K: Pack prices (auto-calculated, can be formulas)
    
    **Features:**
    - Auto-calculates pack prices based on MSP (same discount logic applied)
    - Validates barcode uniqueness (skips duplicates)
    - Validates product names and barcodes
    - Returns detailed error report with row numbers
    
    **Response includes:**
    - total_processed: Total rows in file
    - successful: Products created
    - failed: Products with validation errors
    - skipped_duplicates: Products with duplicate barcodes
    - errors: Detailed error list with row info
    
    **Example response:**
    ```json
    {
        "total_processed": 80,
        "successful": 78,
        "failed": 1,
        "skipped_duplicates": 1,
        "errors": [
            {"row": 10, "product": "COMPOSOR 5", "error": "Duplicate barcode: 8422947152055 already exists"},
            {"row": 25, "product": "INVALID", "error": "Validation error: Product name cannot consist solely of numbers"}
        ],
        "message": "Bulk upload completed: 78 created, 1 failed, 1 duplicates skipped"
    }
    ```
    r   NT)	data_onlyu*   📄 [BULK UPLOAD] Processing Excel file: z   Sheet name: z   Total rows: rq   r;         r   )rowcolumn)reference_idr-   r.   r/   r	   z   Skipping row z: Invalid data format - r?   z=No valid products found in Excel file. Check format and data.r   detailu      ✅ Parsed z products from Excelr<   r=   u   ✅ [BULK UPLOAD] Completed      • Total: total_processed      • Successful: 
successful      • Failed: failed      • Duplicates: skipped_duplicateserrorsu      • Errors:z     r   r   r   r   r   r   r   u)   ❌ [BULK UPLOAD] Error processing file: r6   zError processing Excel file: )"openpyxlioreadBytesIOload_workbookactiverA   rB   filenametitlemax_rowrangecellvalueallintrI   rN   stripr   rT   
ValueErrordebugrM   r   r	   HTTP_400_BAD_REQUESTrL   r   bulk_create_productswarningr   rH   HTTP_500_INTERNAL_SERVER_ERROR)r   r1   r   r   r   contentsfile_streamwbwsproducts_listREFERENCE_COLNAME_COLBARCODE_COLMSP_COLrow_idxr   r-   r.   r/   r0   rl   rY   errs                          rm   bulk_upload_excelr      s    `c
 $jj* ##K4#@YY@PQobhhZ01objj\23  Q

Q/ !	G !ww7=wIOO!ww78wDJJ''gk'BHHgg''g:@@ L,EF <Hs5#67T6=#c%.124$'eCjT<Hs<0668d (!-!-# $$W-5!	F "77V 
 	nS%7$88LMN &::2}MMbM"13nV,=%>$?@A)&*>)?@AofX&6%789)&1E*F)GHI(NN^-h'+ .se}-.vhbM"!"#45l+(#%&:;(#9%
 	
] %`  /y8PQTUVQWPXYZ /y3q6(CD N.   
@QITXY==23q6(;
 	

s   P:O MCO "BM"%O &BM",AO ODO P:O "	O+$NO O!$OO OO P7(A
P22P77P:z/bulk-upload/jsonbulk_uploadc           	      ^  K   t         j                  dt        | j                         d       t	        j
                  || j                         d{   }t         j                  dd        t         j                  d       t         j                  d|d           t         j                  d	|d
           t         j                  d|d           t         j                  d|d           t         j                  d d       t        |d   |d
   |d   |d   |d   |d         S 7 حw)aq  
    Bulk upload products from JSON data.
    
    **Request body example:**
    ```json
    {
        "products": [
            {
                "reference_id": 15203,
                "product_name": "COMPOSOR 3 HEPAVESICAL COMPLEX SXXI",
                "barcode": "8422947152031",
                "msp": 13.26,
                "status": true
            },
            {
                "reference_id": 15205,
                "product_name": "COMPOSOR 5 SEDANER C",
                "barcode": "8422947152055",
                "msp": 17.45,
                "status": true
            }
        ]
    }
    ```
    
    **Features:**
    - Auto-calculates pack prices based on MSP using same discount logic
    - Validates barcode uniqueness (skips duplicates)
    - Validates all fields according to ProductCreate schema
    - Returns detailed error report with row numbers
    
    **Response includes:**
    - total_processed: Number of items in request
    - successful: Products created
    - failed: Products with validation errors
    - skipped_duplicates: Products with duplicate barcodes
    - errors: Detailed error list
    u#   📦 [BULK UPLOAD JSON] Processing 	 productsNr<   r=   u    ✅ [BULK UPLOAD JSON] Completedr   r   r   r   r   r   r   r   r   r   r   )rA   rB   rL   r   r   r   r   )r   r1   r   rY   s       rm   bulk_upload_jsonr   t  s'    X KK5c+:N:N6O5PPYZ[ "66r;;O;OPPF
KK"VH
KK24
KK.(9!: ;<=
KK%f\&:%;<=
KK/&"2!345
KK%f-A&B%CDE
KK6("01,'h!"67hy!  Qs   AD-D+CD-z/{id}rE   c                 T   K   t        j                  || |       d{    ddiS 7 w)z0
    Edit an existing product. (Admin only)
    Nr   zProduct updated successfully)r   update_product)rE   r   r1   r   s       rm   r   r     s1      
'
'B

;;;566 <s   (&	(c                 R   K   t        j                  ||        d{    ddiS 7 w)z(
    Delete a product. (Admin only)
    Nr   zProduct deleted successfully)r   delete_product)rE   r1   r   s      rm   r   r     s/      
'
'B
///566 0s   '%	'returnc                 N   K   t        j                  ||        d{   }|S 7 w)z.
    Retrieve a single product by its ID.
    N)r   get_product_by_idrE   r1   r   r0   s       rm   get_productr     s'      #44R<<GN =s   %#%z/{id}/scrapezRun browser in headless modez Timeout in seconds for page loadz3Also discover alternative vendors via Google searchc                    K   t        j                  ||        d{   }|j                  st        t        j                  d      t        j                  || |||       d{   }|S 7 O7 w)a  
    Manually trigger scraping for a specific product.
    Scrapes the product against all registered active vendors.
    When discovery is enabled, also searches Google for alternative sellers and scrapes their prices.
    
    Parameters:
    - id: Product ID to scrape
    - headless: Whether to run browser in headless mode (default: True)
    - timeout: Page load timeout in seconds (default: 15)
    - enable_discovery: if true, discover and scrape alternative vendors (default: true)
    N2Scraping can only be triggered for active productsr   r8   )r   r   r	   r   r   r   scrape_productrE   r   r   r   r1   r   r0   rY   s           rm   trigger_scrapingr     su     ( #44R<<G>>33G
 	
 "00
) F M! ="   A0A,AA0%A.&A0.A0z/scrape-allc                 @  K   t        t              j                  t        j                  dk(        }|j	                  |       d{   }|j                         j                         }|st        t        j                  d      g }d}	d}
g }t        j                  dd        t        j                  dt        |       d	       t        j                  d d
       	 t        |d      D ]"  \  }}d}	 ddlm}  |       }t        j                  d
d        t        j                  d| dt        |       d|j                   d|j                    d	       t        j                  d        t        j                  d|j"                          t        j                  d|j$                          t        j                  d|        t'        j(                  ||j                   || |       d{   }|j+                  dd      }|j+                  dd      }t        j                  d|j                    d       t        j                  d|        t        j                  d|        |j+                  dg       }|rTt        j-                  d|j                   d       |D ],  }t        j-                  d|d     d!|d"   d#d$|d%   d#d       . |	|z  }	|
|z  }
|j/                  |       |s
	 |j9                          d{    % t        j                  dd        t        j                  d,       t        j                  d        t        j                  d-t        |              t        j                  d.t        |              t        j                  d/t        |              t        j                  d0|	        t        j                  d1|
        t        j                  d d
       d2d3t        |       d	t        |      t        |      t        |      |	|
||r|nd|d4
S 7 7 I# t        $ ro}t        j1                  d&|j                    d'|j2                   d()       |j/                  |j                   |j                  |j2                  d*       Y d}~d}~wt4        $ rm}t        j1                  d&|j                    d't7        |       d)       |j/                  |j                   |j                  t7        |      d*       Y d}~4d}~ww xY w7 $# t4        $ r,}t        j-                  d+t7        |              Y d}~ud}~ww xY w# |rR	 |j9                          d{  7   w # t4        $ r+}t        j-                  d+t7        |              Y d}~w d}~ww xY ww xY w# t4        $ rO}t        j1                  d5t7        |       d)       t        t        j:                  d6t7        |             d}~ww xY ww)7a<  
    Manually trigger scraping for ALL active products.
    Scrapes EACH product against ALL registered active vendors.
    For each vendor's website URL: Extract price using search and price detection.
    Then if discovery enabled: Google search for alternative vendors and scrape them too.
    
    This is a SYNCHRONOUS operation - waits for all products to complete before returning.
    Products are scraped sequentially to avoid browser and connection issues.
    
    Parameters:
    - headless: Whether to run browser in headless mode (default: True)
    - timeout: Page load timeout in seconds (default: 15)
    - enable_discovery: Also discover and scrape alternative vendors via Google search (default: True)
    
    Returns:
    - Complete results including all scraped vendors and violations for each product
    TN%No active products found for scrapingr   r   

r=   u(   🚀 [SCRAPE ALL] Starting scraping for r   r<   rq   r!      ────────────────────────────────────────────────────────────────────────────────[ro      ] 📦 PRODUCT: r#   r$           Barcode:            MSP: ₹        Discovery Enabled: r8   scraped_countviolation_count   
✅ [RESULT] Product :u      • Vendors Scraped: u      • Violations Found: r(      
   🚨 VIOLATIONS FOR       - r9      : ₹r:   z.3f
    (MSP: ₹r/      
❌ [ERROR] Product r?   Fr6   r   r-   rH   Warning closing session: u   📊 [SCRAPE ALL] FINAL SUMMARYTotal Products: Successfully Scraped: Failed: zTotal Vendor URLs Scraped: Total Violations Found: r2   Scraping completed for )
r	   r   total_productssuccessful_productsfailed_productstotal_vendors_scrapedtotal_violationsr   failed_listr   u'   

❌ [FATAL ERROR] Scrape All failed: Scraping failed: )r   r   rD   r	   rC   rF   r   r   r   rA   rB   rL   	enumerater@   r"   r-   rE   r.   r/   r   r   rP   r   rT   rH   r   rM   rN   closer   )r   r   r   r1   r   stmtrY   r   all_products_resultstotal_scrapedr   r   idxr0   fresh_dbr"   product_resultr   r   r(   ri   herl   s                          rm   scrape_all_productsr    s    4 '?  4!78D::d##F~~##%H33:
 	

 MO
KK$vh 
KK:3x=/ST
KK6("]
%h2 =	MLCH;M<,.b
O,auAc(m_4DWEYEYDZZ`ahakak`llmnoxj*//@AB.w{{m<=9:J9KLM (6'D'D&zz%5"*$+( " !/ 2 2?A F"0"4"45F"J5gjj\CD6}oFG77HIJ ,//bA
NN%>w?S?S>TTU#VW' |!M2B1C5?I[\_H``jklmrkstwjxxy'z{| . O3 $++N;$ M&nn...w=	M@ 	d6(O$57vh &s8}o67,S1E-F,GHIhs?34561-AB./?.@ABvhbM" "05I1J0K9U!(m#&';#<"?3%2 0,.=?4 0
 	
{ $D"4 ! 5gjj\BII;OZ_`&&")**$+$8$8YY(  
  5gjj\CF8LW[\&&")**$+$8$8 V(   /$ M)B3q6('KLLM M&nn...$ M)B3q6('KLLM <  
@QITXY==&s1vh/
 	

s	  AXP:BX!W 7C=Q 4P=5C>Q 3W 8T2T/T2D)W 9X=Q  	T,	A$R3-U*3T,?A"T'!U*'T,,U*/T22	U';!U"W "U''W *W .VV
VW 	V<	!V7	2W 7V<	<W  W 	XA
XXXz/{id}/scrape-serpz>Also discover alternative vendors via SERP API Google Shoppingc                    K   t        j                  ||        d{   }|j                  st        t        j                  d      t        j                  || |||       d{   }|S 7 O7 w)a  
    Manually trigger scraping for a specific product using SERP API for discovery.
    Same functionality as /{id}/scrape, but uses SERP API Google Shopping Light Engine
    instead of Tavily API for discovering alternative vendors.
    
    Scrapes the product against all registered active vendors.
    When discovery is enabled, also searches SERP API Google Shopping for alternative sellers 
    and scrapes their prices.
    
    Parameters:
    - id: Product ID to scrape
    - headless: Whether to run browser in headless mode (default: True)
    - timeout: Page load timeout in seconds (default: 15)
    - enable_discovery: if true, discover and scrape alternative vendors via SERP API (default: true)
    Nr   r   r8   )r   r   r	   r   r   r   rO   r   s           rm   trigger_scraping_serpr    su     0 #44R<<G>>33G
 	
 "55
) F M! =r   z/scrape-all-serpc                   K   t        t              j                  t        j                  dk(        }|j	                  |       d{   }|j                         j                         }|st        t        j                  d      g }d}	d}
g }t        j                  dd        t        j                  dt        |       d	       t        j                  d d
       	 t        |d      D ]\  \  }}d}	 ddlm}  |       }t        j                  d
d        t        j                  d| dt        |       d|j                   d|j                    d	       t        j                  d        t        j                  d|j"                          t        j                  d|j$                          t        j                  d       t        j                  d|        t'        j(                  ||j                   || |       d{   }|j+                  dd      }|j+                  dd      }t        j                  d|j                    d       t        j                  d|        t        j                  d|        |j+                  dg       }|ryt        j-                  d|j                   d       |D ]Q  }t        j-                  d |j+                  d!d"       d#|j+                  d$d%       d&|j+                  d'd%       d       S |	|z  }	|
|z  }
|j/                  |       |sD	 |j9                          d{    _ t        j                  dd        t        j                  d.       t        j                  d        t        j                  d/t        |              t        j                  d0t        |              t        j                  d1t        |              t        j                  d2|	        t        j                  d3|
        t        j                  d4       t        j                  d d
       d5d6t        |       d	t        |      t        |      t        |      |	|
||r|nd|d7d8d9S 7 E7 # t        $ ro}t        j1                  d(|j                    d)|j2                   d*+       |j/                  |j                   |j                  |j2                  d,       Y d}~d}~wt4        $ rm}t        j1                  d(|j                    d)t7        |       d+       |j/                  |j                   |j                  t7        |      d,       Y d}~Kd}~ww xY w7 ;# t4        $ r,}t        j-                  d-t7        |              Y d}~d}~ww xY w# |rR	 |j9                          d{  7   w # t4        $ r+}t        j-                  d-t7        |              Y d}~w d}~ww xY ww xY w# t4        $ rO}t        j1                  d:t7        |       d+       t        t        j:                  d;t7        |             d}~ww xY ww)<a  
    Manually trigger scraping for ALL active products using SERP API for discovery.
    Same functionality as /scrape-all, but uses SERP API Google Shopping Light Engine
    instead of Tavily API for discovering alternative vendors.
    
    Scrapes EACH product against ALL registered active vendors.
    For each vendor's website URL: Extract price using search and price detection.
    Then if discovery enabled: SERP API search for alternative vendors and scrape them too.
    
    This is a SYNCHRONOUS operation - waits for all products to complete before returning.
    Products are scraped sequentially to avoid browser and connection issues.
    
    Parameters:
    - headless: Whether to run browser in headless mode (default: True)
    - timeout: Page load timeout in seconds (default: 15)
    - enable_discovery: Also discover and scrape alternative vendors via SERP API (default: True)
    
    Returns:
    - Complete results including all scraped vendors and violations for each product
    TNr   r   r   r   r=   u-   🚀 [SCRAPE ALL SERP] Starting scraping for z products using SERP APIr<   rq   r!   r   r   ro   r   r#   r$   r   r   z2        Discovery Method: SERP API Google Shoppingr   r8   total_resultsr   r   r   r>   u      • Total Violations Found: r(   r   r   r9   Unknownr   r:   zN/Ar   r/   r   r?   Fr6   r   r   u$   📊 [SCRAPE ALL SERP] FINAL SUMMARYr   r   r   zTotal Results: r   z7Discovery Method: SERP API Google Shopping Light Enginer2   r   serp_api_google_shoppingbrowser_with_serp_api_discovery)r	   r   r   r   r   r  r   r   r   r   discovery_methodsource_typeu,   

❌ [FATAL ERROR] Scrape All SERP failed: r   )r   r   rD   r	   rC   rF   r   r   r   rA   rB   rL   r   r@   r"   r-   rE   r.   r/   r   rO   rP   r   rT   rH   r   rM   rN   r   r   )r   r   r   r1   r   r   rY   r   r   r   r   r   r   r0   r  r"   r  r  total_violation_countr(   ri   r  rl   s                          rm   scrape_all_products_serpr    s    : '?  4!78D::d##F~~##%H33:
 	

 MO
KK$vh 
KK?HNfgh
KK6("a
%h2 >	MLCH<M<,.b
O,auAc(m_4DWEYEYDZZ`ahakak`llmnoxj*//@AB.w{{m<=PR9:J9KLM (6'I'I&zz%5"*$+( " !/ 2 2?A F(6(:(:;Mq(Q%5gjj\CD4]ODE=>S=TUV ,//bA
NN%>w?S?S>TTU#VW' Y!%%y2Q1RRWXYX]X]^motXuWv  wA  BC  BG  BG  HM  OT  BU  AV  VW  (X  YY . $99 $++N;$ M&nn...y>	MB 	d6(O$:<vh &s8}o67,S1E-F,GHIhs?3456om_56./?.@ABMOvhbM" "05I1J0KKcd!(m#&';#<"?3* 0,.=?4 0 :<
 	
 $F"4 ! 5gjj\BII;OZ_`&&")**$+$8$8YY(  
  5gjj\CF8LW[\&&")**$+$8$8 V(   /$ M)B3q6('KLLM M&nn...$ M)B3q6('KLLM B  
Ec!fXNY]^==&s1vh/
 	

s	  AY/RBY/!X 7DR	R
D#R-X 2VV V
E X 
Y/R	U=A$T>V;U=A"U82V;8U==V; V	V8!V3-X 3V88X ;X?WW
WX	X	"!X	XX	XX 	Y,A
Y''Y,,Y/z/{id}/search-googlec                 0  K   	 t        j                  ||        d{   }t        j                  dd        t        j                  d|j                          t        j                  d d       t        j                  |j                  |j                  |j                  ||       d{   \  }}t        j                  dt        |       dt        |       d       d	| |j                  |j                  t        |j                        t        |      ||t        |      d
dddS 7 7 # t        $ r  t        $ rO}t        j                  dt        |       d       t        t        j                   dt        |             d}~ww xY ww)ae  
    Search for a product using Google Search (SERP API - regular search engine).
    Creates violations in the database following the same schema as SERP Shopping Light API.
    
    Parameters:
    - id: Product ID to search for
    
    Returns:
    - Search results from Google Search (organic_results)
    - Violations found and stored in database
    Nr<   r=   u(   🔍 [Google Search] Searching product: r,   u,   [Google Search] ✓ Search completed. Found r3   r4   r2   r*   serp_apiz^Results and violations are stored in database following same schema as SERP Shopping Light API)r	   r   r-   r.   r/   r  r'   r(   r   search_enginesourcer   u   ❌ Google Search failed: Tr6   zSearch failed: r   )r   r   rA   rB   r-   r   rJ   r.   r/   rL   rI   r   rM   rH   rN   r	   r   )rE   r1   r   r0   r'   r(   rl   s          rm   search_google_productr  R  sw    ")
&88R@@bM">w?S?S>TUVvhbM" %3$E$E --OO%
 
 	B3w<.PZ[^_i[jZkkvwx "#00% \$ #J, t
 	
# A
2   
1#a&:TJ==$SVH-
 	

sM   FD2 D-BD2 /D00A<D2 ,F-D2 0D2 2FA
FFFc                   K   ddl m} ddlm}  |       4 d {   }	 t        j                  ||        d {   }d|_         |j                         |_        d |_        |j                          d {    |j                  }|j                  }t        |j                        }t        j                  dd        t        j                  d       t        j                  d| d	|  d
       t        j                  d d       g }g }	ddddddddd}
	 t!        j"                  |j                  |j                  |j                  ||       d {   \  }}|j%                  |       |	j%                  |       t'        |      |
d   d<   t'        |      |
d   d<   d|
d   d<   	 t!        j,                  || ddd       d {   }|j/                  dg       }|j/                  dg       }|j%                  |       |	j%                  |       t'        |      |
d   d<   t'        |      |
d   d<   d|
d   d<   t1               }g }|	D ]v  }|j/                  d      |j/                  d      |j/                  d      t3        |j/                  dd      d      f}||vsU|j5                  |       |j7                  |       x  |j                         j9                         }t        j                  d|         t        j                  d t'        |       d!t'        |              d|_         |j                         |_        |j                          d {    d d d       d {    y 7 S7 77 7 !# t(        $ r}dt+        |       |
d   d<   Y d }~d }~ww xY w7 # t(        $ r}dt+        |       |
d   d<   Y d }~d }~ww xY w7 z# t(        $ rs}t        j=                  d"t+        |       d#       d$_        t+        |      |_         |j                         |_        |j                          d {  7   Y d }~d }~ww xY w7 # 1 d {  7  sw Y   y xY ww)%Nr   r!   r   r   r   r=   u+   🎯 [BACKGROUND UNIFIED SCRAPING] Startingz	Product: r#   r$   r<   r%   r&   r)   r,   r*   r'   r(   r2   r	   r5   Tr   r8   r+   r9   r-   r/   r:   r;   u   ✅ [BACKGROUND DONE] Product z	Results: z, Violations: u(   ❌ Background unified scraping failed: r6   r   )r@   r"   r   r   r   r   rU   scraping_started_atscraping_errorrX   r-   r.   rI   r/   rA   rB   r   rJ   rK   rL   rM   rN   rO   rP   rQ   rR   rS   rT   rV   scraping_completed_atrH   )r   r"   r   r1   r0   rZ   r[   r\   r]   r^   r_   r`   ra   rl   rc   rd   re   seendedupedri   keyexecution_times                         rm    _run_scraping_unified_backgroundr    sA    0! " b bba	*<<RLLG '4G#*9(//*;G'%)G"))+")"6"6%,__"!&w{{!3KK$vh(KKEGKK)$7#8zl!LMKK6("&KN-.a9!U./qI"VNQ:H:[:[!(!5!5#OO#; 51 1 "">2%%&78=@=P/	:@CDU@V/=<G/9R(6(J(J)%)"&$&) # $3#6#6y"#E &5&9&9,&K#""#34%%&9:>ABR>S/0;ADEXAY/0>=H/0: 5DG# 	&EE-(EE.)EE%L!%%3Q7	 d?HHSMNN1%	& -X__.88:NKK8EFKK)C$4#5^CL>RS '2G#,;HOO,=G)))+ub b bL (5  Q>GAx<P/9Q
#$  R?HQ=Q/0:R2  	LLCCF8LW[L\&.G#%(VG",;HOO,=G)))+	yb b b bs  Q*M-Q*QOM0;O5M36B#O;M9M6AM9*N'N$A4N'<A"OB8OOOQ*'Q(Q*0O3O6M99	N!NON!!O$N''	O0O
O
OO	QA#Q QQQQQQ*Q'QQ'#Q*z/{id}/run-scrapingc                   K   t        j                  ||        d {   }|j                  st        t        j                  d      |j                  t        |        t        j                  d|         d| ddS 7 `w)Nr   r   )r   z4Unified scraping scheduled (background) for product r   z&Unified scraping started in background)r	   r   r   )	r   r   r	   r   r   r   r  rA   rB   )rE   r   r1   r   r0   s        rm   run_scraping_unifiedr    s      #44R<<G>>33G
 	
 (  
 KKFrdKL  ; # =   A?A=A!A?z/{id}/scraping-statusc                   K   t        j                  ||        d {   }|st        t        j                  d      | |j
                  |j                  xs d|j                  |j                  |j                  dS 7 `w)NzProduct not foundr   idle)r   r-   r	   
started_atcompleted_atrH   )
r   r   r   r	   HTTP_404_NOT_FOUNDr-   r   r  r  r  r   s       rm   get_scraping_statusr&    s      #44R<<G11&
 	
 ,,))3V1155''  =r   )Ctypingr   r   loggingfastapir   r   r   r   r	   r
   r   r   sqlalchemy.ext.asyncior   
sqlalchemyr   app.apir   app.models.productr   app.schemas.productr   r   r   r   r   r   app.schemas.scrapingr   app.services.product_servicer   app.services.scraper_servicer   r   router	getLogger__name__rA   r   boolrn   rP   get_dbget_current_userrN   r   postdictHTTP_201_CREATEDr   r   r   putr   deleter   r   r   HTTP_200_OKr  r  r  r  r  r  r&       rm   <module>r@     s   !  g g g /   &  2 7 7 				8	$zzzzzz zz 	zzz C 34t{{+aAraC(.VW ../TT
T T 	T
 SMT 5T" S63J3JK 4-`ab6pq"45xyt{{+../11%1 1 	1
 1 	1 L1h !2DRXRiRijC-[\t{{+../R

R
R
 kR
j  1CQWQhQhi t{{+../?"?? j?D GD) t{{+../	
7
7
7 	
7 *
7 wt, t{{+../	7	7	7 -	7 GO4 t{{+../			 		 5	 ^,<= 4-KLb6XY"45jkt{{+../### # 	#
 	# >#L ]4V=O=OP4-KLb6XY"45jkt{{+../I
I
I
 I
 		I
 QI
X  6 4-KLb6XY"45uvt{{+../''' ' 	'
 	' 7'T &BTBTU4-KLb6XY"45uvt{{+../P
P
P
 P
 		P
 VP
f "48 t{{+../9
9
9
 99
xfs fP !$7 t{{+../	% 	 8: #D9 t{{+../ :r?  