<?php
// property-intel.php
// MLS9: Live web-powered property intelligence endpoint.
//
// Flow:
// 1) Takes JSON { "address": "..." } from browser.
// 2) Uses SerpAPI (Google engine) to find Zillow/Redfin/Realtor/Movoto/Trulia URLs.
// 3) Uses ScraperAPI to fetch full HTML for each listing page.
// 4) Sends address + page HTMLs to OpenAI GPT.
// 5) GPT extracts/reconciles all property data + analytics and returns a single JSON object.
//
// IMPORTANT: fill in your API keys below.

header('Content-Type: application/json');

// ==== CONFIG: PUT YOUR KEYS HERE ====
//
// Each key is read from an environment variable first, so real secrets can be
// kept out of the source tree. The literal passed as the second argument is
// used only when the variable is unset or empty.

/**
 * Returns the value of an environment variable, or a fallback when the
 * variable is not set or is an empty string.
 *
 * @param string $name     Environment variable name.
 * @param string $fallback Value returned when the variable is unset or ''.
 * @return string
 */
function env_or_default($name, $fallback) {
    $value = getenv($name);
    return ($value === false || $value === '') ? $fallback : $value;
}

// SerpAPI key (for finding listing URLs)
$SERPAPI_KEY    = env_or_default('SERPAPI_KEY', 'YOUR_SERPAPI_KEY_HERE');

// ScraperAPI key (for fetching HTML from Zillow/Redfin/Realtor/etc.)
$SCRAPERAPI_KEY = env_or_default('SCRAPERAPI_KEY', 'YOUR_SCRAPERAPI_KEY_HERE');

// OpenAI key (for GPT analysis)
$OPENAI_API_KEY = env_or_default('OPENAI_API_KEY', 'YOUR_OPENAI_API_KEY_HERE');

// ====================================

/**
 * Performs an HTTP(S) GET request with cURL and reports the outcome as an array.
 *
 * Redirects are followed (at most 5 hops). Both the initial URL and any
 * redirect target are restricted to http/https, so this helper can never be
 * made to read file://, ftp://, gopher:// etc.
 *
 * @param string $url     Absolute http:// or https:// URL to fetch.
 * @param int    $timeout Total time limit for the transfer, in seconds.
 * @return array Keys:
 *               - 'ok'     (bool)        true when the final HTTP status is 2xx
 *               - 'status' (int)         final HTTP status, 0 when the transfer failed
 *               - 'error'  (string)      cURL error text, '' when there was none
 *               - 'body'   (string|null) response body, null when the transfer failed
 */
function http_get($url, $timeout = 25) {
    $ch = curl_init($url);
    if ($ch === false) {
        return ['ok' => false, 'status' => 0, 'error' => 'Failed to initialise cURL', 'body' => null];
    }

    curl_setopt_array($ch, [
        CURLOPT_RETURNTRANSFER  => true,
        CURLOPT_FOLLOWLOCATION  => true,
        CURLOPT_MAXREDIRS       => 5,
        CURLOPT_PROTOCOLS       => CURLPROTO_HTTP | CURLPROTO_HTTPS,
        CURLOPT_REDIR_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
        // Fail fast on unreachable hosts instead of spending the whole budget connecting.
        CURLOPT_CONNECTTIMEOUT  => min(10, $timeout),
        CURLOPT_TIMEOUT         => $timeout,
        CURLOPT_SSL_VERIFYPEER  => true,
        CURLOPT_SSL_VERIFYHOST  => 2,
        CURLOPT_USERAGENT       => 'MLS9-PropertyIntel/1.0'
    ]);

    $body = curl_exec($ch);
    $err  = curl_error($ch);
    $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);

    // curl_exec() returns false on transport-level failure (DNS, TLS, timeout, blocked protocol...).
    if ($body === false) {
        return ['ok' => false, 'status' => 0, 'error' => $err, 'body' => null];
    }
    return ['ok' => ($code >= 200 && $code < 300), 'status' => $code, 'error' => $err, 'body' => $body];
}

// Read JSON input from client
$raw  = file_get_contents('php://input');
$data = json_decode($raw, true);

// Anything that does not decode to a JSON object/array is rejected outright.
if (!is_array($data)) {
    http_response_code(400);
    echo json_encode(['error' => 'Invalid JSON payload']);
    exit;
}

// Only a string can be a usable address. The type is checked before trimming
// because trim() on an array value (e.g. {"address": ["x"]}) raises a
// TypeError on PHP 8 instead of producing a clean 400 response.
$address = $data['address'] ?? null;
$address = is_string($address) ? trim($address) : '';
if ($address === '') {
    http_response_code(400);
    echo json_encode(['error' => 'Missing address']);
    exit;
}

// 1) Ask SerpAPI (Google engine) for results restricted to the major listing sites.
//    The query is the address followed by "site:a OR site:b OR ...".
$listingSites = ['zillow.com', 'redfin.com', 'realtor.com', 'movoto.com', 'trulia.com'];
$query        = $address . ' site:' . implode(' OR site:', $listingSites);

$serpParams = [
    'engine' => 'google',
    'q'      => $query,
    'api_key'=> $SERPAPI_KEY,
    'num'    => 20,
    'hl'     => 'en',
    'gl'     => 'us'
];
$serpUrl = 'https://serpapi.com/search?' . http_build_query($serpParams);
$serpRes = http_get($serpUrl);

// Transport failure or non-2xx status: report the cURL error if there is one,
// otherwise whatever body SerpAPI sent back.
if (!$serpRes['ok']) {
    if ($serpRes['error']) {
        $serpDetail = $serpRes['error'];
    } else {
        $serpDetail = $serpRes['body'];
    }

    $serpFailure = [
        'error' => 'SerpAPI request failed',
        'status'=> $serpRes['status'],
        'detail'=> $serpDetail
    ];

    http_response_code(500);
    echo json_encode($serpFailure);
    exit;
}

// A 2xx response must still carry a JSON document we can work with.
$serpJson = json_decode($serpRes['body'], true);
if (is_array($serpJson) === false) {
    http_response_code(500);
    echo json_encode(['error' => 'Failed to decode SerpAPI JSON']);
    exit;
}

/**
 * Chooses, for each wanted domain, the first search result hosted on that domain.
 *
 * A result counts as hosted on a domain when the host part of its URL is
 * exactly the domain or a subdomain of it ("www.zillow.com" matches
 * "zillow.com"). Matching on the host rather than on the whole URL keeps out
 * look-alikes such as "notredfin.com" and third-party links that merely
 * mention a listing site in their path or query string.
 *
 * @param mixed    $organic Search results; expected to be a list of arrays that
 *                          each carry a string 'link'. Anything else is skipped.
 * @param string[] $domains Domains to look for, in priority/output order.
 * @return array Map of domain => first matching URL, or null when none matched.
 */
function pick_listing_urls($organic, array $domains) {
    $picked = array_fill_keys($domains, null);
    if (!is_array($organic)) {
        return $picked;
    }

    foreach ($organic as $item) {
        if (!is_array($item) || !isset($item['link']) || !is_string($item['link'])) {
            continue;
        }

        $link = $item['link'];
        $host = parse_url($link, PHP_URL_HOST);
        if (!is_string($host) || $host === '') {
            continue;
        }
        $host = strtolower($host);

        foreach ($picked as $domain => $current) {
            // Keep the first hit per domain; later results never overwrite it.
            if ($current !== null) {
                continue;
            }

            $wanted = strtolower((string) $domain);
            $suffix = '.' . $wanted;
            if ($host === $wanted || substr($host, -strlen($suffix)) === $suffix) {
                $picked[$domain] = $link;
                break; // a host belongs to at most one of the listing sites
            }
        }
    }

    return $picked;
}

$organic = $serpJson['organic_results'] ?? [];

// Target domains we care about, each mapped to the first matching listing URL
// (null when the search returned nothing for that site).
$targets = pick_listing_urls($organic, [
    'zillow.com',
    'redfin.com',
    'realtor.com',
    'movoto.com',
    'trulia.com',
]);

// 2) Fetch HTML for each found listing using ScraperAPI
$maxLen     = 20000; // chars kept per page, to avoid blowing up token count; adjust as needed
$pages      = [];
$fetchedAny = false; // becomes true once at least one page yields HTML

foreach ($targets as $domain => $url) {
    if (!$url) continue;

    // Friendly label: the domain without its TLD ("zillow.com" -> "zillow").
    $sourceName = explode('.', $domain)[0];

    $scrapeUrl = 'https://api.scraperapi.com/?' . http_build_query([
        'api_key' => $SCRAPERAPI_KEY,
        'url'     => $url,
        'render'  => 'true'
    ]);

    $scrapeRes = http_get($scrapeUrl, 40);
    if (!$scrapeRes['ok']) {
        // Record the failure in the pages list but keep going with the other sites
        $pages[] = [
            'source' => $sourceName ?: $domain,
            'url'    => $url,
            'error'  => 'Failed to fetch HTML: HTTP ' . $scrapeRes['status']
        ];
        continue;
    }

    $html = $scrapeRes['body'] ?? '';

    // Limit size; prefer the multibyte-aware cut when mbstring is available
    if (function_exists('mb_substr')) {
        $htmlSnippet = mb_substr($html, 0, $maxLen);
    } else {
        $htmlSnippet = substr($html, 0, $maxLen);
    }

    $pages[] = [
        'source' => $sourceName ?: $domain,
        'url'    => $url,
        'html'   => $htmlSnippet
    ];
    $fetchedAny = true;
}

if (empty($pages)) {
    http_response_code(404);
    echo json_encode(['error' => 'No listing pages found for this address']);
    exit;
}

// Listing URLs were found but none could be downloaded. Stop here rather than
// asking the model to analyse a payload that contains no HTML at all; the
// per-source errors are returned so the caller can see what went wrong.
if (!$fetchedAny) {
    http_response_code(502);
    echo json_encode([
        'error' => 'Failed to fetch any listing page',
        'pages' => $pages
    ]);
    exit;
}

// System prompt for the GPT call; it is sent below as the 'system' message.
// This is a nowdoc (<<<'SYS'), so PHP performs no variable interpolation or
// escape processing: the text between the markers is used verbatim.
// The prompt tells the model what it will receive (an address plus labelled
// HTML snippets), lists the extraction, reconciliation and analytics tasks,
// and spells out the JSON structure the reply must follow.
$systemPrompt = <<<'SYS'
You are MLS9 Live Web Property Intelligence AI.

You will receive:
- A U.S. property address.
- A list of listing pages from Zillow, Redfin, Realtor, Movoto, Trulia, etc. (as HTML snippets with source labels and URLs).

Your tasks:

1) For EACH page, extract as many property data points as possible:
   - beds, baths, half baths
   - interior square feet
   - lot size in sqft
   - year built
   - property type (single family, duplex, condo, etc.)
   - stories / levels
   - parking type and spaces
   - HOA fees (monthly)
   - property taxes (annual)
   - listing price
   - any site-specific value estimate (Zestimate, Redfin Estimate, etc.)
   - last sale date and last sale price
   - days on market
   - MLS number
   - listing status (active, pending, sold, off market)
   - brief features (HVAC, roof type, foundation, etc.)
   - rent estimate if shown

2) Reconcile across all sources to produce a single best-guess data set.
   - When multiple sites agree, confidence should be high.
   - When they disagree, pick the most common / most credible and lower confidence.
   - If you truly cannot infer a field, set its value to null and confidence <= 60.

3) Compute RENT ANALYTICS:
   - Estimate:
     - rent_low
     - rent_high
     - rent_average
   - If there are rent estimates in the listing data, anchor to them.
   - Also estimate section8_rent_estimate based on typical HUD-like fair market rent for that area and bedroom count.
   - Provide a single rent_confidence (1-100).

4) Compute VALUE ANALYTICS:
   - Using list price, site-specific estimates (Zestimate, Redfin, etc.), and any other value data:
     - value_min
     - value_max
     - value_average
     - arv (after-repair value upper bound for typical condition/rehab level in that area)
   - All numeric fields should be plain numbers (no "$" or commas).

5) Compute BASIC INVESTOR METRICS:
   - annual_rent_to_price = (rent_average * 12) / value_average
   - gross_rent_multiplier = value_average / (rent_average * 12)

6) Estimate LOCATION STATS (best-effort, you may use your general knowledge if not in HTML):
   - city name
   - county name
   - city population (approximate)
   - county population (approximate)
   - median household income for city and county (approximate)
   - demographics (fractional share or percentages for white, black, hispanic, asian, other)
   - nearest city with population >= 30,000:
     - name
     - approximate population
     - approximate distance in miles
     - approximate drive time in minutes

7) OUTPUT FORMAT:
Return a single JSON object with this structure exactly (values are examples):

{
  "address": "123 Main St, City, ST 00000",
  "location": {
    "city": "City Name",
    "county": "County Name",
    "state": "ST",
    "city_population": 55000,
    "county_population": 320000,
    "city_median_income": 52000,
    "county_median_income": 60000,
    "city_demographics": {
      "white": 0.65,
      "black": 0.18,
      "hispanic": 0.10,
      "asian": 0.04,
      "other": 0.03
    },
    "county_demographics": {
      "white": 0.60,
      "black": 0.20,
      "hispanic": 0.12,
      "asian": 0.05,
      "other": 0.03
    },
    "nearest_30k_city": {
      "name": "Nearest Big Town",
      "population": 45000,
      "distance_miles": 27,
      "drive_minutes": 32
    }
  },
  "property": {
    "bedrooms":         { "value": 3,     "confidence": 97 },
    "bathrooms":        { "value": 2,     "confidence": 95 },
    "half_bathrooms":   { "value": 1,     "confidence": 85 },
    "square_feet":      { "value": 1450,  "confidence": 93 },
    "lot_size_sqft":    { "value": 7200,  "confidence": 88 },
    "year_built":       { "value": 1955,  "confidence": 96 },
    "property_type":    { "value": "Single family residential", "confidence": 99 },
    "stories":          { "value": 1,     "confidence": 90 },
    "parking":          { "value": "Driveway / Garage", "confidence": 80 },
    "hoa_monthly":      { "value": 0,     "confidence": 99 },
    "taxes_annual":     { "value": 2200,  "confidence": 94 },
    "insurance_annual": { "value": 1500,  "confidence": 80 }
  },
  "values": {
    "list_price":          { "value": 210000, "confidence": 96 },
    "zestimate":           { "value": 215000, "confidence": 92 },
    "redfin_estimate":     { "value": 212000, "confidence": 90 },
    "realtor_estimate":    { "value": 218000, "confidence": 88 },
    "movoto_estimate":     { "value": 214000, "confidence": 87 },
    "last_sale_price":     { "value": 185000, "confidence": 99 },
    "last_sale_date":      { "value": "2019-06-15", "confidence": 95 },
    "value_min":           205000,
    "value_max":           220000,
    "value_average":       213800,
    "arv":                 225000
  },
  "rents": {
    "rent_low":               1400,
    "rent_high":              1700,
    "rent_average":           1550,
    "section8_rent_estimate": 1600,
    "rent_confidence":        88
  },
  "ratios": {
    "annual_rent_to_price": 0.087,
    "gross_rent_multiplier": 11.5
  },
  "source_summary": {
    "zillow_url":  "https://www.zillow.com/...",
    "redfin_url":  "https://www.redfin.com/...",
    "realtor_url": "https://www.realtor.com/...",
    "movoto_url":  "https://www.movoto.com/...",
    "trulia_url":  "https://www.trulia.com/..."
  }
}

Rules:
- All numeric values should be plain numbers (no $ or commas).
- Confidence is 1–100.
- If you cannot infer something, use null and a low confidence. Do NOT invent highly precise values without evidence.
- Return ONLY the JSON object, no extra text.
SYS;

// Build the chat request: the system prompt plus one user message carrying
// the address and the scraped pages as JSON.
$userPayload = [
    'address' => $address,
    'pages'   => $pages
];

// Scraped HTML is not guaranteed to be valid UTF-8 (and a byte-wise cut can
// split a multibyte character). json_encode() returns false on such input, so
// ask it to substitute bad bytes (PHP >= 7.2) or, on older versions, to null
// out the offending string instead of failing the whole document.
$jsonFlags = defined('JSON_INVALID_UTF8_SUBSTITUTE')
    ? JSON_INVALID_UTF8_SUBSTITUTE
    : JSON_PARTIAL_OUTPUT_ON_ERROR;

$userContent = json_encode($userPayload, JSON_PRETTY_PRINT | $jsonFlags);
if ($userContent === false) {
    http_response_code(500);
    echo json_encode(['error' => 'Failed to encode listing pages: ' . json_last_error_msg()]);
    exit;
}

$openaiPayload = [
    'model' => 'gpt-4.1-mini',
    'messages' => [
        [ 'role' => 'system', 'content' => $systemPrompt ],
        [ 'role' => 'user',   'content' => $userContent ]
    ],
    'response_format' => [ 'type' => 'json_object' ],
    'max_tokens'      => 1800
];

$requestBody = json_encode($openaiPayload, $jsonFlags);
if ($requestBody === false) {
    http_response_code(500);
    echo json_encode(['error' => 'Failed to encode OpenAI request: ' . json_last_error_msg()]);
    exit;
}

$ch = curl_init('https://api.openai.com/v1/chat/completions');
if ($ch === false) {
    http_response_code(500);
    echo json_encode(['error' => 'OpenAI cURL error: failed to initialise cURL']);
    exit;
}

curl_setopt_array($ch, [
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_POST           => true,
    // Without explicit limits a stalled connection would hold the request open indefinitely.
    CURLOPT_CONNECTTIMEOUT => 10,
    CURLOPT_TIMEOUT        => 120,
    CURLOPT_HTTPHEADER     => [
        'Content-Type: application/json',
        'Authorization: Bearer ' . $OPENAI_API_KEY
    ],
    CURLOPT_POSTFIELDS     => $requestBody
]);

$openaiRes  = curl_exec($ch);
$openaiErr  = curl_error($ch);
$openaiCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
curl_close($ch);

// Transport-level failure (DNS, TLS, timeout...)
if ($openaiRes === false) {
    http_response_code(500);
    echo json_encode(['error' => 'OpenAI cURL error: ' . $openaiErr]);
    exit;
}

// Non-2xx from OpenAI: relay its status code and error body unchanged
if ($openaiCode < 200 || $openaiCode >= 300) {
    http_response_code($openaiCode);
    echo $openaiRes;
    exit;
}

$openaiJson = json_decode($openaiRes, true);
if (!is_array($openaiJson)) {
    http_response_code(500);
    echo json_encode(['error' => 'Failed to decode OpenAI response JSON']);
    exit;
}

// The model's reply is itself a JSON document inside the first choice's message content
$content = $openaiJson['choices'][0]['message']['content'] ?? '';
$result  = json_decode($content, true);

if (!is_array($result)) {
    http_response_code(500);
    echo json_encode([
        'error'       => 'Failed to parse AI JSON content',
        'raw_content' => $content
    ]);
    exit;
}

// No closing PHP tag: stray whitespace after it would be appended to the JSON response.
echo json_encode($result);