<?PHP
#
#   FILE:  SearchResults.php
#
#   Part of the Metavus digital collections platform
#   Copyright 2015-2023 Edward Almasy and Internet Scout Research Group
#   http://metavus.net
#
#   @scout:phpstan

namespace Metavus;
use Exception;
use InvalidArgumentException;
use ScoutLib\ApplicationFramework;
use ScoutLib\PluginManager;
use ScoutLib\StdLib;


# grab handles for the framework, the plugin manager, and the user
#       currently logged in
$AF = ApplicationFramework::getInstance();
$PluginMgr = PluginManager::getInstance();
$User = User::getCurrentUser();

# search result pages should not be indexed by search engines
$AF->addMetaTag(["robots" => "noindex"]);

/**
*
*   Search Form Protocol
*
*   KEYWORD SEARCH
*   F_SearchString      text to search for
*
*   TEXT SEARCH ("N" is an arbitrary zero-based index)
*   F_SearchCatN        metadata field ID or "KEYWORD" for keyword search
*   F_SearchTextN       text to search for
*
*   SEARCH LIMITS ("N" is the metadata field ID)
*   F_SearchLimitN      one or more values as defined below
*
*   SEARCH LIMIT VALUES (metadata field type and how it will be compared)
*   Controlled Name     controlled name IDs (all or any, depending on setting)
*   Flag                1 for TRUE and 0 for FALSE (equal to)
*   Number              minimum value for field (greater than or equal to)
*   Option              controlled name IDs (all or any, depending on setting)
*   User                user IDs (all or any, depending on setting)
*
*   PRESENTATION (corresponding GET variable names follow the descriptions)
*   F_ResultsPerPage    number of search results to display per page
*   F_ReverseSortOrder  1 to sort in reverse of default order for field (RS)
*   F_SavedSearchId     ID of saved search to run (ID)
*   F_SortField         metadata field ID or "R" for sort by relevance (SF)
*   F_StartingIndex     zero-based index into search results (SI)
*
*   NOTES
*   - Search parameters will be assembled into single parameter set with "AND"
*           logic at the top level.
*   - Fields that are not viewable by the current user will be omitted from
*           search parameters.
*   - If a saved search ID is specified, search parameters will be ignored.
*   - "Refine Search" link will include search parameters as generated by
*           SearchParameterSet::UrlParameterString()
*
*/

# ----- ACCESS CONTROL -------------------------------------------------------

# start out assuming that system load is normal
$H_HighLoad = false;

# request counts as coming from a bot only when the detector plugin is
#       ready and reports one
$H_IsBot = $PluginMgr->pluginReady("BotDetector")
        && $PluginMgr->getPlugin("BotDetector")->CheckForBot();

# bots get no search (and the page is not cached)
if ($H_IsBot) {
    $AF->doNotCacheCurrentPage();
    return;
}

# anonymous users are turned away when the 1-minute load average (first
#       element from sys_getloadavg()) exceeds the configured cutoff
if (!$User->isLoggedIn()) {
    $SysCfg = SystemConfiguration::getInstance();
    $LoadCutoff = $SysCfg->getInt("AnonSearchCpuLoadCutoff");
    $LoadAverage = sys_getloadavg();

    $H_HighLoad = is_array($LoadAverage) && ($LoadAverage[0] > $LoadCutoff);
    if ($H_HighLoad) {
        $AF->doNotCacheCurrentPage();
        header($_SERVER["SERVER_PROTOCOL"]." 429 Too Many Requests");
        return;
    }
}

# ----- CONFIGURATION  -------------------------------------------------------

# metadata field types that can be used for search limits (OR'ed together
#       into a single bitmask value)
$GLOBALS["G_MDFTypesForLimits"] = MetadataSchema::MDFTYPE_OPTION
        | MetadataSchema::MDFTYPE_USER
        | MetadataSchema::MDFTYPE_FLAG
        | MetadataSchema::MDFTYPE_NUMBER
        | MetadataSchema::MDFTYPE_TREE;

# sort by relevance ("R") when nothing else is specified
$DefaultSortFieldId = "R";

# tab shown when there is no better candidate
$H_ActiveTab = MetadataSchema::SCHEMAID_DEFAULT;

# presentation defaults come from the interface configuration
$H_DefaultStartingIndex = 0;
$IntConfig = InterfaceConfiguration::getInstance();
$H_DefaultResultsPerPage = $IntConfig->getInt("DefaultRecordsPerPage");

# a logged-in user's own RecordsPerPage setting (when they have one)
#       takes precedence over the system default
$UserRecordsPerPage = $User->isLoggedIn() ? $User->get("RecordsPerPage") : null;
if (!is_null($UserRecordsPerPage)) {
    $H_DefaultResultsPerPage = $UserRecordsPerPage;
}

# ----- EXPORTED FUNCTIONS ---------------------------------------------------

/**
 * Get the fields from a schema that can be used for sorting.  Every call
 * logs a warning naming the caller, because this function is deprecated.
 * @param int $SchemaId ID of schema to retrieve fields from.
 * @return array Value returned by MetadataSchema::getSortFields() for the
 *      schema (field names, with field IDs for the index).
 */
function GetPossibleSortFields($SchemaId)
{
    # record who is still calling this deprecated entry point
    $Message = "Call to deprecated function ".__FUNCTION__. " at ".StdLib::getMyCaller();
    ApplicationFramework::getInstance()->logMessage(
        ApplicationFramework::LOGLVL_WARNING,
        $Message
    );

    # hand back the sortable fields for the requested schema
    $Schema = new MetadataSchema($SchemaId);
    return $Schema->getSortFields();
}


# ----- LOCAL FUNCTIONS ------------------------------------------------------

/**
 * Filter an array of input values (e.g., _GET or _POST) by passing each
 * element through filterInputValuesRecursive(), which removes embedded
 * <script> tags, including those that are multiply urlencoded().
 * @param array $Value Values to filter.
 * @return array Filtered values, with the original keys preserved.
 * @see filterInputValuesRecursive()
 */
function filterInputValues(array $Value): array
{
    $Filtered = [];
    foreach ($Value as $Key => $ChildValue) {
        $Filtered[$Key] = filterInputValuesRecursive($ChildValue);
    }
    return $Filtered;
}

/**
 * Recursive version of filterInputValues(), working on strings and arrays.
 * Arrays are filtered element by element (keys preserved).  For strings,
 * any layers of urlencoding that hide a '%' or '<' are stripped off, and
 * then <script>...</script> blocks are removed.  Tag matching ignores case
 * and spans line breaks, and the decode-and-strip pass is repeated until
 * the value stops changing, so that removing one block cannot leave behind
 * pieces that join up into another.
 *
 * This is a heuristic input filter only (an opening <script> tag with no
 * closing tag is left untouched, for example), so values must still be
 * escaped when they are output.
 * @param string|array $Value Value to filter.
 * @return string|array Filtered value.
 * @see filterInputValues()
 */
function filterInputValuesRecursive($Value)
{
    # arrays are handled by filtering each element
    if (is_array($Value)) {
        $Result = [];
        foreach ($Value as $Key => $ChildValue) {
            $Result[$Key] = filterInputValuesRecursive($ChildValue);
        }
        return $Result;
    }

    $Result = $Value;
    do {
        $Before = $Result;

        # if the string contains multiple levels of urlencoding(),
        # strip all of them off (determined by looking for
        # urlencoded versions of '%' and '<')
        while (stripos($Result, '%25') !== false ||
               stripos($Result, '%3C') !== false) {
            $Result = urldecode($Result);
        }

        # strip out any script tags ("i" so that <SCRIPT> is caught, "s" so
        # that the tag body may contain newlines, and "[^>]*" in the closing
        # tag so that forms like "</script >" are caught)
        # (on a PCRE error preg_replace() returns NULL, in which case the
        # value is kept as it was)
        $Result = preg_replace(
            "%<script[^>]*>.*?</script[^>]*>%is",
            "",
            $Result
        ) ?? $Result;

        # go around again if anything changed, because removing a block
        # (or decoding) can expose a new tag or a new encoded character
    } while ($Result !== $Before);

    return $Result;
}

/**
 * Retrieve search parameters from form values.
 *
 * Three groups of form variables are read:
 *  - F_SearchString: keyword search text, added if not blank.
 *  - F_SearchCatN / F_SearchTextN (N = 0, 1, 2, ... stopping at the first
 *    missing or empty F_SearchCatN): text to search for in the field(s)
 *    named by F_SearchCatN.  "KEYWORD" (any case) means keyword search,
 *    and several field IDs joined with "-" are searched with OR logic.
 *  - F_SearchLimitN (N = metadata field ID): limit values for fields of
 *    the types listed in $GLOBALS["G_MDFTypesForLimits"].
 *
 * Form values that are not of a usable type (e.g., an array where a string
 * is expected, or a non-numeric value for a Flag or Number limit) are
 * ignored.  If the user is logged in and any F_SearchCatN values were seen,
 * they are saved (serialized) in the user's "SearchSelections" setting.
 * @param array $FormVars Form variable array (usually $_POST).
 * @return SearchParameterSet Parameters in SearchParameterSet object.
 */
function getSearchParametersFromForm($FormVars)
{
    # retrieve user currently logged in
    $User = User::getCurrentUser();

    # start with empty set
    $Params = new SearchParameterSet();

    # add keyword ("quick") search string if a non-blank one was supplied
    $Keywords = $FormVars["F_SearchString"] ?? null;
    if (is_scalar($Keywords) && strlen(trim((string)$Keywords))) {
        $Params->addParameter($Keywords);
    }

    # track which fields were selected
    $SearchSelections = [];

    # while there are search text fields left to examine
    $FormFieldIndex = 0;
    while (true) {
        # stop at first search box with no (usable) field selection
        $FieldKey = $FormVars["F_SearchCat".$FormFieldIndex] ?? null;
        if (!is_scalar($FieldKey) || (strlen((string)$FieldKey) == 0)) {
            break;
        }

        $SearchSelections["F_SearchCat".$FormFieldIndex] = $FieldKey;

        # if value is available for box
        $Value = $FormVars["F_SearchText".$FormFieldIndex] ?? null;
        if (is_scalar($Value) && strlen((string)$Value)) {
            # if this is a keyword search field
            if (strtoupper((string)$FieldKey) == "KEYWORD") {
                # add keyword search for value
                $Params->addParameter($Value);
            } else {
                # selection is one field ID or several joined with "-"
                $FieldIds = explode("-", (string)$FieldKey);

                if (count($FieldIds) == 1) {
                    $Params->addParameter($Value, intval($FieldIds[0]));
                } else {
                    # value may match in any of the listed fields
                    $Subgroup = new SearchParameterSet();
                    $Subgroup->logic("OR");

                    foreach ($FieldIds as $FieldId) {
                        $Subgroup->addParameter($Value, intval($FieldId));
                    }
                    $Params->addSet($Subgroup);
                }
            }
        }

        # move to next search box
        $FormFieldIndex++;
    }

    # save search selections if user was logged in
    # (done once, after all search boxes have been read, rather than once
    #       per box, since only the complete list is of interest)
    if (count($SearchSelections) && $User->isLoggedIn()) {
        $User->set(
            "SearchSelections",
            serialize($SearchSelections)
        );
    }

    # for each possible limit field
    foreach (MetadataSchema::getAllSchemas() as $Schema) {
        $Fields = $Schema->getFields($GLOBALS["G_MDFTypesForLimits"]);
        foreach ($Fields as $FieldId => $Field) {
            # skip field if no value is available for it
            if (!isset($FormVars["F_SearchLimit".$FieldId])) {
                continue;
            }

            # retrieve value and convert to an array if necessary
            $Values = $FormVars["F_SearchLimit".$FieldId];
            if (!is_array($Values)) {
                $Values = [$Values];
            }

            # (parameter subgroup for this field, created only if at
            #       least one meaningful limit value is found)
            $Subgroup = null;

            # handle value based on field type
            $FieldType = $Field->type();
            switch ($FieldType) {
                case MetadataSchema::MDFTYPE_FLAG:
                case MetadataSchema::MDFTYPE_NUMBER:
                    # flags are compared for equality, while numbers are
                    #       treated as a minimum value
                    $Operator = ($FieldType == MetadataSchema::MDFTYPE_FLAG)
                            ? "=" : ">=";

                    # add value for this field (if meaningful)
                    # (only the first value is used, and it must be
                    #       present, numeric, and not negative)
                    $LimitValue = $Values[0] ?? null;
                    if (is_numeric($LimitValue) && ($LimitValue >= 0)) {
                        $Subgroup = new SearchParameterSet();
                        $Subgroup->addParameter($Operator.$LimitValue, $Field);
                    }
                    break;

                default:
                    # retrieve possible values for field
                    # valid values for user fields are those that are in use
                    $PossibleValues = ($FieldType == MetadataSchema::MDFTYPE_USER) ?
                            $Field->getValuesInUse() :
                            $Field->getPossibleValues();

                    # tree field values are prefixed with "^" and all
                    #       others with "="
                    $Prefix = ($FieldType == MetadataSchema::MDFTYPE_TREE)
                            ? "^" : "=";

                    # for each value selected
                    $ValuesToAdd = [];
                    foreach ($Values as $TermId) {
                        # if value is a possible value for this field
                        # (values that cannot be array keys are ignored)
                        if ((is_string($TermId) || is_int($TermId))
                                && isset($PossibleValues[$TermId])) {
                            # include value to be added to set
                            $ValuesToAdd[] = $Prefix.$TermId;
                        }
                    }

                    # if there were valid values found
                    if (count($ValuesToAdd)) {
                        # add values to set for this field
                        $Subgroup = new SearchParameterSet();
                        $Subgroup->addParameter($ValuesToAdd, $Field);
                    }
                    break;
            }

            # if limit search parameters were found for this field
            if ($Subgroup !== null) {
                # set search logic for field subgroup
                $Subgroup->logic($Field->searchGroupLogic());

                # add field subgroup to search parameters
                $Params->addSet($Subgroup);
            }
        }
    }

    # return search parameters to caller
    return $Params;
}

/**
 * Get the sort order (ascending or descending) appropriate to the specified
 * metadata field.  The pseudo-field "R" (relevance) and Date and Timestamp
 * fields sort descending by default, while all other fields (including
 * ones that do not exist in any schema) sort ascending.
 * @param string|int $FieldId ID of metadata field, or "R" for relevance.
 * @param bool $ReverseOrder TRUE to reverse normal order.
 * @return bool TRUE to sort descending, or FALSE to sort ascending.
 */
function getFieldSortOrder($FieldId, $ReverseOrder)
{
    if ($FieldId == "R") {
        # relevance is listed highest first
        $Descending = true;
    } elseif (!MetadataSchema::fieldExistsInAnySchema($FieldId)) {
        # unknown fields are assumed to sort ascending
        $Descending = false;
    } else {
        # date-like fields are listed newest first, everything else ascending
        $FieldType = MetadataField::getField((int)$FieldId)->type();
        $Descending = in_array(
            $FieldType,
            [MetadataSchema::MDFTYPE_DATE, MetadataSchema::MDFTYPE_TIMESTAMP]
        );
    }

    # flip the default direction if caller asked for reverse order
    return $ReverseOrder ? !$Descending : $Descending;
}

/**
 * Get search results tab to make active, based on whether the user
 * had previously selected a tab (recorded in the "SearchResults_TabNo"
 * cookie) or which type of item in the search results had the most items.
 * When item types have the same number of results, the one with the
 * higher total score is preferred, and on a complete tie the one that
 * comes first in $SearchResults is kept.
 *
 * A cookie value for which there are no results is cleared.  A cookie
 * value that is not a string or integer (e.g., one sent in array form)
 * is ignored.
 * @param array $SearchResults Nested arrays of search result data, with
 *      the outer index being the item type.
 * @return int Tab to make active, in the form of an item type.
 */
function getActiveTab(array $SearchResults): int
{
    if (!count($SearchResults)) {
        return MetadataSchema::SCHEMAID_DEFAULT;
    }

    # determine default active tab based on number of results, using score
    #       totals in cases where number of results are identical
    $ActiveTab = null;
    $HighestCount = -1;
    $HighestScore = 0;
    foreach ($SearchResults as $ResultType => $ResultScores) {
        $Count = count($ResultScores);
        if ($Count > $HighestCount) {
            $HighestCount = $Count;
            $HighestScore = array_sum($ResultScores);
            $ActiveTab = $ResultType;
        } elseif ($Count == $HighestCount) {
            $Score = array_sum($ResultScores);
            if ($Score > $HighestScore) {
                $HighestScore = $Score;
                $ActiveTab = $ResultType;
            }
        }
    }

    # if the user has selected a tab
    # (cookie values come from the client and can be arrays, which cannot
    #       be used as an array index, so only strings and integers are
    #       considered)
    $Selection = $_COOKIE["SearchResults_TabNo"] ?? null;
    if (is_string($Selection) || is_int($Selection)) {
        # if we have results for that schema, use those
        # otherwise, clear the selection
        if (isset($SearchResults[$Selection])) {
            $ActiveTab = (int)$Selection;
        } else {
            unset($_COOKIE["SearchResults_TabNo"]);
            setcookie("SearchResults_TabNo", "", time() - 3600);
        }
    }

    return $ActiveTab;
}


# ----- MAIN: LOAD PARAMETERS ------------------------------------------------

# filter out stuff that looks like XSS attempts
$_GET = filterInputValues($_GET);
$_POST = filterInputValues($_POST);

# load search parameters from form values
$H_SearchParams = getSearchParametersFromForm($_POST);

# (if the form supplied any parameters, presentation settings below are
#       taken from the form (POST) values rather than from the URL)
$SearchParametersGottenFromForm = ($H_SearchParams->parameterCount() > 0);

# if the user is coming from UserLogin, ignore RP setting to avoid overwriting
# the setting from the user's preferences
if ($User->isLoggedIn() && $User->lastLocation() == "UserLogin"
    && isset($_GET["RP"])) {
    unset($_GET["RP"]);
}

# retrieve results presentation parameters
# NOTE(review): the form variable read here is "F_RecordsPerPage", while the
#       protocol description at the top of this file lists
#       "F_ResultsPerPage" -- confirm which name the search forms submit
$H_ResultsPerPage = $SearchParametersGottenFromForm ?
        StdLib::getArrayValue($_POST, "F_RecordsPerPage", $H_DefaultResultsPerPage) :
        StdLib::getArrayValue($_GET, "RP", $H_DefaultResultsPerPage);

# (an array-valued SF parameter is treated as legacy sort field list)
$LegacySortFields = (isset($_GET["SF"]) && is_array($_GET["SF"])) ? $_GET["SF"] : null;

# set up transport controls and sort direction for each schema
$H_SortDescending = [];
$H_TransportUIs = [];
foreach (MetadataSchema::getAllSchemaIds() as $SchemaId) {
    $Schema = new MetadataSchema($SchemaId);
    $TransportUI = new TransportControlsUI($SchemaId);

    $TransportUI->itemCount(0);
    $TransportUI->itemsPerPage($H_ResultsPerPage);
    $TransportUI->itemTypeName($Schema->resourceName());
    $SchemaDefaultSortField = $Schema->defaultSortField();
    # use "R" for nonexistent default sort field
    if ($SchemaDefaultSortField === false) {
        $SchemaDefaultSortField = $DefaultSortFieldId;
    }
    $TransportUI->defaultSortField((string) $SchemaDefaultSortField);
    if ($SearchParametersGottenFromForm && isset($_POST["F_SortField"])) {
        $TransportUI->sortField($_POST["F_SortField"]);
    }

    # determine sort direction
    $H_SortDescending[$SchemaId] = getFieldSortOrder(
        $TransportUI->sortField(),
        $TransportUI->reverseSortFlag()
    );

    $H_TransportUIs[$SchemaId] = $TransportUI;
}

# add any search parameters from URL
if (SearchParameterSet::isLegacyUrl($_SERVER["REQUEST_URI"])) {
    $H_SearchParams->setFromLegacyUrl($_SERVER["REQUEST_URI"]);
} else {
    # keep a copy of the parameters as they are now, to fall back to if
    #       the URL values turn out to be unusable
    # (this must be a clone, because assigning an object only copies the
    #       handle, which would leave the "backup" pointing at the same
    #       object that urlParameters() modifies)
    # NOTE(review): clone is shallow unless SearchParameterSet defines
    #       __clone() -- confirm that is sufficient for nested subgroups
    $SearchParamsBackupCopy = clone $H_SearchParams;

    # (trap exceptions in case $_GET includes illegal search parameter values)
    try {
        $H_SearchParams->urlParameters($_GET);
    } catch (InvalidArgumentException $Exception) {
        $H_SearchParams = $SearchParamsBackupCopy;
    }
}


# ----- MAIN: PERFORM ACTIONS ------------------------------------------------

# note whether user requested to save search
$SaveRequested = isset($_POST["Submit"]) && ($_POST["Submit"] == "Save");

# if saving and no existing saved search was named
if ($SaveRequested && !isset($_POST["F_SavedSearchId"])) {
    # go to new saved search page if there are parameters to save,
    #       otherwise back to advanced search page with an error
    if (strlen($H_SearchParams->textDescription())) {
        $JumpTarget = "NewSavedSearch&".$H_SearchParams->urlParameterString();
    } else {
        $JumpTarget = "AdvancedSearch&Err=E_NOPARAMS";
    }
    $AF->setJumpToPage($JumpTarget);
    return;
}

# if saving changes to an existing saved search
if ($SaveRequested) {
    # pull the search ID out of _POST
    $SearchId = intval($_POST["F_SavedSearchId"]);

    # changes are saved only if the search exists and the current user
    #       owns it (otherwise processing continues below)
    $SSFactory = new SavedSearchFactory();
    if ($SSFactory->itemExists($SearchId)) {
        $SavedSearch = new SavedSearch($SearchId);
        if ($SavedSearch->userId() == $User->id()) {
            $SavedSearch->searchName(trim($_POST["F_SearchName"]));
            $SavedSearch->searchParameters($H_SearchParams);

            $AF->setJumpToPage("ListSavedSearches");
            return;
        }
    }
}

if (isset($_GET["ID"]) && $User->isLoggedIn()) {
    # saved search was specified and user is logged in, so use the saved
    #       search if it exists
    $SSFactory = new SavedSearchFactory();
    if ($SSFactory->itemExists($_GET["ID"])) {
        $SavedSearch = new SavedSearch($_GET["ID"]);
        $OwnedByCurrentUser = ($SavedSearch->userId() == $User->id());

        # load saved parameters for owner, or empty parameters and
        #       results for anyone else
        $H_SearchParams = $OwnedByCurrentUser
                ? $SavedSearch->searchParameters()
                : new SearchParameterSet();
        if (!$OwnedByCurrentUser) {
            $H_SearchResults = [
                MetadataSchema::SCHEMAID_DEFAULT => [],
            ];
        }
    }
} elseif ($SearchParametersGottenFromForm) {
    # some search parameters came from form values, so reload using a URL
    #       that contains all parameters (built with first transport UI)
    $TransportUI = reset($H_TransportUIs);
    if ($TransportUI === false) {
        throw new Exception("Transport Controls UI is not available");
    }

    $NewPageParameters = "SearchResults&"
            .$H_SearchParams->urlParameterString()
            .$TransportUI->urlParameterString(false);

    # results per page is only included when it differs from default
    if ($H_ResultsPerPage != $H_DefaultResultsPerPage) {
        $NewPageParameters .= "&RP=".$H_ResultsPerPage;
    }

    $AF->setJumpToPage($NewPageParameters);
    return;
} elseif ($LegacySortFields !== null) {
    # for each schema, sort by first legacy sort field that the schema has
    foreach ($H_TransportUIs as $SchemaId => $TransportUI) {
        $Schema = new MetadataSchema((int) $SchemaId);
        foreach ($LegacySortFields as $SortField) {
            if (!$Schema->fieldExists($SortField)) {
                continue;
            }
            $TransportUI->sortField($SortField);
            break;
        }
    }
}

# if there is nothing to search for
if (!$H_SearchParams->parameterCount()) {
    # load empty search parameter set and results
    $H_SearchParams = new SearchParameterSet();
    $H_SearchResults = [
        MetadataSchema::SCHEMAID_DEFAULT => [],
    ];
    $H_ListChecksums["CK".MetadataSchema::SCHEMAID_DEFAULT] = md5(serialize([]));

    $H_SearchTime = 0;
} else {
    # work out sort field for each schema (NULL means Relevance to the
    #       search engine), falling back to Relevance for fields that the
    #       schema does not have
    $SortFields = [];
    foreach ($H_TransportUIs as $SchemaId => $TransportUI) {
        $SortFields[$SchemaId] = null;

        $SortField = $TransportUI->sortField();
        if ($SortField == "R") {
            continue;
        }

        $SortSchema = new MetadataSchema((int) $SchemaId);
        if ($SortSchema->fieldExists($SortField)) {
            $SortFields[$SchemaId] = $SortField;
        } else {
            # field doesn't exist, so update TCUI's sort field to match
            $TransportUI->sortField("R");
        }
    }

    # run search
    $SEngine = new SearchEngine();
    $H_SearchParams->sortBy($SortFields);
    $H_SearchParams->sortDescending($H_SortDescending);
    $H_SearchResults = $SEngine->searchAll($H_SearchParams);
    $H_SearchTime = $SEngine->searchTime();

    # (earliest cache expiration seen so far, or FALSE if none)
    $CacheExpirationTimestamp = false;

    $IntCfg = InterfaceConfiguration::getInstance();
    $ItemTypesToDisplay = $IntCfg->getArray("ItemTypesToDisplayInSearchResults");

    foreach ($H_SearchResults as $SchemaId => $SchemaResults) {
        # drop item types not to be displayed in search results in
        #       this interface
        if (!in_array($SchemaId, $ItemTypesToDisplay)) {
            unset($H_SearchResults[$SchemaId]);
            continue;
        }

        # find out which results the user is allowed to see
        $RFactory = new RecordFactory($SchemaId);
        $ResultIds = array_keys($SchemaResults);
        $ViewableResourceIds = $RFactory->filterOutUnviewableRecords(
            $ResultIds,
            $User
        );

        # for anonymous users, track the earliest view cache expiration
        #       across all schemas
        if (!$User->isLoggedIn()) {
            $SchemaCacheExpirationDate = $RFactory->getViewCacheExpirationDate(
                $ResultIds,
                $User
            );

            if ($SchemaCacheExpirationDate !== false) {
                $SchemaCacheExpirationTimestamp = strtotime($SchemaCacheExpirationDate);

                $IsEarliestSoFar = ($CacheExpirationTimestamp === false)
                        || ($SchemaCacheExpirationTimestamp < $CacheExpirationTimestamp);
                if ($IsEarliestSoFar) {
                    $CacheExpirationTimestamp = $SchemaCacheExpirationTimestamp;
                }
            }
        }

        # keep only viewable results that have non-negative IDs
        $TempSearchResults = array_filter(
            array_intersect_key($SchemaResults, array_flip($ViewableResourceIds)),
            function ($Id) {
                return $Id >= 0;
            },
            ARRAY_FILTER_USE_KEY
        );

        # schemas left with no results are dropped entirely
        if (count($TempSearchResults)) {
            $H_SearchResults[$SchemaId] = $TempSearchResults;
            $AF->addPageCacheTag(
                "ResourceList".$SchemaId
            );
        } else {
            unset($H_SearchResults[$SchemaId]);
        }
    }

    # pull page expiration date in if search results expire earlier
    #       (or if page has no expiration date yet)
    $PageExpirationDate = $AF->expirationDateForCurrentPage();
    if ($CacheExpirationTimestamp !== false) {
        if (($PageExpirationDate === false)
                || ($CacheExpirationTimestamp < strtotime($PageExpirationDate))) {
            $AF->expirationDateForCurrentPage(
                date(StdLib::SQL_DATE_FORMAT, $CacheExpirationTimestamp)
            );
        }
    }

    # inform transport control UIs of search results and build list of
    #       result checksums
    $H_ListChecksums = [];
    foreach ($H_TransportUIs as $SchemaId => $TransportUI) {
        if (!isset($H_SearchResults[$SchemaId])) {
            continue;
        }

        $TransportUI->itemCount(count($H_SearchResults[$SchemaId]));

        # go back to first result if checksum submitted with the request
        #       does not match the results we have now
        $ChecksumIndex = "CK".$SchemaId;
        $Checksum = md5(serialize($H_SearchResults[$SchemaId]));
        $H_ListChecksums[$ChecksumIndex] = $Checksum;
        if ($Checksum != StdLib::getFormValue($ChecksumIndex)) {
            $TransportUI->startingIndex(0);
        }
    }

    $SignalResult = $AF->signalEvent(
        "EVENT_SEARCH_COMPLETE",
        [
            "SearchParameters" => $H_SearchParams,
            "SearchResults" => $H_SearchResults
        ]
    );

    if (count($H_SearchResults)) {
        $H_ActiveTab = getActiveTab($H_SearchResults);
    } else {
        # no results at all for any schema, so dummy up empty result set
        #       so that results page is not completely empty
        $H_SearchResults[MetadataSchema::SCHEMAID_DEFAULT] = [];
    }
}
