/*
 * Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */



#pragma once

// std
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <ctime>
#include <limits>
#include <string>
#include <utility>
#include <vector>
// 3rd party
#include "nlohmann/json.hpp"
// sharp_am
#include "telemetry_config.h"

class Fabric;

/**
 * Telemetry's metrics
 *
 * Plain data holder: metadata, current counters, histograms and accumulated history counters.
 *
 * NOTE:
 *   field names are by spec, so DON'T change public fields' names
 */
struct Metrics
{
    //! metadata attached to the metrics: the reporting host and a timestamp
    struct Metadata
    {
        //! @param host  moved into the `host` field; `timestamp` is left value-initialized (0)
        explicit Metadata(std::string host) : host{std::move(host)} {}

        std::string host;
        std::time_t timestamp{};   // not set by the constructor
    };

    //! accumulated (historical) counters.
    //! NOTE: the type name is misspelled ("Histroy"); it is kept as-is because it is part of
    //! the public interface of this struct.
    struct Histroy
    {
        //! wall-clock time captured when this object is constructed
        std::time_t starting_timestamp{std::chrono::system_clock::to_time_t(std::chrono::system_clock::now())};
        std::int64_t denied_reservations{};                   // not likely, due to duplicated guids
        std::int64_t denied_jobs_by_reservations{};           // guids don't match the reservation
        std::int64_t denied_jobs_by_resource_limit{};         // Reservation resource limit
        std::int64_t denied_jobs_by_no_resources{};           // No available route to serve the job
        std::int64_t jobs_ended_due_to_client_failure{};      // Keepalive detected no client
        std::int64_t jobs_ended_due_to_fatal_sharp_error{};   // For example, link failure.
        std::int64_t jobs_ended_successfully{};
    };

    //! ctor. we need to know the histograms' buckets, so we get them in config
    //! @param config  metrics configuration (carries the histogram buckets)
    //! @param host    host name (presumably forwarded to `metadata` -- defined out of line, confirm there)
    explicit Metrics(MetricsConfig config, std::string host);

    //! reset metrics' counters.
    //! zero out all metrics counters, but keep the bucket configuration & metadata intact.
    void ResetCounters();

    Metadata metadata;
    std::int64_t active_jobs{};
    std::int64_t active_sat_jobs{};
    std::int64_t agg_nodes_in_invalid_state{};          // Reported in sharp status API
    IntegralHistogram active_jobs_num_hcas_histogram;   // default buckets: 8,16,32,64,128,256,512,1024,infinity
    IntegralHistogram trees_level_histogram;            // default buckets: 0,1,2,3
    Histroy history;
    HoursHistogram history_ended_jobs_duration_in_hours_histogram{};   // default buckets: 1,12,24,72,168
};

//! Publishes telemetry metrics to an end point at a fixed interval.
class Telemetry
{
   public:
    //! @param end_point         where the telemetry data is published to
    //! @param publish_interval  interval between publications
    Telemetry(std::string end_point, std::chrono::seconds publish_interval);

    //! publish the given metrics to clients (currently done by saving a JSON to a file)
    void Publish(const Metrics& metrics);

    //! true only when an end point is configured AND the publish interval is positive
    bool IsEnabled() const
    {
        if (m_end_point.empty()) {
            return false;
        }
        return m_publish_interval > std::chrono::seconds::zero();
    }

    //! the interval at which telemetry data is published
    std::chrono::seconds GetPublishInterval() const { return m_publish_interval; }

    //! gather live metrics (current time, invalid agg nodes, etc.) + accumulated metrics, then publish
    void GatherAndPublish(const Fabric& fabric);

   private:
    std::string m_end_point;                     // an empty end point disables telemetry (see IsEnabled)
    std::chrono::seconds m_publish_interval{};   // a non-positive interval disables telemetry (see IsEnabled)
};

// this is commented out, as we currently serialize the Histogram class in a non-standard way.
// once we switch to the standard way, we need to:
// 1. in telemetry.cpp, define 'using json = nlohmann::json;' as we won't need ordered json
// anymore.
// 2. in telemetry.h, remove the to_json/from_json function declarations below.
// 3. delete the 'telemetry_json.cpp' file, and uncomment the code here:
//
// NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(Metrics::Metadata, host, timestamp);
// NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(Metrics::Histroy,
//                                    denied_reservations,
//                                    denied_jobs_by_reservations,
//                                    denied_jobs_by_resource_limit,
//                                    denied_jobs_by_no_resources,
//                                    jobs_ended_due_to_client_failure,
//                                    jobs_ended_due_to_fatal_sharp_error,
//                                    jobs_ended_successfully);
// NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(Metrics,
//                                    metadata,
//                                    active_jobs,
//                                    active_sat_jobs,
//                                    active_jobs_num_hcas_histogram,
//                                    agg_nodes_in_invalid_state,
//                                    active_jobs_num_hcas_histogram,
//                                    trees_level_histogram,
//                                    history,
//                                    history_ended_jobs_duration_in_hours_histogram);
// NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(Telemetry, metrics);

// jsonize Metrics (nlohmann-style to_json/from_json hooks, using ordered_json)
// to_json:   writes the metrics `h` into the JSON object `j`
// from_json: reads the JSON object `j` into the metrics `h`
void to_json(nlohmann::ordered_json& j, const Metrics& h);
void from_json(const nlohmann::ordered_json& j, Metrics& h);
