/*
 *  $Id: gxsm4.c 29026 2025-12-19 14:03:38Z yeti-dn $
 *  Copyright (C) 2025 David Necas (Yeti).
 *  E-mail: yeti@gwyddion.net
 *
 *  This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public
 *  License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any
 *  later version.
 *
 *  This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
 *  warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
 *  details.
 *
 *  You should have received a copy of the GNU General Public License along with this program; if not, write to the
 *  Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

/**
 * [FILE-MAGIC-USERGUIDE]
 * GXSM4 HDF5
 * .nc
 * Read
 **/

/**
 * [FILE-MAGIC-MISSING]
 * Avoiding clash with a standard file format.
 **/

#include "config.h"
#include <glib/gi18n-lib.h>
#include <gwy.h>

#include "gwyhdf5.h"
#include "hdf5file.h"

/* Factor applied to the "dx" and "dy" steps read from the file before they are used as lateral dimensions, whose
 * unit is then set to "m". */
#define Angstrom 1e-10

/* Kind of the main dataset, as decided by find_main_dataset() and dispatched on in load_file(). */
typedef enum {
    /* No main dataset has been identified. */
    GXSM4_NONE       = 0,
    /* Main dataset whose first dimension is 1; read by read_image(). */
    GXSM4_IMAGE      = 2,
    /* Main dataset whose first dimension is larger than 1; read by read_timeseries(). */
    GXSM4_TIMESERIES = 3,
} GXSM4DataKind;

/* Loader state shared by load_file() and the functions reading the individual data kinds. */
typedef struct {
    /* File name as passed to load_file(); used for the import log entry. */
    const gchar *filename;
    /* The GwyFile being filled with data, titles and metadata. */
    GwyFile *file;
    /* Metadata gathered from the file attributes and auxiliary datasets; a copy is attached to each data item. */
    GwyContainer *meta;
    /* Name of the main dataset chosen by find_main_dataset(); points to one of its candidate names. */
    const gchar *dataset_name;
    /* Kind of the main dataset chosen by find_main_dataset(). */
    GXSM4DataKind datakind;
} GXSM4File;

/* Forward declarations of static functions defined further below in this file. */
static gint     detect_file           (const GwyFileDetectInfo *fileinfo,
                                       gboolean only_name);
static GwyFile* load_file             (const gchar *filename,
                                       GwyRunModeFlags mode,
                                       GError **error);
static gboolean find_main_dataset     (GXSM4File *g4file,
                                       hid_t file_id,
                                       GError **error);
static gboolean read_image            (GXSM4File *g4file,
                                       hid_t file_id,
                                       GError **error);
static gboolean read_timeseries       (GXSM4File *g4file,
                                       hid_t file_id,
                                       GError **error);
static void     set_brick_zcalibration(GwyBrick *brick,
                                       hid_t dscale,
                                       gint zlen);
static void     add_more_meta         (hid_t file_id,
                                       GwyContainer *meta);

/*
 * Registers the GXSM4 file type under the name "gxsm4" with detect_file() and load_file() as its detection and
 * loading functions. The two remaining function slots are left NULL.
 */
void
gwyhdf5_register_gxsm4(void)
{
    gwy_file_func_register("gxsm4", N_("GXSM4 HDF5 files (.nc)"), detect_file, load_file, NULL, NULL);
}

/*
 * Estimates how likely a file is a GXSM4 HDF5 file.
 *
 * Once gwyhdf5_quick_check() has accepted and opened the file, a fixed list of attribute and dataset names is
 * probed on it. An "antiscore" starts at 100 and is reduced to three quarters of its value (integer arithmetic) for
 * each name that is present.
 *
 * Returns 0 when the quick check rejects the file, otherwise 100 minus the remaining antiscore.
 */
static gint
detect_file(const GwyFileDetectInfo *fileinfo, gboolean only_name)
{
    static const gchar *attributes[] = {
        "Creator", "DataIOVer", "HardwareConnectionDev", "HardwareCtrlType", "InstrumentName", "InstrumentType",
    };
    static const gchar *datasets[] = {
        "dimx", "dimy", "dx", "dy", "dz", "time", "value", "rangex", "rangey", "offsetx", "offsety",
    };
    hid_t file_id;
    gint antiscore = 100;

    if ((file_id = gwyhdf5_quick_check(fileinfo, only_name)) < 0)
        return 0;

    for (guint i = 0; i < G_N_ELEMENTS(attributes); i++) {
        /* The existence checks return htri_t: positive when found, zero when missing and NEGATIVE on failure.
         * Only a positive value may count as a match, otherwise a failed probe would raise the score. */
        if (H5Aexists(file_id, attributes[i]) > 0) {
            gwy_debug("found attr %s", attributes[i]);
            antiscore = 3*antiscore/4;
        }
    }
    /* Once the antiscore has dropped to zero, further matches cannot change the result. */
    for (guint i = 0; i < G_N_ELEMENTS(datasets) && antiscore > 0; i++) {
        if (H5Lexists(file_id, datasets[i], H5P_DEFAULT) > 0) {
            gwy_debug("found dataset %s", datasets[i]);
            antiscore = 3*antiscore/4;
        }
    }

    H5Fclose(file_id);

    return 100 - antiscore;
}

/*
 * Loads a GXSM4 HDF5 file.
 *
 * The file is opened read-only, its attributes are gathered through gwyhdf5_process_attribute() and turned into
 * the metadata container, extra metadata are added from auxiliary datasets, and the main dataset is then located
 * and read either as an image or as a time series.
 *
 * Returns the newly created GwyFile, or NULL on failure.
 */
static GwyFile*
load_file(const gchar *filename,
          G_GNUC_UNUSED GwyRunModeFlags mode,
          GError **error)
{
    hid_t file_id = H5Fopen(filename, H5F_ACC_RDONLY, H5P_DEFAULT);

    if (file_id < 0) {
        err_HDF5(error, "H5Fopen", file_id);
        return NULL;
    }
    gwy_debug("file_id %d", (gint)file_id);

    /* NOTE(review): the early returns below rely on gwyhdf5_check_status() releasing the file (and the GwyHDF5File
     * when one is passed) on failure; confirm against its implementation. */
    H5O_info_t infobuf;
    herr_t status = H5Oget_info(file_id, &infobuf, H5O_INFO_BASIC);
    if (!gwyhdf5_check_status(status, file_id, NULL, "H5Oget_info", error))
        return NULL;

    GXSM4File g4file;
    gwy_clear(&g4file, 1);
    g4file.filename = filename;

    GwyHDF5File ghfile;
    gwyhdf5_init(&ghfile, file_id, infobuf.token);
    ghfile.impl = &g4file;

    status = H5Aiterate2(file_id, H5_INDEX_NAME, H5_ITER_NATIVE, NULL, gwyhdf5_process_attribute, &ghfile);
    if (!gwyhdf5_check_status(status, file_id, &ghfile, "H5Aiterate2", error))
        return NULL;

    g4file.meta = gwyhdf5_meta_slash_to_4dots(ghfile.meta);
    gwy_container_remove_by_name(g4file.meta, "_NCProperties");
    add_more_meta(file_id, g4file.meta);

    GwyFile *file = NULL;
    if (find_main_dataset(&g4file, file_id, error)) {
        gwy_debug("data kind %d", g4file.datakind);
        g4file.file = file = gwy_file_new_in_construction();

        /* A failed read discards the file under construction so that NULL is returned. */
        switch (g4file.datakind) {
            case GXSM4_IMAGE:
            if (!read_image(&g4file, file_id, error))
                g_clear_object(&file);
            break;

            case GXSM4_TIMESERIES:
            if (!read_timeseries(&g4file, file_id, error))
                g_clear_object(&file);
            break;

            default:
            /* find_main_dataset() only succeeds with one of the two kinds above. */
            g_assert_not_reached();
            break;
        }
    }

    /* Common clean-up for both the success and the failure path. */
    status = gwyhdf5_fclose(file_id);
    gwy_debug("status %d", status);
    g_clear_object(&g4file.meta);
    gwyhdf5_free(&ghfile);

    return file;
}

/*
 * Reads a scalar dataset called @name as a double.
 *
 * The value is passed through sanitise_real_size() when the read succeeds. When the dataset cannot be opened as
 * a scalar or cannot be read, a warning is emitted and 1.0 is returned.
 */
static gdouble
read_step(hid_t file_id, const gchar *name)
{
    hid_t dataset = gwyhdf5_open_and_check_dataset(file_id, name, 0, NULL, NULL);

    if (dataset < 0) {
        g_warning("Cannot find %s.", name);
        return 1.0;
    }

    gdouble value = 1.0;
    if (H5Dread(dataset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &value) >= 0)
        sanitise_real_size(&value, name);
    else {
        g_warning("Cannot read %s.", name);
        /* Do not trust whatever a failed read may have left in the buffer. */
        value = 1.0;
    }
    H5Dclose(dataset);

    return value;
}

/*
 * Looks for the main data among a fixed list of candidate dataset names.
 *
 * The first candidate that opens as a 4D dataset with second dimension 1 decides the result: its name is stored
 * in @g4file and the data kind is set to image when the first dimension is 1 and to time series when it is
 * larger. Any other first dimension, or no matching candidate at all, is reported as a no-data error.
 *
 * Returns TRUE when the dataset name and data kind were set.
 */
static gboolean
find_main_dataset(GXSM4File *g4file, hid_t file_id, GError **error)
{
    const gchar *candidates[] = {
        "H", "FloatField", "DoubleField", "ByteField", "Intensity",
    };
    // TODO: There are also ComplexDoubleField and RGBA_ByteField. Implement them when we see some examples.
    // Do they create a complex type using H5Tcomplex_create() or just pack a pair of values there?

    for (guint k = 0; k < G_N_ELEMENTS(candidates); k++) {
        // Currently we do not know what to do with non-1 "value" dimension. Require 1.
        gint dims[4] = { -1, 1, -1, -1 };
        const gchar *name = candidates[k];
        hid_t dataset = gwyhdf5_open_and_check_dataset(file_id, name, G_N_ELEMENTS(dims), dims, NULL);

        if (dataset < 0)
            continue;

        gwy_debug("found dataset %s (%d x %dx%d)", name, dims[0], dims[3], dims[2]);
        H5Dclose(dataset);
        g4file->dataset_name = name;

        /* An unusable first dimension ends the search; no further candidates are tried. */
        if (dims[0] < 1)
            break;

        g4file->datakind = (dims[0] == 1 ? GXSM4_IMAGE : GXSM4_TIMESERIES);
        return TRUE;
    }

    err_NO_DATA(error);
    return FALSE;
}

/*
 * Converts the unit "Grad" to degrees: @s is overwritten in place with "deg" and the scale factor @q is multiplied
 * by 90/100. Any other unit string leaves both arguments untouched.
 */
static void
fix_grad(gchar *s, gdouble *q)
{
    if (!gwy_strequal(s, "Grad"))
        return;

    /* "deg" is shorter than "Grad", so it fits into the existing buffer. */
    strcpy(s, "deg");
    /* 100 grad is only 90 deg */
    *q *= 90.0/100.0;
}

/*
 * Reads the main dataset as a single image and adds it to g4file->file as image 0.
 *
 * The dataset is required to have dimensions 1 x 1 x yres x xres. Lateral pixel steps come from the "dx" and "dy"
 * datasets (multiplied by Angstrom) and the value scale from "dz". Values are rescaled and the value unit is set
 * only when /rangez carries a "unit" attribute. The title is taken from the "label" attribute of /rangez when
 * present, otherwise a fallback title is used.
 *
 * Returns TRUE on success. On failure FALSE is returned and @error is set.
 */
static gboolean
read_image(GXSM4File *g4file, hid_t file_id, GError **error)
{
    GwyField *field = NULL;
    gboolean ok = FALSE;

    hid_t dataset;
    gint dims[4] = { 1, 1, -1, -1 };
    if ((dataset = gwyhdf5_open_and_check_dataset(file_id, g4file->dataset_name, G_N_ELEMENTS(dims), dims, error)) < 0)
        return FALSE;

    gwy_debug("reading dataset %s (%dx%d)", g4file->dataset_name, dims[3], dims[2]);
    if (err_DIMENSION(error, dims[2]) || err_DIMENSION(error, dims[3]))
        goto fail;

    gdouble dx = read_step(file_id, "dx") * Angstrom;
    gdouble dy = read_step(file_id, "dy") * Angstrom;
    gdouble dz = read_step(file_id, "dz");

    field = gwy_field_new(dims[3], dims[2], dims[3]*dx, dims[2]*dy, FALSE);
    herr_t status = H5Dread(dataset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, gwy_field_get_data(field));
    if (status < 0) {
        /* The caller reports failure by returning NULL, so the error must be set here. */
        err_HDF5(error, "H5Dread", status);
        goto fail;
    }

    gwy_unit_set_from_string(gwy_field_get_unit_xy(field), "m");
    gchar *s;
    if (gwyhdf5_get_str_attr_g(file_id, "/rangez", "unit", &s, NULL)) {
        gint power10;

        gwy_debug("found zunit %s", s);
        fix_grad(s, &dz);
        power10 = gwy_unit_set_from_string(gwy_field_get_unit_z(field), s);
        g_free(s);

        gwy_field_multiply(field, dz*gwy_exp10(power10));
    }

    gwy_file_pass_image(g4file->file, 0, field);

    if (gwyhdf5_get_str_attr_g(file_id, "/rangez", "label", &s, NULL)) {
        gwy_debug("found label %s", s);
        /* NOTE(review): s is not freed here; presumably gwy_file_pass_title() takes ownership -- confirm. */
        gwy_file_pass_title(g4file->file, GWY_FILE_IMAGE, 0, s);
    }
    else
        gwy_image_title_fall_back(g4file->file, 0);

    gwy_file_pass_meta(g4file->file, GWY_FILE_IMAGE, 0, gwy_container_copy(g4file->meta));
    gwy_log_add_import(g4file->file, GWY_FILE_IMAGE, 0, NULL, g4file->filename);

    ok = TRUE;

fail:
    /* The field is only released here when it has not been passed to the file. */
    if (!ok)
        g_clear_object(&field);
    H5Dclose(dataset);
    return ok;
}

/*
 * Reads the main dataset as a time series and adds it to g4file->file as volume data 0.
 *
 * The dataset is required to have dimensions nframes x 1 x yres x xres. Lateral pixel steps come from the "dx"
 * and "dy" datasets (multiplied by Angstrom) and the value scale from "dz". Values are rescaled and the value unit
 * is set only when /rangez carries a "unit" attribute. When a 1D dataset "time" exists and is attached to the
 * first dimension of the main dataset, it is used as the z calibration of the brick.
 *
 * Returns TRUE on success. On failure FALSE is returned and @error is set.
 */
static gboolean
read_timeseries(GXSM4File *g4file, hid_t file_id, GError **error)
{
    GwyBrick *brick = NULL;
    gboolean ok = FALSE;
    gint zlen = -1;

    hid_t dataset;
    gint dims[4] = { -1, 1, -1, -1 };
    if ((dataset = gwyhdf5_open_and_check_dataset(file_id, g4file->dataset_name, G_N_ELEMENTS(dims), dims, error)) < 0)
        return FALSE;

    gwy_debug("reading dataset %s (%d x %dx%d)", g4file->dataset_name, dims[0], dims[3], dims[2]);
    if (err_DIMENSION(error, dims[0]) || err_DIMENSION(error, dims[2]) || err_DIMENSION(error, dims[3]))
        goto fail;

    gdouble dx = read_step(file_id, "dx") * Angstrom;
    gdouble dy = read_step(file_id, "dy") * Angstrom;
    gdouble dw = read_step(file_id, "dz");

    brick = gwy_brick_new(dims[3], dims[2], dims[0], dims[3]*dx, dims[2]*dy, dims[0], FALSE);
    herr_t status = H5Dread(dataset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, gwy_brick_get_data(brick));
    if (status < 0) {
        /* The caller reports failure by returning NULL, so the error must be set here. */
        err_HDF5(error, "H5Dread", status);
        goto fail;
    }

    gwy_unit_set_from_string(gwy_brick_get_unit_x(brick), "m");
    gwy_unit_set_from_string(gwy_brick_get_unit_y(brick), "m");
    gchar *s;
    if (gwyhdf5_get_str_attr_g(file_id, "/rangez", "unit", &s, NULL)) {
        gint power10;

        gwy_debug("found wunit %s", s);
        fix_grad(s, &dw);
        power10 = gwy_unit_set_from_string(gwy_brick_get_unit_w(brick), s);
        g_free(s);

        gwy_brick_multiply(brick, dw*gwy_exp10(power10));
    }

    /* FIXME: The main dataset has "DIMENSION_LIST" attribute with references to the scales, which we should probably
     * use for generality. However, it is just called "time". So look for "time" and check if it is attached. */
    hid_t dscale;
    if ((dscale = gwyhdf5_open_and_check_dataset(file_id, "time", 1, &zlen, NULL)) >= 0) {
        /* H5DSis_attached() returns htri_t, which is negative on failure. Only a positive value means the scale
         * is really attached; a failure must not be mistaken for TRUE. */
        gboolean is_attached = H5DSis_attached(dataset, dscale, 0) > 0;
        gwy_debug("found /time (size %d, is_attached: %d)", zlen, is_attached);
        if (is_attached)
            set_brick_zcalibration(brick, dscale, zlen);
        H5Dclose(dscale);
    }

    gwy_file_pass_volume(g4file->file, 0, brick);

    if (gwyhdf5_get_str_attr_g(file_id, "/rangez", "label", &s, NULL)) {
        gwy_debug("found label %s", s);
        /* NOTE(review): s is not freed here; presumably gwy_file_pass_title() takes ownership -- confirm. */
        gwy_file_pass_title(g4file->file, GWY_FILE_VOLUME, 0, s);
    }
    else
        gwy_file_set_title(g4file->file, GWY_FILE_VOLUME, 0, "Timeseries", FALSE);

    gwy_file_pass_meta(g4file->file, GWY_FILE_VOLUME, 0, gwy_container_copy(g4file->meta));
    gwy_log_add_import(g4file->file, GWY_FILE_VOLUME, 0, NULL, g4file->filename);

    ok = TRUE;

fail:
    /* The brick is only released here when it has not been passed to the file. */
    if (!ok)
        g_clear_object(&brick);
    H5Dclose(dataset);
    return ok;
}

/*
 * Attaches the values of the 1D dataset @dscale (of length @zlen) to @brick as its z calibration and derives the
 * z offset and z size of the brick from them.
 *
 * Nothing is changed when @zlen is smaller than 2, differs from the brick z resolution, the dataset cannot be read
 * or its first and last values are equal (or not comparable). When the values run from larger to smaller, both
 * the brick and the calibration are flipped along z so that they run upwards.
 *
 * If we fail, do not worry too much.
 */
static void
set_brick_zcalibration(GwyBrick *brick, hid_t dscale, gint zlen)
{
    if (zlen != gwy_brick_get_zres(brick) || zlen < 2)
        return;

    GwyLine *zcal = gwy_line_new(zlen, zlen, FALSE);

    if (H5Dread(dscale, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, gwy_line_get_data(zcal)) >= 0) {
        gdouble first = gwy_line_get_val(zcal, 0);
        gdouble last = gwy_line_get_val(zcal, zlen-1);

        if (first > last) {
            gdouble tmp = first;

            gwy_brick_flip(brick, FALSE, FALSE, TRUE);
            gwy_line_flip(zcal);
            first = last;
            last = tmp;
        }

        /* Written as a positive comparison so that NaN end values are rejected together with equal ones. */
        if (last > first) {
            gdouble dz = (last - first)/(zlen - 1);

            /* NOTE(review): the offset is -dz/2 regardless of the first value, i.e. the first sample is placed
             * at zero. Confirm this is intended and not meant to be first - dz/2. */
            gwy_brick_set_zoffset(brick, -0.5*dz);
            gwy_brick_set_zreal(brick, zlen*dz);
            gwy_unit_set_from_string(gwy_line_get_unit_y(zcal), "s");
            gwy_brick_set_zcalibration(brick, zcal);
        }
    }

    g_object_unref(zcal);
}

/*
 * Opens the dataset @name, accepting only scalars, and tries to fetch its "unit" string attribute into @unit.
 *
 * Returns the dataset identifier, which the caller has to close, or -1 when the dataset cannot be opened as
 * a scalar. Whether the attribute was found is not reported.
 */
static hid_t
open_aux_dataset(hid_t file_id, const gchar *name, gchar **unit)
{
    /* Zero dimensions requested: only accept scalars. */
    hid_t dataset = gwyhdf5_open_and_check_dataset(file_id, name, 0, NULL, NULL);

    if (dataset >= 0)
        gwyhdf5_get_str_attr_g(dataset, ".", "unit", unit, NULL);
    else
        dataset = -1;

    return dataset;
}

/*
 * H5Literate() callback which turns selected auxiliary datasets into metadata strings.
 *
 * A link is considered when its name starts with "JSON_" or "rpspmc_" (the prefix is dropped from the metadata
 * key; "rpspmc_info" is skipped) or when it is one of the names listed in metaitems. Scalar floating point values
 * are stored as "%g", followed by the unit when the dataset has a "unit" attribute; scalar integers are stored as
 * "%ld". Datasets which cannot be opened as scalars are tried as string datasets.
 *
 * @user_data is the GwyContainer receiving the metadata.
 *
 * Always returns 0 so that the iteration continues. Metadata are best-effort and a single unreadable item must
 * not prevent the remaining ones from being collected.
 */
static herr_t
scan_aux_datasets(hid_t loc_id,
                  const char *name,
                  G_GNUC_UNUSED const H5L_info_t *info,
                  void *user_data)
{
    static const gchar *metaitems[] = {
        "alpha", "offsetx", "offsety", "opt_xpiezo_av", "opt_ypiezo_av", "opt_zpiezo_av", "t_start", "t_end", "time",
        "basename", "bright", "contrast", "comment", "dateofscan", "reftime", "extra_scan_info", "spm_scancontrol",
        "spm_scancontrol_dim", "title", "username",
    };
    GwyContainer *meta = (GwyContainer*)user_data;
    gchar *unit = NULL;
    H5T_class_t type_class;
    H5O_info_t infobuf;
    hid_t dataset, dataset_type;
    const gchar *metaname = name;

    if (g_str_has_prefix(metaname, "JSON_"))
        metaname += strlen("JSON_");
    else if (g_str_has_prefix(metaname, "rpspmc_")) {
        metaname += strlen("rpspmc_");
        /* This is some long multiline log. */
        if (gwy_strequal(metaname, "info"))
            return 0;
    }
    else {
        guint i = 0;
        while (i < G_N_ELEMENTS(metaitems) && !gwy_strequal(name, metaitems[i]))
            i++;
        if (i == G_N_ELEMENTS(metaitems))
            return 0;
    }

    /* A negative return value would abort the entire iteration. Skip the item instead when its info cannot be
     * obtained (e.g. a link whose target is missing). */
    if (H5Oget_info_by_name(loc_id, name, &infobuf, H5O_INFO_BASIC, H5P_DEFAULT) < 0) {
        gwy_debug("cannot get object info for %s, skipping", name);
        return 0;
    }

    if (infobuf.type != H5O_TYPE_DATASET)
        return 0;

    dataset = open_aux_dataset(loc_id, name, &unit);
    /* When opening as a scalar fails, try to read it as a string dataset. */
    if (dataset < 0) {
        gchar *s = gwyhdf5_read_string_dataset(loc_id, name, NULL);
        if (s) {
            g_strstrip(s);
            gwy_debug("read as string dataset %s as <%s>", name, s);
            gwy_container_set_string_by_name(meta, metaname, s);
        }
        return 0;
    }

    /* Do not hand an invalid type identifier to H5Tget_class() and H5Tclose(). */
    dataset_type = H5Dget_type(dataset);
    type_class = (dataset_type >= 0 ? H5Tget_class(dataset_type) : H5T_NO_CLASS);
    if (type_class == H5T_FLOAT) {
        gdouble v;
        if (H5Dread(dataset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &v) >= 0) {
            if (unit)
                gwy_container_set_string_by_name(meta, metaname, g_strdup_printf("%g %s", v, unit));
            else
                gwy_container_set_string_by_name(meta, metaname, g_strdup_printf("%g", v));
        }
    }
    else if (type_class == H5T_INTEGER) {
        glong v;
        if (H5Dread(dataset, H5T_NATIVE_LONG, H5S_ALL, H5S_ALL, H5P_DEFAULT, &v) >= 0)
            gwy_container_set_string_by_name(meta, metaname, g_strdup_printf("%ld", v));
    }
    else {
        gwy_debug("don't know what to do with type class %d", type_class);
    }
    g_free(unit);

    if (dataset_type >= 0)
        H5Tclose(dataset_type);
    H5Dclose(dataset);

    return 0;
}

/*
 * Runs scan_aux_datasets() over the links found directly under @file_id, letting it add metadata to @meta.
 * The result of the iteration is not checked.
 */
static void
add_more_meta(hid_t file_id, GwyContainer *meta)
{
    /* No resume index: iterate from the beginning. */
    hsize_t *resume_index = NULL;

    H5Literate(file_id, H5_INDEX_NAME, H5_ITER_NATIVE, resume_index, scan_aux_datasets, meta);
}

/* vim: set cin columns=120 tw=118 et ts=4 sw=4 cino=>1s,e0,n0,f0,{0,}0,^0,\:1s,=0,g1s,h0,t0,+1s,c3,(0,u0 : */
