TensorflowDevLearn

This repository was created as a study memo for the TensorFlow Developer Certificate exam.


Notes on building time-series datasets

  • A snippet that builds a column-name → index map with enumerate
    • Seemed handy enough to be worth noting down
from pprint import pprint

# Just maps each column name to its positional index.
column_indices = {name: i for i, name in enumerate(df.columns)}

print(df.columns)
pprint(column_indices)
  • Output (each column name gets mapped to its index number)
    • 🌟 Makes it clear which column is which after converting to NumPy (see the sketch after the output below).
Index(['p (mbar)', 'T (degC)', 'Tpot (K)', 'Tdew (degC)', 'rh (%)',
       'VPmax (mbar)', 'VPact (mbar)', 'VPdef (mbar)', 'sh (g/kg)',
       'H2OC (mmol/mol)', 'rho (g/m**3)', 'Wx', 'Wy', 'max Wx', 'max Wy',
       'Day sin', 'Day cos', 'Year sin', 'Year cos'],
      dtype='object')
{'Day cos': 16,
 'Day sin': 15,
 'H2OC (mmol/mol)': 9,
 'T (degC)': 1,
 'Tdew (degC)': 3,
 'Tpot (K)': 2,
 'VPact (mbar)': 6,
 'VPdef (mbar)': 7,
 'VPmax (mbar)': 5,
 'Wx': 11,
 'Wy': 12,
 'Year cos': 18,
 'Year sin': 17,
 'max Wx': 13,
 'max Wy': 14,
 'p (mbar)': 0,
 'rh (%)': 4,
 'rho (g/m**3)': 10,
 'sh (g/kg)': 8}
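
  • A minimal usage sketch, assuming df is the Jena weather DataFrame from the TensorFlow time-series tutorial: once the DataFrame is turned into a plain NumPy array the column names are gone, so column_indices is the only way to look a column up by name.
import numpy as np

# Convert the DataFrame to a raw float array; the column names are lost here.
values = np.array(df, dtype=np.float32)

# Look up the temperature column by name instead of hard-coding index 1.
temperature = values[:, column_indices['T (degC)']]
print(temperature[:5])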
"""🌟 入出力を整える関数 """
def split_window(self, features):
    print(f"{split_window.__name__}---------------")
    print(f"features(shape):{features.shape}")
    print(f'input_slice: {self.input_slice}')
    print(f'labels_slice: {self.labels_slice}')
    print(f'input_width: {self.input_width}')
    print(f'label_width: {self.label_width}')

    inputs = features[:, self.input_slice, :]
    labels = features[:, self.labels_slice, :]
    if self.label_columns is not None:
        print(f'column_indices: {self.column_indices}')
        labels = tf.stack(
            [labels[:, :, self.column_indices[name]]
             for name in self.label_columns],
            axis=-1)

    # Slicing doesn't preserve static shape information, so set the shapes
    # manually. This way the `tf.data.Datasets` are easier to inspect.
    inputs.set_shape([None, self.input_width, None])
    labels.set_shape([None, self.label_width, None])
    print("----------------------------")
    return inputs, labels
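
  • A minimal sketch of how this gets wired up, following the TensorFlow time-series tutorial pattern (WindowGenerator, w1 and train_df are assumptions, not defined in this memo): the function is patched onto the WindowGenerator class and called on a batch of stacked raw windows.
# Attach split_window as a method of the (assumed) WindowGenerator class,
# the way the tutorial patches it on after the class definition.
WindowGenerator.split_window = split_window

# Stack two raw windows of total_window_size rows into a batch and split them.
example_window = tf.stack([
    np.array(train_df[:w1.total_window_size]),
    np.array(train_df[100:100 + w1.total_window_size]),
])
example_inputs, example_labels = w1.split_window(example_window)
print(example_inputs.shape, example_labels.shape)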

"""🌟 tensorflow datasetに変換する関数 """
def make_dataset(self, data):
    # Convert the DataFrame to a float32 NumPy array.
    data = np.array(data, dtype=np.float32)

    # Slide a window of total_window_size rows over the data one step at a
    # time, shuffle the windows, and batch them 32 at a time.
    ds = tf.keras.utils.timeseries_dataset_from_array(
        data=data,
        targets=None,
        sequence_length=self.total_window_size,
        sequence_stride=1,
        shuffle=True,
        batch_size=32,)

    # Split each window into an (inputs, labels) pair.
    ds = ds.map(self.split_window)
    return ds
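
  • A minimal sketch of inspecting the result, again assuming a tutorial-style WindowGenerator instance w1 and a train_df split: each batch comes out as (batch, time, features).
# Patch make_dataset onto the (assumed) WindowGenerator class.
WindowGenerator.make_dataset = make_dataset

# Build the dataset from the training DataFrame and peek at one batch.
train_ds = w1.make_dataset(train_df)
for inputs, labels in train_ds.take(1):
    # inputs: (32, input_width, num_features)
    # labels: (32, label_width, number of label_columns)
    print(inputs.shape, labels.shape)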