Alexandria  2.27.0
SDC-CH common library for the Euclid project
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Table2Numpy.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2022 Euclid Science Ground Segment
3  *
4  * This library is free software; you can redistribute it and/or modify it under
5  * the terms of the GNU Lesser General Public License as published by the Free
6  * Software Foundation; either version 3.0 of the License, or (at your option)
7  * any later version.
8  *
9  * This library is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
12  * details.
13  *
14  * You should have received a copy of the GNU Lesser General Public License
15  * along with this library; if not, write to the Free Software Foundation, Inc.,
16  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
19 #include "Pyston/Table2Numpy.h"
21 #include <boost/python/list.hpp>
22 #include <boost/python/numpy.hpp>
23 #include <boost/python/suite/indexing/vector_indexing_suite.hpp>
24 #include <boost/python/tuple.hpp>
25 
26 namespace py = boost::python;
27 namespace np = boost::python::numpy;
28 
29 namespace Pyston {
30 
31 namespace {
36 template <typename T>
37 py::tuple getVectorShape(const Euclid::Table::Table& table, size_t idx) {
38  auto& first_row = *table.begin();
39  auto& first_value = boost::get<std::vector<T>>(first_row[idx]);
40  auto size = first_value.size();
41 
42  // Make sure all entries have the same shape!
43  for (auto& row : table) {
44  if (size != boost::get<std::vector<T>>(row[idx]).size()) {
45  throw Elements::Exception("All vectors on the column must have the same size");
46  }
47  }
48 
49  return py::make_tuple(size);
50 }
51 
55 std::size_t getStringShape(const Euclid::Table::Table& table, size_t idx) {
56  auto& first_row = *table.begin();
57  auto& first_value = boost::get<std::string>(first_row[idx]);
58  auto size = first_value.size();
59 
60  // Make sure all entries have the same shape!
61  for (auto& row : table) {
62  if (size != boost::get<std::string>(row[idx]).size()) {
63  throw Elements::Exception("All vectors on the column must have the same size");
64  }
65  }
66 
67  return size + 1;
68 }
69 
74 template <typename T>
75 py::tuple getNdArrayShape(const Euclid::Table::Table& table, size_t idx) {
76  auto& first_row = *table.begin();
77  auto& first_value = boost::get<Euclid::NdArray::NdArray<T>>(first_row[idx]);
78  auto shape = first_value.shape();
79 
80  // Make sure all entries have the same shape!
81  for (auto& row : table) {
82  if (shape != boost::get<Euclid::NdArray::NdArray<T>>(row[idx]).shape()) {
83  throw Elements::Exception("All NdArrays on the column must have the same shape");
84  }
85  }
86 
87  // Need to convert the std::vector to a Python tuple
88  py::list pyshape;
89  for (auto d : shape) {
90  pyshape.append(d);
91  }
92  return py::tuple(pyshape);
93 }
94 
98 py::tuple numpyType(const Euclid::Table::Table& table, size_t idx) {
99  auto& descr = table.getColumnInfo()->getDescription(idx);
100  auto& name = descr.name;
101 
102  std::type_index type = descr.type;
103 
104  if (type == typeid(int32_t)) {
105  return py::make_tuple(name, "i4");
106  } else if (type == typeid(int64_t)) {
107  return py::make_tuple(name, "i8");
108  } else if (type == typeid(float)) {
109  return py::make_tuple(name, "f4");
110  } else if (type == typeid(double)) {
111  return py::make_tuple(name, "f8");
112  } else if (type == typeid(std::string)) {
113  return py::make_tuple(name, "S" + std::to_string(getStringShape(table, idx)));
114  } else if (type == typeid(std::vector<int32_t>)) {
115  return py::make_tuple(name, "i4", getVectorShape<int32_t>(table, idx));
116  } else if (type == typeid(std::vector<int64_t>)) {
117  return py::make_tuple(name, "i8", getVectorShape<int64_t>(table, idx));
118  } else if (type == typeid(std::vector<float>)) {
119  return py::make_tuple(name, "f4", getVectorShape<float>(table, idx));
120  } else if (type == typeid(std::vector<double>)) {
121  return py::make_tuple(name, "f8", getVectorShape<double>(table, idx));
122  } else if (type == typeid(Euclid::NdArray::NdArray<int32_t>)) {
123  return py::make_tuple(name, "i4", getNdArrayShape<int32_t>(table, idx));
124  } else if (type == typeid(Euclid::NdArray::NdArray<int64_t>)) {
125  return py::make_tuple(name, "i8", getNdArrayShape<int64_t>(table, idx));
126  } else if (type == typeid(Euclid::NdArray::NdArray<float>)) {
127  return py::make_tuple(name, "f4", getNdArrayShape<float>(table, idx));
128  } else if (type == typeid(Euclid::NdArray::NdArray<double>)) {
129  return py::make_tuple(name, "f8", getNdArrayShape<double>(table, idx));
130  } else {
131  throw Elements::Exception("Unknown type ") << type.name();
132  }
133 }
134 
138 template <typename T>
139 std::tuple<off_t, const void*> getVectorCellData(const Euclid::Table::Row::cell_type& cell) {
140  auto& v = boost::get<std::vector<T>>(cell);
141  return std::make_tuple(sizeof(T) * v.size(), v.data());
142 }
143 
147 std::tuple<off_t, const void*> getStringCellData(const Euclid::Table::Row::cell_type& cell) {
148  auto& v = boost::get<std::string>(cell);
149  return std::make_tuple(v.size() + 1, v.data());
150 }
151 
155 template <typename T>
156 std::tuple<off_t, const void*> getNdArrayCellData(const Euclid::Table::Row::cell_type& cell) {
157  auto& v = boost::get<Euclid::NdArray::NdArray<T>>(cell);
158  return std::make_tuple(sizeof(T) * v.size(), &(*v.begin()));
159 }
160 
173 off_t copyCell(void* dst, const Euclid::Table::ColumnDescription& descr, const Euclid::Table::Row::cell_type& cell) {
174  std::type_index type = descr.type;
175  off_t data_size = 0;
176  const void* data_ptr;
177 
178  if (type == typeid(int32_t)) {
179  data_size = sizeof(int32_t);
180  data_ptr = &boost::get<int32_t>(cell);
181  } else if (type == typeid(int64_t)) {
182  data_size = sizeof(int64_t);
183  data_ptr = &boost::get<int64_t>(cell);
184  } else if (type == typeid(float)) {
185  data_size = sizeof(float);
186  data_ptr = &boost::get<float>(cell);
187  } else if (type == typeid(double)) {
188  data_size = sizeof(double);
189  data_ptr = &boost::get<double>(cell);
190  } else if (type == typeid(std::string)) {
191  std::tie(data_size, data_ptr) = getStringCellData(cell);
192  } else if (type == typeid(std::vector<int32_t>)) {
193  std::tie(data_size, data_ptr) = getVectorCellData<int32_t>(cell);
194  } else if (type == typeid(std::vector<int64_t>)) {
195  std::tie(data_size, data_ptr) = getVectorCellData<int64_t>(cell);
196  } else if (type == typeid(std::vector<float>)) {
197  std::tie(data_size, data_ptr) = getVectorCellData<float>(cell);
198  } else if (type == typeid(std::vector<double>)) {
199  std::tie(data_size, data_ptr) = getVectorCellData<double>(cell);
200  } else if (type == typeid(Euclid::NdArray::NdArray<int32_t>)) {
201  std::tie(data_size, data_ptr) = getNdArrayCellData<int32_t>(cell);
202  } else if (type == typeid(Euclid::NdArray::NdArray<int64_t>)) {
203  std::tie(data_size, data_ptr) = getNdArrayCellData<int64_t>(cell);
204  } else if (type == typeid(Euclid::NdArray::NdArray<float>)) {
205  std::tie(data_size, data_ptr) = getNdArrayCellData<float>(cell);
206  } else if (type == typeid(Euclid::NdArray::NdArray<double>)) {
207  std::tie(data_size, data_ptr) = getNdArrayCellData<double>(cell);
208  } else {
209  throw Elements::Exception("Unknown type ") << type.name();
210  }
211 
212  std::memcpy(dst, data_ptr, data_size);
213  return data_size;
214 }
215 
216 } // namespace
217 
218 boost::python::numpy::ndarray table2numpy(const Euclid::Table::Table& table) {
219  auto colinfo = table.getColumnInfo();
220  size_t ncols = colinfo->size();
221  size_t nrows = table.size();
222 
223  py::list cols;
224 
225  // Generate the dtypes for numpy
226  for (size_t i = 0; i < ncols; ++i) {
227  auto coldesc = colinfo->getDescription(i);
228  cols.append(numpyType(table, i));
229  }
230 
231  // Convert the list of dtypes to an array description
232  np::dtype dtype(cols);
233 
234  // Create the numpy array
235  auto array = np::zeros(py::make_tuple(table.size()), dtype);
236 
237  // Copy into each row the content from the table
238  char* nd_ptr = array.get_data();
239  for (size_t i = 0; i < nrows; ++i) {
240  const auto& row = table[i];
241  for (size_t j = 0; j < ncols; ++j) {
242  nd_ptr += copyCell(nd_ptr, colinfo->getDescription(j), row[j]);
243  }
244  }
245 
246  return array;
247 }
248 
249 } // namespace Pyston
T tie(T...args)
T to_string(T...args)
T make_tuple(T...args)
STL class.
std::shared_ptr< ColumnInfo > getColumnInfo() const
Returns a ColumnInfo object describing the columns of the table.
Definition: Table.cpp:50
const_iterator begin() const
Returns a const iterator to the first row.
Definition: Table.cpp:65
T memcpy(T...args)
std::size_t size() const
Returns the number of rows in the table.
Definition: Table.cpp:54
Represents a table.
Definition: Table.h:49
boost::variant< bool, int32_t, int64_t, float, double, std::string, std::vector< bool >, std::vector< int32_t >, std::vector< int64_t >, std::vector< float >, std::vector< double >, NdArray::NdArray< int32_t >, NdArray::NdArray< int64_t >, NdArray::NdArray< float >, NdArray::NdArray< double >> cell_type
The possible cell types.
Definition: Row.h:64
STL class.
T name(T...args)
Contains the description of a specific column of a Table.
boost::python::numpy::ndarray table2numpy(const Euclid::Table::Table &table)