@article{soybean,
  title    = "Soybean yield prediction from {UAV} using multimodal data fusion and deep learning",
  journal  = "Remote Sensing of Environment",
  volume   = "237",
  pages    = "111599",
  year     = "2020",
  issn     = "0034-4257",
  doi      = "10.1016/j.rse.2019.111599",
  url      = "http://www.sciencedirect.com/science/article/pii/S0034425719306194",
  author   = "Maitiniyazi Maimaitijiang and Vasit Sagan and Paheding Sidike and Sean Hartling and Flavio Esposito and Felix B. Fritschi",
  keywords = "Remote sensing, Yield prediction, Multimodality, Data fusion, Deep learning, Phenotyping, Spatial autocorrelation",
  abstract = "Preharvest crop yield prediction is critical for grain policy making and food security. Early estimation of yield at field or plot scale also contributes to high-throughput plant phenotyping and precision agriculture. New developments in Unmanned Aerial Vehicle (UAV) platforms and sensor technology facilitate cost-effective data collection through simultaneous multi-sensor/multimodal data collection at very high spatial and spectral resolutions. The objective of this study is to evaluate the power of UAV-based multimodal data fusion using RGB, multispectral and thermal sensors to estimate soybean (Glycine max) grain yield within the framework of Deep Neural Network (DNN). RGB, multispectral, and thermal images were collected using a low-cost multi-sensory UAV from a test site in Columbia, Missouri, USA. Multimodal information, such as canopy spectral, structure, thermal and texture features, was extracted and combined to predict crop grain yield using Partial Least Squares Regression (PLSR), Random Forest Regression (RFR), Support Vector Regression (SVR), input-level feature fusion based DNN (DNN-F1) and intermediate-level feature fusion based DNN (DNN-F2). The results can be summarized in three messages: (1) multimodal data fusion improves the yield prediction accuracy and is more adaptable to spatial variations; (2) DNN-based models improve yield prediction model accuracy: the highest accuracy was obtained by DNN-F2 with an R2 of 0.720 and a relative root mean square error (RMSE%) of 15.9%; (3) DNN-based models were less prone to saturation effects, and exhibited more adaptive performance in predicting grain yields across the Dwight, Pana and AG3432 soybean genotypes in our study. Furthermore, DNN-based models demonstrated consistent performance over space with less spatial dependency and variations. This study indicates that multimodal data fusion using low-cost UAV within a DNN framework can provide a relatively accurate and robust estimation of crop yield, and deliver valuable insight for high-throughput phenotyping and crop field management with high spatial precision."
}