geturl.lib.php 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393
  1. <?php
  2. /* Copyright (C) 2008-2020 Laurent Destailleur <eldy@users.sourceforge.net>
  3. *
  4. * This program is free software; you can redistribute it and/or modify
  5. * it under the terms of the GNU General Public License as published by
  6. * the Free Software Foundation; either version 3 of the License, or
  7. * (at your option) any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program. If not, see <https://www.gnu.org/licenses/>.
  16. * or see https://www.gnu.org/
  17. */
  18. /**
  19. * \file htdocs/core/lib/geturl.lib.php
  20. * \brief This file contains functions dedicated to get URLs.
  21. */
  /**
   * Get the content of a URL with cURL (through the proxy when MAIN_PROXY_USE is set).
   * Supports the Dolibarr setup for timeouts (MAIN_USE_CONNECT_TIMEOUT, MAIN_USE_RESPONSE_TIMEOUT) and proxy.
   * Adds an anti SSRF protection on top of cURL:
   * - the host of each requested URL (including each redirect target) is resolved and checked with isIPAllowed()
   * - you can set MAIN_SECURITY_ANTI_SSRF_SERVER_IP to set static ip of server
   * - redirects are never followed by cURL itself; they are followed here, at most 5 times
   *
   * @param   string      $url                URL to call.
   * @param   string      $postorget          'POST', 'GET', 'HEAD', 'PUT', 'PUTALREADYFORMATED', 'POSTALREADYFORMATED', 'DELETE'
   * @param   string      $param              Parameters of URL (x=value1&y=value2) or may be a formated content with $postorget='PUTALREADYFORMATED'
   * @param   integer     $followlocation     0=Do not follow, 1=Follow location.
   * @param   string[]    $addheaders         Array of string to add into header. Example: ('Accept: application/xrds+xml', ....)
   * @param   string[]    $allowedschemes     List of schemes that are allowed ('http' + 'https' only by default)
   * @param   int         $localurl           0=Only external URL are possible, 1=Only local URL, 2=Both external and local URL are allowed.
   * @param   int         $ssl_verifypeer     -1=Auto (no ssl check on dev, check on prod), 0=No ssl check, 1=Always ssl check
   * @return  array                           Associative array: the keys of curl_getinfo() plus 'content', 'curl_error_no' and 'curl_error_msg'.
   *                                          When cURL reports an error, only 'content', 'curl_error_no' and 'curl_error_msg' are set.
   *                                          When the URL is refused before any request, 'http_code' is 400 and 'content' is the reason.
   */
  function getURLContent($url, $postorget = 'GET', $param = '', $followlocation = 1, $addheaders = array(), $allowedschemes = array('http', 'https'), $localurl = 0, $ssl_verifypeer = -1)
  {
      global $conf;

      $USE_PROXY = empty($conf->global->MAIN_PROXY_USE) ? 0 : $conf->global->MAIN_PROXY_USE;
      $PROXY_HOST = empty($conf->global->MAIN_PROXY_HOST) ? 0 : $conf->global->MAIN_PROXY_HOST;
      $PROXY_PORT = empty($conf->global->MAIN_PROXY_PORT) ? 0 : $conf->global->MAIN_PROXY_PORT;
      $PROXY_USER = empty($conf->global->MAIN_PROXY_USER) ? 0 : $conf->global->MAIN_PROXY_USER;
      $PROXY_PASS = empty($conf->global->MAIN_PROXY_PASS) ? 0 : $conf->global->MAIN_PROXY_PASS;

      dol_syslog("getURLContent postorget=".$postorget." URL=".$url." param=".$param);

      // Setting the curl parameters.
      $ch = curl_init();

      curl_setopt($ch, CURLOPT_VERBOSE, 1);
      curl_setopt($ch, CURLOPT_USERAGENT, 'Dolibarr geturl function');

      // We use @ here because this may return warning if safe mode is on or open_basedir is on (following location is forbidden when safe mode is on).
      // We force value to false so we will manage redirection ourself later (each redirect target must go through the IP check).
      @curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);

      if (is_array($addheaders) && count($addheaders)) {
          curl_setopt($ch, CURLOPT_HTTPHEADER, $addheaders);
      }
      curl_setopt($ch, CURLINFO_HEADER_OUT, true); // To be able to retrieve request header and log it

      // By default use the TLS version decided by PHP.
      // You can force, if supported, a version like TLSv1 or TLSv1.2 (6 = TLS 1.2)
      if (!empty($conf->global->MAIN_CURL_SSLVERSION)) {
          curl_setopt($ch, CURLOPT_SSLVERSION, $conf->global->MAIN_CURL_SSLVERSION);
      }

      // Turning on or off the check of the ssl target certificate
      if ($ssl_verifypeer < 0) {
          global $dolibarr_main_prod;
          $ssl_verifypeer = ($dolibarr_main_prod ? true : false);
      }
      if (!empty($conf->global->MAIN_CURL_DISABLE_VERIFYPEER)) {
          $ssl_verifypeer = 0;
      }
      curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, ($ssl_verifypeer ? true : false));
      // CURLOPT_SSL_VERIFYHOST expects 2 (check the certificate name matches the host) or 0 (no check). The value 1 is no longer supported.
      curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, ($ssl_verifypeer ? 2 : 0));

      // Restrict use to some protocols only
      $protocols = 0;
      if (is_array($allowedschemes)) {
          foreach ($allowedschemes as $allowedscheme) {
              if ($allowedscheme == 'http') {
                  $protocols |= CURLPROTO_HTTP;
              }
              if ($allowedscheme == 'https') {
                  $protocols |= CURLPROTO_HTTPS;
              }
          }
          curl_setopt($ch, CURLOPT_PROTOCOLS, $protocols);
          curl_setopt($ch, CURLOPT_REDIR_PROTOCOLS, $protocols);
      }

      curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, empty($conf->global->MAIN_USE_CONNECT_TIMEOUT) ? 5 : $conf->global->MAIN_USE_CONNECT_TIMEOUT);
      curl_setopt($ch, CURLOPT_TIMEOUT, empty($conf->global->MAIN_USE_RESPONSE_TIMEOUT) ? 30 : $conf->global->MAIN_USE_RESPONSE_TIMEOUT);

      // Limit size of downloaded files when MAIN_SECURITY_MAXFILESIZE_DOWNLOADED is set
      $maxsize = getDolGlobalInt('MAIN_SECURITY_MAXFILESIZE_DOWNLOADED');
      if ($maxsize && defined('CURLOPT_MAXFILESIZE_LARGE')) {
          curl_setopt($ch, CURLOPT_MAXFILESIZE_LARGE, $maxsize);
      }
      if ($maxsize && defined('CURLOPT_MAXFILESIZE')) {
          curl_setopt($ch, CURLOPT_MAXFILESIZE, $maxsize);
      }

      curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); // We want response

      if ($postorget == 'POST') {
          curl_setopt($ch, CURLOPT_POST, 1); // POST
          curl_setopt($ch, CURLOPT_POSTFIELDS, $param); // Setting param x=a&y=z as POST fields
      } elseif ($postorget == 'POSTALREADYFORMATED') {
          curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'POST'); // HTTP request is 'POST' but param string is taken as it is
          curl_setopt($ch, CURLOPT_POSTFIELDS, $param); // param = content of post, like a xml string
      } elseif ($postorget == 'PUT') {
          $array_param = null;
          curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'PUT'); // HTTP request is 'PUT'
          if (!is_array($param)) {
              parse_str($param, $array_param);
          } else {
              dol_syslog("parameter param must be a string", LOG_WARNING);
              $array_param = $param;
          }
          curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($array_param)); // Setting param x=a&y=z as PUT fields
      } elseif ($postorget == 'PUTALREADYFORMATED') {
          curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'PUT'); // HTTP request is 'PUT'
          curl_setopt($ch, CURLOPT_POSTFIELDS, $param); // param = content of put, like a xml string
      } elseif ($postorget == 'HEAD') {
          curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'HEAD'); // HTTP request is 'HEAD'
          curl_setopt($ch, CURLOPT_NOBODY, true);
      } elseif ($postorget == 'DELETE') {
          curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'DELETE'); // HTTP request is 'DELETE'
      } else {
          curl_setopt($ch, CURLOPT_POST, 0); // GET
      }

      if ($USE_PROXY) {
          // The proxy password is masked: credentials must not be written into logs.
          dol_syslog("getURLContent set proxy to ".$PROXY_HOST.":".$PROXY_PORT." - ".$PROXY_USER.":".($PROXY_PASS ? '*****' : ''));
          curl_setopt($ch, CURLOPT_PROXY, $PROXY_HOST.":".$PROXY_PORT);
          if ($PROXY_USER) {
              curl_setopt($ch, CURLOPT_PROXYUSERPWD, $PROXY_USER.":".$PROXY_PASS);
          }
      }

      $newUrl = $url;
      $maxRedirection = 5;
      $info = array();
      $response = '';

      do {
          if ($maxRedirection < 1) {
              break;
          }

          curl_setopt($ch, CURLOPT_URL, $newUrl);

          // Parse $newUrl. Without a host we can't run the IP check, so the URL is refused.
          $newUrlArray = parse_url($newUrl);
          if (!is_array($newUrlArray) || empty($newUrlArray['host'])) {
              $info['http_code'] = 400;
              $info['content'] = 'Error bad URL. No hostname found into URL.';
              $response = ''; // Do not let the body of a previous redirect response replace the error message
              break;
          }

          $hosttocheck = str_replace(array('[', ']'), '', $newUrlArray['host']); // Remove brackets of IPv6
          // Host names are case insensitive and may end with a dot, so we normalize before comparing with fixed names
          $hostnormalized = rtrim(strtolower($hosttocheck), '.');

          // Deny some reserved host names
          if (in_array($hostnormalized, array('metadata.google.internal'))) {
              $info['http_code'] = 400;
              $info['content'] = 'Error bad hostname '.$hosttocheck.' (Used by Google metadata). This value for hostname is not allowed.';
              $response = '';
              break;
          }

          // Clean host name $hosttocheck to convert it into an IP $iptocheck
          if (in_array($hostnormalized, array('localhost', 'localhost.domain'))) {
              $iptocheck = '127.0.0.1';
          } elseif (in_array($hostnormalized, array('ip6-localhost', 'ip6-loopback'))) {
              $iptocheck = '::1';
          } else {
              // Resolve $hosttocheck to get the IP $iptocheck
              if (function_exists('gethostbyname')) {
                  $iptocheck = gethostbyname($hosttocheck);
              } else {
                  $iptocheck = $hosttocheck;
              }
              // TODO Resolve ip v6
          }

          // Check $iptocheck is an IP (v4 or v6), if not clear value.
          if (!filter_var($iptocheck, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4 | FILTER_FLAG_IPV6)) {
              $iptocheck = '0';
          }

          if ($iptocheck) {
              $tmpresult = isIPAllowed($iptocheck, $localurl);
              if ($tmpresult) {
                  $info['http_code'] = 400;
                  $info['content'] = $tmpresult;
                  $response = '';
                  break;
              }

              // Set CURLOPT_CONNECT_TO so curl will not try another resolution that may give a different result. Possible only on PHP v7+
              // Format is HOST:PORT:CONNECT-TO-HOST:CONNECT-TO-PORT. An empty PORT matches any port and an empty CONNECT-TO-PORT
              // keeps the port of the request, so the port must be written as a string ('%d' would turn '' into 0, which never matches).
              if (defined('CURLOPT_CONNECT_TO')) {
                  $porttouse = empty($newUrlArray['port']) ? '' : (string) $newUrlArray['port'];
                  $targetip = (strpos($iptocheck, ':') !== false) ? '['.$iptocheck.']' : $iptocheck; // IPv6 must be inside brackets
                  $connect_to = array($newUrlArray['host'].':'.$porttouse.':'.$targetip.':'.$porttouse);
                  curl_setopt($ch, CURLOPT_CONNECT_TO, $connect_to);
              }
          }

          // Getting response from server
          $response = curl_exec($ch);

          $info = curl_getinfo($ch); // Reading of request must be done after sending request
          $http_code = $info['http_code'];

          if ($followlocation && ($http_code == 301 || $http_code == 302 || $http_code == 303 || $http_code == 307)) {
              $newUrl = $info['redirect_url'];
              $maxRedirection--;
              // TODO Use $info['local_ip'] and $info['primary_ip'] ?
              continue;
          }

          $http_code = 0; // Not a redirect (or redirects not followed): leave the loop
      } while ($http_code);

      $request = curl_getinfo($ch, CURLINFO_HEADER_OUT); // Reading of request must be done after sending request

      dol_syslog("getURLContent request=".$request);
      if (!empty($conf->global->MAIN_GETURLCONTENT_OUTPUT_RESPONSE)) {
          // This may contains binary data, so we don't output the response by default.
          dol_syslog("getURLContent response =".$response);
      }
      // curl_exec() returns false on failure, so we only measure real strings
      dol_syslog("getURLContent response size=".(is_string($response) ? strlen($response) : 0));

      $rep = array();
      if (curl_errno($ch)) {
          // Add keys to $rep
          $rep['content'] = $response;

          $rep['curl_error_no'] = curl_errno($ch);
          $rep['curl_error_msg'] = curl_error($ch);

          dol_syslog("getURLContent response array is ".implode(',', $rep));
      } else {
          // Start from the information returned by curl (or from the http_code/content set when the URL was refused)
          $rep = $info;

          dol_syslog("getURLContent http_code=".$rep['http_code']);

          // Add more keys to $rep
          if ($response) {
              $rep['content'] = $response;
          }
          $rep['curl_error_no'] = '';
          $rep['curl_error_msg'] = '';
      }

      // Closing the curl handler
      curl_close($ch);

      return $rep;
  }
  /**
   * Check if an IP is allowed as target of a request, according to the kind of URL accepted.
   *
   * @param   string  $iptocheck      IP to check
   * @param   int     $localurl       0=external url only, 1=internal url only, any other value=only the common checks are done
   * @return  string                  Error message if IP is refused, or '' if IP is allowed
   */
  function isIPAllowed($iptocheck, $localurl)
  {
      global $conf;

      // List of IPs declared as IPs of the server. Spaces around each entry are ignored so 'ip1, ip2' works like 'ip1,ip2'.
      $hasserveriplist = !empty($conf->global->MAIN_SECURITY_ANTI_SSRF_SERVER_IP);
      $serveriplist = array();
      if ($hasserveriplist) {
          $serveriplist = array_map('trim', explode(',', $conf->global->MAIN_SECURITY_ANTI_SSRF_SERVER_IP));
      }

      if ($localurl == 0) {   // Only external url allowed (dangerous, may allow to get malware)
          if (!filter_var($iptocheck, FILTER_VALIDATE_IP, FILTER_FLAG_NO_PRIV_RANGE | FILTER_FLAG_NO_RES_RANGE)) {
              // Deny ips that are in a private or reserved range (like 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16, 127.0.0.0/8, ::1/128, ...)
              return 'Error bad hostname IP (private or reserved range). Must be an external URL.';
          }
          if (!empty($_SERVER["SERVER_ADDR"]) && $iptocheck == $_SERVER["SERVER_ADDR"]) {
              return 'Error bad hostname IP (IP is a local IP). Must be an external URL.';
          }
          if ($hasserveriplist && in_array($iptocheck, $serveriplist)) {
              return 'Error bad hostname IP (IP is a local IP defined into MAIN_SECURITY_ANTI_SSRF_SERVER_IP). Must be an external URL.';
          }
      }

      if ($localurl == 1) {   // Only local url allowed (dangerous, may allow to get metadata on server or make internal port scanning)
          // Deny ips that are NOT in a private or reserved range
          if (filter_var($iptocheck, FILTER_VALIDATE_IP, FILTER_FLAG_NO_PRIV_RANGE | FILTER_FLAG_NO_RES_RANGE)) {
              return 'Error bad hostname '.$iptocheck.'. Must be a local URL.';
          }
          // When a list of server IPs is defined, the IP must also be inside this list
          if ($hasserveriplist && !in_array($iptocheck, $serveriplist)) {
              return 'Error bad hostname IP (IP is not a local IP defined into list MAIN_SECURITY_ANTI_SSRF_SERVER_IP). Must be a local URL in allowed list.';
          }
      }

      // Common check on ip (local and external): IPs of the metadata servers of some cloud providers are always forbidden
      $arrayofmetadataserver = array(
          '100.100.100.200' => 'Alibaba',
          '192.0.0.192' => 'Oracle',
          '192.80.8.124' => 'Packet',
          '100.88.222.5' => 'Tencent cloud',
      );
      if (is_string($iptocheck) && isset($arrayofmetadataserver[$iptocheck])) {
          return 'Error bad hostname IP (Used by '.$arrayofmetadataserver[$iptocheck].' metadata server). This IP is forbidden.';
      }

      return '';
  }
  /**
   * Extract a domain name from an URL.
   * For example, with https://www.abc.mydomain.com/dir/page.html the result is 'mydomain' (mode 0),
   * 'mydomain.com' (mode 1) or 'abc.mydomain.com' (mode 2).
   *
   * @param   string  $url    Full URL.
   * @param   int     $mode   0=return 'mydomain', 1=return 'mydomain.com', 2=return 'abc.mydomain.com'
   * @return  string          Domain name, at the level asked by $mode
   */
  function getDomainFromURL($url, $mode = 0)
  {
      // Keep only the host part: drop the leading http(s):// then everything from the first /
      $hostpart = preg_replace(array('/^https?:\/\//i', '/\/.*$/i'), '', $url);

      // Keep the last 3 labels for mode 2, the last 2 labels for the other modes
      if ($mode == 2) {
          $keeppattern = '/^.*\.([^\.]+)\.([^\.]+)\.([^\.]+)$/';
          $keepreplace = '\1.\2.\3';
      } else {
          $keeppattern = '/^.*\.([^\.]+)\.([^\.]+)$/';
          $keepreplace = '\1.\2';
      }
      $domain = preg_replace($keeppattern, $keepreplace, $hostpart);

      if (!empty($mode)) {
          return $domain;
      }

      // Mode 0: also remove the first level domain (.com, .net, ...)
      return preg_replace('/\.[^\.]+$/', '', $domain);
  }
  /**
   * Return the root of an URL (scheme + host), without the path.
   * For example: https://www.abc.mydomain.com/dir/page.html return 'https://www.abc.mydomain.com'
   * For example: http://www.abc.mydomain.com/ return 'http://www.abc.mydomain.com'
   *
   * @param   string  $url    Full URL.
   * @return  string          Root url. The scheme is kept as it was written in $url ('' if $url has no http(s) scheme)
   */
  function getRootURLFromURL($url)
  {
      $matches = null;
      // Remember the http(s):// prefix exactly as written, if there is one
      $scheme = preg_match('/^(https?:\/\/)/i', $url, $matches) ? $matches[1] : '';

      // Remove http(s):// then the part after the domain
      $hostpart = preg_replace(array('/^https?:\/\//i', '/\/.*$/i'), '', $url);

      return $scheme.$hostpart;
  }
  /**
   * Function to remove comments into HTML content.
   * Every <!-- ... --> sequence is removed, including comments on several lines and comments that contain a '-'.
   *
   * @param   string  $content    Text content
   * @return  string              Returns text without HTML comments (the content unchanged if the regex engine fails)
   */
  function removeHtmlComment($content)
  {
      // Non greedy match so each comment stops at its own closing '-->'; the 's' modifier lets '.' match new lines.
      $cleaned = preg_replace('/<!--.*?-->/s', '', $content);

      // preg_replace() returns null on error (for example backtrack limit reached): keep the original content in such a case
      return ($cleaned === null) ? $content : $cleaned;
  }
  347. }